summaryrefslogtreecommitdiffstats
path: root/tools/perf/util/intel-pt-decoder
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 01:02:30 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 01:02:30 +0000
commit76cb841cb886eef6b3bee341a2266c76578724ad (patch)
treef5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /tools/perf/util/intel-pt-decoder
parentInitial commit. (diff)
downloadlinux-76cb841cb886eef6b3bee341a2266c76578724ad.tar.xz
linux-76cb841cb886eef6b3bee341a2266c76578724ad.zip
Adding upstream version 4.19.249.upstream/4.19.249
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--tools/perf/util/intel-pt-decoder/Build36
-rw-r--r--tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk392
-rw-r--r--tools/perf/util/intel-pt-decoder/inat.c96
-rw-r--r--tools/perf/util/intel-pt-decoder/inat.h244
-rw-r--r--tools/perf/util/intel-pt-decoder/inat_types.h29
-rw-r--r--tools/perf/util/intel-pt-decoder/insn.c606
-rw-r--r--tools/perf/util/intel-pt-decoder/insn.h244
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c2727
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h133
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c274
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h63
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-log.c156
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-log.h78
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c638
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h77
-rw-r--r--tools/perf/util/intel-pt-decoder/x86-opcode-map.txt1072
16 files changed, 6865 insertions, 0 deletions
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
new file mode 100644
index 000000000..1b704fbea
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -0,0 +1,36 @@
+libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
+
+inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
+inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
+
+$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+ $(call rule_mkdir)
+ @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
+
+# Busybox's diff doesn't have -I, avoid warning in the case
+
+$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
+ @(diff -I 2>&1 | grep -q 'option requires an argument' && \
+ test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+ ((diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/insn.c' differs from latest version at 'arch/x86/lib/insn.c'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/inat.c' differs from latest version at 'arch/x86/lib/inat.c'" >&2)) && \
+ ((diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder map file at 'tools/perf/util/intel-pt-decoder/x86-opcode-map.txt' differs from latest version at 'arch/x86/lib/x86-opcode-map.txt'" >&2)) && \
+ ((diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder script at 'tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk' differs from latest version at 'arch/x86/tools/gen-insn-attr-x86.awk'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/insn.h' differs from latest version at 'arch/x86/include/asm/insn.h'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat_types.h' differs from latest version at 'arch/x86/include/asm/inat_types.h'" >&2)))) || true
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder
+
+ifeq ($(CC_NO_CLANG), 1)
+ CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init
+endif
diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
new file mode 100644
index 000000000..ddd5c4c21
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
@@ -0,0 +1,392 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+# gen-insn-attr-x86.awk: Instruction attribute table generator
+# Written by Masami Hiramatsu <mhiramat@redhat.com>
+#
+# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
+
+# Awk implementation sanity check
+function check_awk_implement() {
+ if (sprintf("%x", 0) != "0")
+ return "Your awk has a printf-format problem."
+ return ""
+}
+
+# Clear working vars
+function clear_vars() {
+ delete table
+ delete lptable2
+ delete lptable1
+ delete lptable3
+ eid = -1 # escape id
+ gid = -1 # group id
+ aid = -1 # AVX id
+ tname = ""
+}
+
+BEGIN {
+ # Implementation error checking
+ awkchecked = check_awk_implement()
+ if (awkchecked != "") {
+ print "Error: " awkchecked > "/dev/stderr"
+ print "Please try to use gawk." > "/dev/stderr"
+ exit 1
+ }
+
+ # Setup generating tables
+ print "/* x86 opcode map generated from x86-opcode-map.txt */"
+ print "/* Do not change this code. */\n"
+ ggid = 1
+ geid = 1
+ gaid = 0
+ delete etable
+ delete gtable
+ delete atable
+
+ opnd_expr = "^[A-Za-z/]"
+ ext_expr = "^\\("
+ sep_expr = "^\\|$"
+ group_expr = "^Grp[0-9A-Za-z]+"
+
+ imm_expr = "^[IJAOL][a-z]"
+ imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
+ imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
+ imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
+ imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
+ imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+ imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+ imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
+ imm_flag["Ob"] = "INAT_MOFFSET"
+ imm_flag["Ov"] = "INAT_MOFFSET"
+ imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+
+ modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
+ force64_expr = "\\([df]64\\)"
+ rex_expr = "^REX(\\.[XRWB]+)*"
+ fpu_expr = "^ESC" # TODO
+
+ lprefix1_expr = "\\((66|!F3)\\)"
+ lprefix2_expr = "\\(F3\\)"
+ lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)"
+ lprefix_expr = "\\((66|F2|F3)\\)"
+ max_lprefix = 4
+
+ # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
+ # accepts VEX prefix
+ vexok_opcode_expr = "^[vk].*"
+ vexok_expr = "\\(v1\\)"
+ # All opcodes with (v) superscript supports *only* VEX prefix
+ vexonly_expr = "\\(v\\)"
+ # All opcodes with (ev) superscript supports *only* EVEX prefix
+ evexonly_expr = "\\(ev\\)"
+
+ prefix_expr = "\\(Prefix\\)"
+ prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
+ prefix_num["REPNE"] = "INAT_PFX_REPNE"
+ prefix_num["REP/REPE"] = "INAT_PFX_REPE"
+ prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
+ prefix_num["XRELEASE"] = "INAT_PFX_REPE"
+ prefix_num["LOCK"] = "INAT_PFX_LOCK"
+ prefix_num["SEG=CS"] = "INAT_PFX_CS"
+ prefix_num["SEG=DS"] = "INAT_PFX_DS"
+ prefix_num["SEG=ES"] = "INAT_PFX_ES"
+ prefix_num["SEG=FS"] = "INAT_PFX_FS"
+ prefix_num["SEG=GS"] = "INAT_PFX_GS"
+ prefix_num["SEG=SS"] = "INAT_PFX_SS"
+ prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
+ prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
+ prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
+ prefix_num["EVEX"] = "INAT_PFX_EVEX"
+
+ clear_vars()
+}
+
+function semantic_error(msg) {
+ print "Semantic error at " NR ": " msg > "/dev/stderr"
+ exit 1
+}
+
+function debug(msg) {
+ print "DEBUG: " msg
+}
+
+function array_size(arr, i,c) {
+ c = 0
+ for (i in arr)
+ c++
+ return c
+}
+
+/^Table:/ {
+ print "/* " $0 " */"
+ if (tname != "")
+ semantic_error("Hit Table: before EndTable:.");
+}
+
+/^Referrer:/ {
+ if (NF != 1) {
+ # escape opcode table
+ ref = ""
+ for (i = 2; i <= NF; i++)
+ ref = ref $i
+ eid = escape[ref]
+ tname = sprintf("inat_escape_table_%d", eid)
+ }
+}
+
+/^AVXcode:/ {
+ if (NF != 1) {
+ # AVX/escape opcode table
+ aid = $2
+ if (gaid <= aid)
+ gaid = aid + 1
+ if (tname == "") # AVX only opcode table
+ tname = sprintf("inat_avx_table_%d", $2)
+ }
+ if (aid == -1 && eid == -1) # primary opcode table
+ tname = "inat_primary_table"
+}
+
+/^GrpTable:/ {
+ print "/* " $0 " */"
+ if (!($2 in group))
+ semantic_error("No group: " $2 )
+ gid = group[$2]
+ tname = "inat_group_table_" gid
+}
+
+function print_table(tbl,name,fmt,n)
+{
+ print "const insn_attr_t " name " = {"
+ for (i = 0; i < n; i++) {
+ id = sprintf(fmt, i)
+ if (tbl[id])
+ print " [" id "] = " tbl[id] ","
+ }
+ print "};"
+}
+
+/^EndTable/ {
+ if (gid != -1) {
+ # print group tables
+ if (array_size(table) != 0) {
+ print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,0] = tname
+ }
+ if (array_size(lptable1) != 0) {
+ print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,1] = tname "_1"
+ }
+ if (array_size(lptable2) != 0) {
+ print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,2] = tname "_2"
+ }
+ if (array_size(lptable3) != 0) {
+ print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,3] = tname "_3"
+ }
+ } else {
+ # print primary/escaped tables
+ if (array_size(table) != 0) {
+ print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,0] = tname
+ if (aid >= 0)
+ atable[aid,0] = tname
+ }
+ if (array_size(lptable1) != 0) {
+ print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,1] = tname "_1"
+ if (aid >= 0)
+ atable[aid,1] = tname "_1"
+ }
+ if (array_size(lptable2) != 0) {
+ print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,2] = tname "_2"
+ if (aid >= 0)
+ atable[aid,2] = tname "_2"
+ }
+ if (array_size(lptable3) != 0) {
+ print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,3] = tname "_3"
+ if (aid >= 0)
+ atable[aid,3] = tname "_3"
+ }
+ }
+ print ""
+ clear_vars()
+}
+
+function add_flags(old,new) {
+ if (old && new)
+ return old " | " new
+ else if (old)
+ return old
+ else
+ return new
+}
+
+# convert operands to flags.
+function convert_operands(count,opnd, i,j,imm,mod)
+{
+ imm = null
+ mod = null
+ for (j = 1; j <= count; j++) {
+ i = opnd[j]
+ if (match(i, imm_expr) == 1) {
+ if (!imm_flag[i])
+ semantic_error("Unknown imm opnd: " i)
+ if (imm) {
+ if (i != "Ib")
+ semantic_error("Second IMM error")
+ imm = add_flags(imm, "INAT_SCNDIMM")
+ } else
+ imm = imm_flag[i]
+ } else if (match(i, modrm_expr))
+ mod = "INAT_MODRM"
+ }
+ return add_flags(imm, mod)
+}
+
+/^[0-9a-f]+\:/ {
+ if (NR == 1)
+ next
+ # get index
+ idx = "0x" substr($1, 1, index($1,":") - 1)
+ if (idx in table)
+ semantic_error("Redefine " idx " in " tname)
+
+ # check if escaped opcode
+ if ("escape" == $2) {
+ if ($3 != "#")
+ semantic_error("No escaped name")
+ ref = ""
+ for (i = 4; i <= NF; i++)
+ ref = ref $i
+ if (ref in escape)
+ semantic_error("Redefine escape (" ref ")")
+ escape[ref] = geid
+ geid++
+ table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
+ next
+ }
+
+ variant = null
+ # converts
+ i = 2
+ while (i <= NF) {
+ opcode = $(i++)
+ delete opnds
+ ext = null
+ flags = null
+ opnd = null
+ # parse one opcode
+ if (match($i, opnd_expr)) {
+ opnd = $i
+ count = split($(i++), opnds, ",")
+ flags = convert_operands(count, opnds)
+ }
+ if (match($i, ext_expr))
+ ext = $(i++)
+ if (match($i, sep_expr))
+ i++
+ else if (i < NF)
+ semantic_error($i " is not a separator")
+
+ # check if group opcode
+ if (match(opcode, group_expr)) {
+ if (!(opcode in group)) {
+ group[opcode] = ggid
+ ggid++
+ }
+ flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
+ }
+ # check force(or default) 64bit
+ if (match(ext, force64_expr))
+ flags = add_flags(flags, "INAT_FORCE64")
+
+ # check REX prefix
+ if (match(opcode, rex_expr))
+ flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")
+
+ # check coprocessor escape : TODO
+ if (match(opcode, fpu_expr))
+ flags = add_flags(flags, "INAT_MODRM")
+
+ # check VEX codes
+ if (match(ext, evexonly_expr))
+ flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
+ else if (match(ext, vexonly_expr))
+ flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
+ else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
+ flags = add_flags(flags, "INAT_VEXOK")
+
+ # check prefixes
+ if (match(ext, prefix_expr)) {
+ if (!prefix_num[opcode])
+ semantic_error("Unknown prefix: " opcode)
+ flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
+ }
+ if (length(flags) == 0)
+ continue
+ # check if last prefix
+ if (match(ext, lprefix1_expr)) {
+ lptable1[idx] = add_flags(lptable1[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (match(ext, lprefix2_expr)) {
+ lptable2[idx] = add_flags(lptable2[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (match(ext, lprefix3_expr)) {
+ lptable3[idx] = add_flags(lptable3[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (!match(ext, lprefix_expr)){
+ table[idx] = add_flags(table[idx],flags)
+ }
+ }
+ if (variant)
+ table[idx] = add_flags(table[idx],variant)
+}
+
+END {
+ if (awkchecked != "")
+ exit 1
+ # print escape opcode map's array
+ print "/* Escape opcode map array */"
+ print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < geid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (etable[i,j])
+ print " ["i"]["j"] = "etable[i,j]","
+ print "};\n"
+ # print group opcode map's array
+ print "/* Group opcode map array */"
+ print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < ggid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (gtable[i,j])
+ print " ["i"]["j"] = "gtable[i,j]","
+ print "};\n"
+ # print AVX opcode map's array
+ print "/* AVX opcode map array */"
+ print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < gaid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (atable[i,j])
+ print " ["i"]["j"] = "atable[i,j]","
+ print "};"
+}
diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c
new file mode 100644
index 000000000..906d94aa0
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat.c
@@ -0,0 +1,96 @@
+/*
+ * x86 instruction attribute tables
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include "insn.h"
+
+/* Attribute tables are generated from opcode map */
+#include "inat-tables.c"
+
+/* Attribute search APIs */
+insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
+{
+ return inat_primary_table[opcode];
+}
+
+int inat_get_last_prefix_id(insn_byte_t last_pfx)
+{
+ insn_attr_t lpfx_attr;
+
+ lpfx_attr = inat_get_opcode_attribute(last_pfx);
+ return inat_last_prefix_id(lpfx_attr);
+}
+
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
+ insn_attr_t esc_attr)
+{
+ const insn_attr_t *table;
+ int n;
+
+ n = inat_escape_id(esc_attr);
+
+ table = inat_escape_tables[n][0];
+ if (!table)
+ return 0;
+ if (inat_has_variant(table[opcode]) && lpfx_id) {
+ table = inat_escape_tables[n][lpfx_id];
+ if (!table)
+ return 0;
+ }
+ return table[opcode];
+}
+
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
+ insn_attr_t grp_attr)
+{
+ const insn_attr_t *table;
+ int n;
+
+ n = inat_group_id(grp_attr);
+
+ table = inat_group_tables[n][0];
+ if (!table)
+ return inat_group_common_attribute(grp_attr);
+ if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
+ table = inat_group_tables[n][lpfx_id];
+ if (!table)
+ return inat_group_common_attribute(grp_attr);
+ }
+ return table[X86_MODRM_REG(modrm)] |
+ inat_group_common_attribute(grp_attr);
+}
+
+insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
+ insn_byte_t vex_p)
+{
+ const insn_attr_t *table;
+ if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
+ return 0;
+ /* At first, this checks the master table */
+ table = inat_avx_tables[vex_m][0];
+ if (!table)
+ return 0;
+ if (!inat_is_group(table[opcode]) && vex_p) {
+ /* If this is not a group, get attribute directly */
+ table = inat_avx_tables[vex_m][vex_p];
+ if (!table)
+ return 0;
+ }
+ return table[opcode];
+}
diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h
new file mode 100644
index 000000000..52dc8d911
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat.h
@@ -0,0 +1,244 @@
+#ifndef _ASM_X86_INAT_H
+#define _ASM_X86_INAT_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include "inat_types.h"
+
+/*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+ * unstable. You should use checking functions.
+ */
+
+#define INAT_OPCODE_TABLE_SIZE 256
+#define INAT_GROUP_TABLE_SIZE 8
+
+/* Legacy last prefixes */
+#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */
+#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */
+#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */
+/* Other Legacy prefixes */
+#define INAT_PFX_LOCK 4 /* 0xF0 */
+#define INAT_PFX_CS 5 /* 0x2E */
+#define INAT_PFX_DS 6 /* 0x3E */
+#define INAT_PFX_ES 7 /* 0x26 */
+#define INAT_PFX_FS 8 /* 0x64 */
+#define INAT_PFX_GS 9 /* 0x65 */
+#define INAT_PFX_SS 10 /* 0x36 */
+#define INAT_PFX_ADDRSZ 11 /* 0x67 */
+/* x86-64 REX prefix */
+#define INAT_PFX_REX 12 /* 0x4X */
+/* AVX VEX prefixes */
+#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
+#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
+#define INAT_PFX_EVEX 15 /* EVEX prefix */
+
+#define INAT_LSTPFX_MAX 3
+#define INAT_LGCPFX_MAX 11
+
+/* Immediate size */
+#define INAT_IMM_BYTE 1
+#define INAT_IMM_WORD 2
+#define INAT_IMM_DWORD 3
+#define INAT_IMM_QWORD 4
+#define INAT_IMM_PTR 5
+#define INAT_IMM_VWORD32 6
+#define INAT_IMM_VWORD 7
+
+/* Legacy prefix */
+#define INAT_PFX_OFFS 0
+#define INAT_PFX_BITS 4
+#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
+#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
+/* Escape opcodes */
+#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS)
+#define INAT_ESC_BITS 2
+#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1)
+#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS)
+/* Group opcodes (1-16) */
+#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS)
+#define INAT_GRP_BITS 5
+#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1)
+#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS)
+/* Immediates */
+#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS)
+#define INAT_IMM_BITS 3
+#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
+/* Flags */
+#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS)
+#define INAT_MODRM (1 << (INAT_FLAG_OFFS))
+#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1))
+#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2))
+#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
+#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
+#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
+#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
+#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
+/* Attribute making macros for attribute tables */
+#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
+#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
+#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM)
+#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS)
+
+/* Identifiers for segment registers */
+#define INAT_SEG_REG_IGNORE 0
+#define INAT_SEG_REG_DEFAULT 1
+#define INAT_SEG_REG_CS 2
+#define INAT_SEG_REG_SS 3
+#define INAT_SEG_REG_DS 4
+#define INAT_SEG_REG_ES 5
+#define INAT_SEG_REG_FS 6
+#define INAT_SEG_REG_GS 7
+
+/* Attribute search APIs */
+extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
+extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
+ int lpfx_id,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
+ int lpfx_id,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
+ insn_byte_t vex_m,
+ insn_byte_t vex_pp);
+
+/* Attribute checking functions */
+static inline int inat_is_legacy_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr && attr <= INAT_LGCPFX_MAX;
+}
+
+static inline int inat_is_address_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
+}
+
+static inline int inat_is_operand_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
+}
+
+static inline int inat_is_rex_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
+}
+
+static inline int inat_last_prefix_id(insn_attr_t attr)
+{
+ if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
+ return 0;
+ else
+ return attr & INAT_PFX_MASK;
+}
+
+static inline int inat_is_vex_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 ||
+ attr == INAT_PFX_EVEX;
+}
+
+static inline int inat_is_evex_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX;
+}
+
+static inline int inat_is_vex3_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_escape(insn_attr_t attr)
+{
+ return attr & INAT_ESC_MASK;
+}
+
+static inline int inat_escape_id(insn_attr_t attr)
+{
+ return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
+}
+
+static inline int inat_is_group(insn_attr_t attr)
+{
+ return attr & INAT_GRP_MASK;
+}
+
+static inline int inat_group_id(insn_attr_t attr)
+{
+ return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
+}
+
+static inline int inat_group_common_attribute(insn_attr_t attr)
+{
+ return attr & ~INAT_GRP_MASK;
+}
+
+static inline int inat_has_immediate(insn_attr_t attr)
+{
+ return attr & INAT_IMM_MASK;
+}
+
+static inline int inat_immediate_size(insn_attr_t attr)
+{
+ return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
+}
+
+static inline int inat_has_modrm(insn_attr_t attr)
+{
+ return attr & INAT_MODRM;
+}
+
+static inline int inat_is_force64(insn_attr_t attr)
+{
+ return attr & INAT_FORCE64;
+}
+
+static inline int inat_has_second_immediate(insn_attr_t attr)
+{
+ return attr & INAT_SCNDIMM;
+}
+
+static inline int inat_has_moffset(insn_attr_t attr)
+{
+ return attr & INAT_MOFFSET;
+}
+
+static inline int inat_has_variant(insn_attr_t attr)
+{
+ return attr & INAT_VARIANT;
+}
+
+static inline int inat_accept_vex(insn_attr_t attr)
+{
+ return attr & INAT_VEXOK;
+}
+
+static inline int inat_must_vex(insn_attr_t attr)
+{
+ return attr & (INAT_VEXONLY | INAT_EVEXONLY);
+}
+
+static inline int inat_must_evex(insn_attr_t attr)
+{
+ return attr & INAT_EVEXONLY;
+}
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h
new file mode 100644
index 000000000..cb3c20ce3
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat_types.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_X86_INAT_TYPES_H
+#define _ASM_X86_INAT_TYPES_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* Instruction attributes */
+typedef unsigned int insn_attr_t;
+typedef unsigned char insn_byte_t;
+typedef signed int insn_value_t;
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c
new file mode 100644
index 000000000..ca983e2be
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/insn.c
@@ -0,0 +1,606 @@
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004, 2009
+ */
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+#else
+#include <string.h>
+#endif
+#include "inat.h"
+#include "insn.h"
+
+/* Verify next sizeof(t) bytes can be on the same instruction */
+#define validate_next(t, insn, n) \
+ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
+
+#define __get_next(t, insn) \
+ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+
+#define __peek_nbyte_next(t, insn, n) \
+ ({ t r = *(t*)((insn)->next_byte + n); r; })
+
+#define get_next(t, insn) \
+ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
+
+#define peek_nbyte_next(t, insn, n) \
+ ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })
+
+#define peek_next(t, insn) peek_nbyte_next(t, insn, 0)
+
+/**
+ * insn_init() - initialize struct insn
+ * @insn: &struct insn to be initialized
+ * @kaddr: address (in kernel memory) of instruction (or copy thereof)
+ * @x86_64: !0 for 64-bit kernel or 64-bit app
+ */
+void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
+{
+ /*
+ * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
+ * even if the input buffer is long enough to hold them.
+ */
+ if (buf_len > MAX_INSN_SIZE)
+ buf_len = MAX_INSN_SIZE;
+
+ memset(insn, 0, sizeof(*insn));
+ insn->kaddr = kaddr;
+ insn->end_kaddr = kaddr + buf_len;
+ insn->next_byte = kaddr;
+ insn->x86_64 = x86_64 ? 1 : 0;
+ insn->opnd_bytes = 4;
+ if (x86_64)
+ insn->addr_bytes = 8;
+ else
+ insn->addr_bytes = 4;
+}
+
+/**
+ * insn_get_prefixes - scan x86 instruction prefix bytes
+ * @insn: &struct insn containing instruction
+ *
+ * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
+ * to point to the (first) opcode. No effect if @insn->prefixes.got
+ * is already set.
+ */
+void insn_get_prefixes(struct insn *insn)
+{
+ struct insn_field *prefixes = &insn->prefixes;
+ insn_attr_t attr;
+ insn_byte_t b, lb;
+ int i, nb;
+
+ if (prefixes->got)
+ return;
+
+ nb = 0;
+ lb = 0;
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ while (inat_is_legacy_prefix(attr)) {
+ /* Skip if same prefix */
+ for (i = 0; i < nb; i++)
+ if (prefixes->bytes[i] == b)
+ goto found;
+ if (nb == 4)
+ /* Invalid instruction */
+ break;
+ prefixes->bytes[nb++] = b;
+ if (inat_is_address_size_prefix(attr)) {
+ /* address size switches 2/4 or 4/8 */
+ if (insn->x86_64)
+ insn->addr_bytes ^= 12;
+ else
+ insn->addr_bytes ^= 6;
+ } else if (inat_is_operand_size_prefix(attr)) {
+ /* oprand size switches 2/4 */
+ insn->opnd_bytes ^= 6;
+ }
+found:
+ prefixes->nbytes++;
+ insn->next_byte++;
+ lb = b;
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ }
+ /* Set the last prefix */
+ if (lb && lb != insn->prefixes.bytes[3]) {
+ if (unlikely(insn->prefixes.bytes[3])) {
+ /* Swap the last prefix */
+ b = insn->prefixes.bytes[3];
+ for (i = 0; i < nb; i++)
+ if (prefixes->bytes[i] == lb)
+ prefixes->bytes[i] = b;
+ }
+ insn->prefixes.bytes[3] = lb;
+ }
+
+ /* Decode REX prefix */
+ if (insn->x86_64) {
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ if (inat_is_rex_prefix(attr)) {
+ insn->rex_prefix.value = b;
+ insn->rex_prefix.nbytes = 1;
+ insn->next_byte++;
+ if (X86_REX_W(b))
+ /* REX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ }
+ }
+ insn->rex_prefix.got = 1;
+
+ /* Decode VEX prefix */
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ if (inat_is_vex_prefix(attr)) {
+ insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
+ if (!insn->x86_64) {
+ /*
+ * In 32-bits mode, if the [7:6] bits (mod bits of
+ * ModRM) on the second byte are not 11b, it is
+ * LDS or LES or BOUND.
+ */
+ if (X86_MODRM_MOD(b2) != 3)
+ goto vex_end;
+ }
+ insn->vex_prefix.bytes[0] = b;
+ insn->vex_prefix.bytes[1] = b2;
+ if (inat_is_evex_prefix(attr)) {
+ b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+ insn->vex_prefix.bytes[2] = b2;
+ b2 = peek_nbyte_next(insn_byte_t, insn, 3);
+ insn->vex_prefix.bytes[3] = b2;
+ insn->vex_prefix.nbytes = 4;
+ insn->next_byte += 4;
+ if (insn->x86_64 && X86_VEX_W(b2))
+ /* VEX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ } else if (inat_is_vex3_prefix(attr)) {
+ b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+ insn->vex_prefix.bytes[2] = b2;
+ insn->vex_prefix.nbytes = 3;
+ insn->next_byte += 3;
+ if (insn->x86_64 && X86_VEX_W(b2))
+ /* VEX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ } else {
+ /*
+ * For VEX2, fake VEX3-like byte#2.
+ * Makes it easier to decode vex.W, vex.vvvv,
+ * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+ */
+ insn->vex_prefix.bytes[2] = b2 & 0x7f;
+ insn->vex_prefix.nbytes = 2;
+ insn->next_byte += 2;
+ }
+ }
+vex_end:
+ insn->vex_prefix.got = 1;
+
+ prefixes->got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_opcode - collect opcode(s)
+ * @insn: &struct insn containing instruction
+ *
+ * Populates @insn->opcode, updates @insn->next_byte to point past the
+ * opcode byte(s), and set @insn->attr (except for groups).
+ * If necessary, first collects any preceding (prefix) bytes.
+ * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
+ * is already 1.
+ */
+void insn_get_opcode(struct insn *insn)
+{
+ struct insn_field *opcode = &insn->opcode;
+ insn_byte_t op;
+ int pfx_id;
+ if (opcode->got)
+ return;
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+
+ /* Get first opcode */
+ op = get_next(insn_byte_t, insn);
+ opcode->bytes[0] = op;
+ opcode->nbytes = 1;
+
+ /* Check if there is VEX prefix or not */
+ if (insn_is_avx(insn)) {
+ insn_byte_t m, p;
+ m = insn_vex_m_bits(insn);
+ p = insn_vex_p_bits(insn);
+ insn->attr = inat_get_avx_attribute(op, m, p);
+ if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
+ (!inat_accept_vex(insn->attr) &&
+ !inat_is_group(insn->attr)))
+ insn->attr = 0; /* This instruction is bad */
+ goto end; /* VEX has only 1 byte for opcode */
+ }
+
+ insn->attr = inat_get_opcode_attribute(op);
+ while (inat_is_escape(insn->attr)) {
+ /* Get escaped opcode */
+ op = get_next(insn_byte_t, insn);
+ opcode->bytes[opcode->nbytes++] = op;
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
+ }
+ if (inat_must_vex(insn->attr))
+ insn->attr = 0; /* This instruction is bad */
+end:
+ opcode->got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_modrm - collect ModRM byte, if any
+ * @insn: &struct insn containing instruction
+ *
+ * Populates @insn->modrm and updates @insn->next_byte to point past the
+ * ModRM byte, if any. If necessary, first collects the preceding bytes
+ * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
+ */
+void insn_get_modrm(struct insn *insn)
+{
+ struct insn_field *modrm = &insn->modrm;
+ insn_byte_t pfx_id, mod;
+ if (modrm->got)
+ return;
+ if (!insn->opcode.got)
+ insn_get_opcode(insn);
+
+ if (inat_has_modrm(insn->attr)) {
+ mod = get_next(insn_byte_t, insn);
+ modrm->value = mod;
+ modrm->nbytes = 1;
+ if (inat_is_group(insn->attr)) {
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_group_attribute(mod, pfx_id,
+ insn->attr);
+ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+ insn->attr = 0; /* This is bad */
+ }
+ }
+
+ if (insn->x86_64 && inat_is_force64(insn->attr))
+ insn->opnd_bytes = 8;
+ modrm->got = 1;
+
+err_out:
+ return;
+}
+
+
+/**
+ * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte. No effect if @insn->x86_64 is 0.
+ */
+int insn_rip_relative(struct insn *insn)
+{
+ struct insn_field *modrm = &insn->modrm;
+
+ if (!insn->x86_64)
+ return 0;
+ if (!modrm->got)
+ insn_get_modrm(insn);
+ /*
+ * For rip-relative instructions, the mod field (top 2 bits)
+ * is zero and the r/m field (bottom 3 bits) is 0x5.
+ */
+ return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+}
+
+/**
+ * insn_get_sib() - Get the SIB byte of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte.
+ */
+void insn_get_sib(struct insn *insn)
+{
+ insn_byte_t modrm;
+
+ if (insn->sib.got)
+ return;
+ if (!insn->modrm.got)
+ insn_get_modrm(insn);
+ if (insn->modrm.nbytes) {
+ modrm = (insn_byte_t)insn->modrm.value;
+ if (insn->addr_bytes != 2 &&
+ X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
+ insn->sib.value = get_next(insn_byte_t, insn);
+ insn->sib.nbytes = 1;
+ }
+ }
+ insn->sib.got = 1;
+
+err_out:
+ return;
+}
+
+
+/**
+ * insn_get_displacement() - Get the displacement of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * SIB byte.
+ * Displacement value is sign-expanded.
+ */
+void insn_get_displacement(struct insn *insn)
+{
+ insn_byte_t mod, rm, base;
+
+ if (insn->displacement.got)
+ return;
+ if (!insn->sib.got)
+ insn_get_sib(insn);
+ if (insn->modrm.nbytes) {
+ /*
+ * Interpreting the modrm byte:
+ * mod = 00 - no displacement fields (exceptions below)
+ * mod = 01 - 1-byte displacement field
+ * mod = 10 - displacement field is 4 bytes, or 2 bytes if
+ * address size = 2 (0x67 prefix in 32-bit mode)
+ * mod = 11 - no memory operand
+ *
+ * If address size = 2...
+ * mod = 00, r/m = 110 - displacement field is 2 bytes
+ *
+ * If address size != 2...
+ * mod != 11, r/m = 100 - SIB byte exists
+ * mod = 00, SIB base = 101 - displacement field is 4 bytes
+ * mod = 00, r/m = 101 - rip-relative addressing, displacement
+ * field is 4 bytes
+ */
+ mod = X86_MODRM_MOD(insn->modrm.value);
+ rm = X86_MODRM_RM(insn->modrm.value);
+ base = X86_SIB_BASE(insn->sib.value);
+ if (mod == 3)
+ goto out;
+ if (mod == 1) {
+ insn->displacement.value = get_next(signed char, insn);
+ insn->displacement.nbytes = 1;
+ } else if (insn->addr_bytes == 2) {
+ if ((mod == 0 && rm == 6) || mod == 2) {
+ insn->displacement.value =
+ get_next(short, insn);
+ insn->displacement.nbytes = 2;
+ }
+ } else {
+ if ((mod == 0 && rm == 5) || mod == 2 ||
+ (mod == 0 && base == 5)) {
+ insn->displacement.value = get_next(int, insn);
+ insn->displacement.nbytes = 4;
+ }
+ }
+ }
+out:
+ insn->displacement.got = 1;
+
+err_out:
+ return;
+}
+
+/* Decode moffset16/32/64. Return 0 if failed */
+static int __get_moffset(struct insn *insn)
+{
+ switch (insn->addr_bytes) {
+ case 2:
+ insn->moffset1.value = get_next(short, insn);
+ insn->moffset1.nbytes = 2;
+ break;
+ case 4:
+ insn->moffset1.value = get_next(int, insn);
+ insn->moffset1.nbytes = 4;
+ break;
+ case 8:
+ insn->moffset1.value = get_next(int, insn);
+ insn->moffset1.nbytes = 4;
+ insn->moffset2.value = get_next(int, insn);
+ insn->moffset2.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->moffset1.got = insn->moffset2.got = 1;
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+/* Decode imm v32(Iz). Return 0 if failed */
+static int __get_immv32(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate.value = get_next(short, insn);
+ insn->immediate.nbytes = 2;
+ break;
+ case 4:
+ case 8:
+ insn->immediate.value = get_next(int, insn);
+ insn->immediate.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+/* Decode imm v64(Iv/Ov), Return 0 if failed */
+static int __get_immv(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate1.value = get_next(short, insn);
+ insn->immediate1.nbytes = 2;
+ break;
+ case 4:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ break;
+ case 8:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ insn->immediate2.value = get_next(int, insn);
+ insn->immediate2.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->immediate1.got = insn->immediate2.got = 1;
+
+ return 1;
+err_out:
+ return 0;
+}
+
+/* Decode ptr16:16/32(Ap) */
+static int __get_immptr(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate1.value = get_next(short, insn);
+ insn->immediate1.nbytes = 2;
+ break;
+ case 4:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ break;
+ case 8:
+ /* ptr16:64 is not exist (no segment) */
+ return 0;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->immediate2.value = get_next(unsigned short, insn);
+ insn->immediate2.nbytes = 2;
+ insn->immediate1.got = insn->immediate2.got = 1;
+
+ return 1;
+err_out:
+ return 0;
+}
+
+/**
+ * insn_get_immediate() - Get the immediates of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * displacement bytes.
+ * Basically, most of immediates are sign-expanded. Unsigned-value can be
+ * get by bit masking with ((1 << (nbytes * 8)) - 1)
+ */
+void insn_get_immediate(struct insn *insn)
+{
+ if (insn->immediate.got)
+ return;
+ if (!insn->displacement.got)
+ insn_get_displacement(insn);
+
+ if (inat_has_moffset(insn->attr)) {
+ if (!__get_moffset(insn))
+ goto err_out;
+ goto done;
+ }
+
+ if (!inat_has_immediate(insn->attr))
+ /* no immediates */
+ goto done;
+
+ switch (inat_immediate_size(insn->attr)) {
+ case INAT_IMM_BYTE:
+ insn->immediate.value = get_next(signed char, insn);
+ insn->immediate.nbytes = 1;
+ break;
+ case INAT_IMM_WORD:
+ insn->immediate.value = get_next(short, insn);
+ insn->immediate.nbytes = 2;
+ break;
+ case INAT_IMM_DWORD:
+ insn->immediate.value = get_next(int, insn);
+ insn->immediate.nbytes = 4;
+ break;
+ case INAT_IMM_QWORD:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ insn->immediate2.value = get_next(int, insn);
+ insn->immediate2.nbytes = 4;
+ break;
+ case INAT_IMM_PTR:
+ if (!__get_immptr(insn))
+ goto err_out;
+ break;
+ case INAT_IMM_VWORD32:
+ if (!__get_immv32(insn))
+ goto err_out;
+ break;
+ case INAT_IMM_VWORD:
+ if (!__get_immv(insn))
+ goto err_out;
+ break;
+ default:
+ /* Here, insn must have an immediate, but failed */
+ goto err_out;
+ }
+ if (inat_has_second_immediate(insn->attr)) {
+ insn->immediate2.value = get_next(signed char, insn);
+ insn->immediate2.nbytes = 1;
+ }
+done:
+ insn->immediate.got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_length() - Get the length of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * immediates bytes.
+ */
+void insn_get_length(struct insn *insn)
+{
+ if (insn->length)
+ return;
+ if (!insn->immediate.got)
+ insn_get_immediate(insn);
+ insn->length = (unsigned char)((unsigned long)insn->next_byte
+ - (unsigned long)insn->kaddr);
+}
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
new file mode 100644
index 000000000..1a01d0a41
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -0,0 +1,244 @@
+#ifndef _ASM_X86_INSN_H
+#define _ASM_X86_INSN_H
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/* insn_attr_t is defined in inat.h */
+#include "inat.h"
+
+struct insn_field {
+ union {
+ insn_value_t value;
+ insn_byte_t bytes[4];
+ };
+ /* !0 if we've run insn_get_xxx() for this field */
+ unsigned char got;
+ unsigned char nbytes;
+};
+
+struct insn {
+ struct insn_field prefixes; /*
+ * Prefixes
+ * prefixes.bytes[3]: last prefix
+ */
+ struct insn_field rex_prefix; /* REX prefix */
+ struct insn_field vex_prefix; /* VEX prefix */
+ struct insn_field opcode; /*
+ * opcode.bytes[0]: opcode1
+ * opcode.bytes[1]: opcode2
+ * opcode.bytes[2]: opcode3
+ */
+ struct insn_field modrm;
+ struct insn_field sib;
+ struct insn_field displacement;
+ union {
+ struct insn_field immediate;
+ struct insn_field moffset1; /* for 64bit MOV */
+ struct insn_field immediate1; /* for 64bit imm or off16/32 */
+ };
+ union {
+ struct insn_field moffset2; /* for 64bit MOV */
+ struct insn_field immediate2; /* for 64bit imm or seg16 */
+ };
+
+ insn_attr_t attr;
+ unsigned char opnd_bytes;
+ unsigned char addr_bytes;
+ unsigned char length;
+ unsigned char x86_64;
+
+ const insn_byte_t *kaddr; /* kernel address of insn to analyze */
+ const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */
+ const insn_byte_t *next_byte;
+};
+
+#define MAX_INSN_SIZE 15
+
+#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
+#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
+#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
+
+#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
+#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
+#define X86_SIB_BASE(sib) ((sib) & 0x07)
+
+#define X86_REX_W(rex) ((rex) & 8)
+#define X86_REX_R(rex) ((rex) & 4)
+#define X86_REX_X(rex) ((rex) & 2)
+#define X86_REX_B(rex) ((rex) & 1)
+
+/* VEX bit flags */
+#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
+#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */
+#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */
+#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
+#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
+/* VEX bit fields */
+#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */
+#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
+#define X86_VEX2_M 1 /* VEX2.M always 1 */
+#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+
+extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
+extern void insn_get_prefixes(struct insn *insn);
+extern void insn_get_opcode(struct insn *insn);
+extern void insn_get_modrm(struct insn *insn);
+extern void insn_get_sib(struct insn *insn);
+extern void insn_get_displacement(struct insn *insn);
+extern void insn_get_immediate(struct insn *insn);
+extern void insn_get_length(struct insn *insn);
+
+/* Attribute will be determined after getting ModRM (for opcode groups) */
+static inline void insn_get_attribute(struct insn *insn)
+{
+ insn_get_modrm(insn);
+}
+
+/* Instruction uses RIP-relative addressing */
+extern int insn_rip_relative(struct insn *insn);
+
+/* Init insn for kernel text */
+static inline void kernel_insn_init(struct insn *insn,
+ const void *kaddr, int buf_len)
+{
+#ifdef CONFIG_X86_64
+ insn_init(insn, kaddr, buf_len, 1);
+#else /* CONFIG_X86_32 */
+ insn_init(insn, kaddr, buf_len, 0);
+#endif
+}
+
+static inline int insn_is_avx(struct insn *insn)
+{
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return (insn->vex_prefix.value != 0);
+}
+
+static inline int insn_is_evex(struct insn *insn)
+{
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return (insn->vex_prefix.nbytes == 4);
+}
+
+/* Ensure this instruction is decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+ return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+ insn->displacement.got && insn->immediate.got;
+}
+
+static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX2_M;
+ else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */
+ return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+ else /* EVEX */
+ return X86_EVEX_M(insn->vex_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX_P(insn->vex_prefix.bytes[1]);
+ else
+ return X86_VEX_P(insn->vex_prefix.bytes[2]);
+}
+
+/* Get the last prefix id from last prefix or VEX prefix */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+ if (insn_is_avx(insn))
+ return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+
+ if (insn->prefixes.bytes[3])
+ return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+ return 0;
+}
+
+/* Offset of each field from kaddr */
+static inline int insn_offset_rex_prefix(struct insn *insn)
+{
+ return insn->prefixes.nbytes;
+}
+static inline int insn_offset_vex_prefix(struct insn *insn)
+{
+ return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
+}
+static inline int insn_offset_opcode(struct insn *insn)
+{
+ return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
+}
+static inline int insn_offset_modrm(struct insn *insn)
+{
+ return insn_offset_opcode(insn) + insn->opcode.nbytes;
+}
+static inline int insn_offset_sib(struct insn *insn)
+{
+ return insn_offset_modrm(insn) + insn->modrm.nbytes;
+}
+static inline int insn_offset_displacement(struct insn *insn)
+{
+ return insn_offset_sib(insn) + insn->sib.nbytes;
+}
+static inline int insn_offset_immediate(struct insn *insn)
+{
+ return insn_offset_displacement(insn) + insn->displacement.nbytes;
+}
+
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @idx: Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
+ * and the index is stored in @idx (note that this @idx is just for a cursor,
+ * do not change it.)
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, idx, prefix) \
+ for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states;
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+ return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+ (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
+#endif /* _ASM_X86_INSN_H */
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
new file mode 100644
index 000000000..e2a6c2295
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -0,0 +1,2727 @@
+/*
+ * intel_pt_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <linux/compiler.h>
+
+#include "../cache.h"
+#include "../util.h"
+#include "../auxtrace.h"
+
+#include "intel-pt-insn-decoder.h"
+#include "intel-pt-pkt-decoder.h"
+#include "intel-pt-decoder.h"
+#include "intel-pt-log.h"
+
+#define INTEL_PT_BLK_SIZE 1024
+
+#define BIT63 (((uint64_t)1 << 63))
+
+#define INTEL_PT_RETURN 1
+
+/* Maximum number of loops with no packets consumed i.e. stuck in a loop */
+#define INTEL_PT_MAX_LOOPS 10000
+
+struct intel_pt_blk {
+ struct intel_pt_blk *prev;
+ uint64_t ip[INTEL_PT_BLK_SIZE];
+};
+
+struct intel_pt_stack {
+ struct intel_pt_blk *blk;
+ struct intel_pt_blk *spare;
+ int pos;
+};
+
+enum intel_pt_pkt_state {
+ INTEL_PT_STATE_NO_PSB,
+ INTEL_PT_STATE_NO_IP,
+ INTEL_PT_STATE_ERR_RESYNC,
+ INTEL_PT_STATE_IN_SYNC,
+ INTEL_PT_STATE_TNT_CONT,
+ INTEL_PT_STATE_TNT,
+ INTEL_PT_STATE_TIP,
+ INTEL_PT_STATE_TIP_PGD,
+ INTEL_PT_STATE_FUP,
+ INTEL_PT_STATE_FUP_NO_TIP,
+};
+
+static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
+{
+ switch (pkt_state) {
+ case INTEL_PT_STATE_NO_PSB:
+ case INTEL_PT_STATE_NO_IP:
+ case INTEL_PT_STATE_ERR_RESYNC:
+ case INTEL_PT_STATE_IN_SYNC:
+ case INTEL_PT_STATE_TNT_CONT:
+ return true;
+ case INTEL_PT_STATE_TNT:
+ case INTEL_PT_STATE_TIP:
+ case INTEL_PT_STATE_TIP_PGD:
+ case INTEL_PT_STATE_FUP:
+ case INTEL_PT_STATE_FUP_NO_TIP:
+ return false;
+ default:
+ return true;
+ };
+}
+
+#ifdef INTEL_PT_STRICT
+#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB
+#else
+#define INTEL_PT_STATE_ERR1 (decoder->pkt_state)
+#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP
+#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC
+#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC
+#endif
+
+struct intel_pt_decoder {
+ int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
+ int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+ uint64_t max_insn_cnt, void *data);
+ bool (*pgd_ip)(uint64_t ip, void *data);
+ void *data;
+ struct intel_pt_state state;
+ const unsigned char *buf;
+ size_t len;
+ bool return_compression;
+ bool branch_enable;
+ bool mtc_insn;
+ bool pge;
+ bool have_tma;
+ bool have_cyc;
+ bool fixup_last_mtc;
+ bool have_last_ip;
+ enum intel_pt_param_flags flags;
+ uint64_t pos;
+ uint64_t last_ip;
+ uint64_t ip;
+ uint64_t cr3;
+ uint64_t timestamp;
+ uint64_t tsc_timestamp;
+ uint64_t ref_timestamp;
+ uint64_t sample_timestamp;
+ uint64_t ret_addr;
+ uint64_t ctc_timestamp;
+ uint64_t ctc_delta;
+ uint64_t cycle_cnt;
+ uint64_t cyc_ref_timestamp;
+ uint32_t last_mtc;
+ uint32_t tsc_ctc_ratio_n;
+ uint32_t tsc_ctc_ratio_d;
+ uint32_t tsc_ctc_mult;
+ uint32_t tsc_slip;
+ uint32_t ctc_rem_mask;
+ int mtc_shift;
+ struct intel_pt_stack stack;
+ enum intel_pt_pkt_state pkt_state;
+ struct intel_pt_pkt packet;
+ struct intel_pt_pkt tnt;
+ int pkt_step;
+ int pkt_len;
+ int last_packet_type;
+ unsigned int cbr;
+ unsigned int cbr_seen;
+ unsigned int max_non_turbo_ratio;
+ double max_non_turbo_ratio_fp;
+ double cbr_cyc_to_tsc;
+ double calc_cyc_to_tsc;
+ bool have_calc_cyc_to_tsc;
+ int exec_mode;
+ unsigned int insn_bytes;
+ uint64_t period;
+ enum intel_pt_period_type period_type;
+ uint64_t tot_insn_cnt;
+ uint64_t period_insn_cnt;
+ uint64_t period_mask;
+ uint64_t period_ticks;
+ uint64_t last_masked_timestamp;
+ bool continuous_period;
+ bool overflow;
+ bool set_fup_tx_flags;
+ bool set_fup_ptw;
+ bool set_fup_mwait;
+ bool set_fup_pwre;
+ bool set_fup_exstop;
+ unsigned int fup_tx_flags;
+ unsigned int tx_flags;
+ uint64_t fup_ptw_payload;
+ uint64_t fup_mwait_payload;
+ uint64_t fup_pwre_payload;
+ uint64_t cbr_payload;
+ uint64_t timestamp_insn_cnt;
+ uint64_t sample_insn_cnt;
+ uint64_t stuck_ip;
+ int no_progress;
+ int stuck_ip_prd;
+ int stuck_ip_cnt;
+ const unsigned char *next_buf;
+ size_t next_len;
+ unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
+};
+
+static uint64_t intel_pt_lower_power_of_2(uint64_t x)
+{
+ int i;
+
+ for (i = 0; x != 1; i++)
+ x >>= 1;
+
+ return x << i;
+}
+
+static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
+{
+ if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
+ uint64_t period;
+
+ period = intel_pt_lower_power_of_2(decoder->period);
+ decoder->period_mask = ~(period - 1);
+ decoder->period_ticks = period;
+ }
+}
+
+static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d)
+{
+ if (!d)
+ return 0;
+ return (t / d) * n + ((t % d) * n) / d;
+}
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
+{
+ struct intel_pt_decoder *decoder;
+
+ if (!params->get_trace || !params->walk_insn)
+ return NULL;
+
+ decoder = zalloc(sizeof(struct intel_pt_decoder));
+ if (!decoder)
+ return NULL;
+
+ decoder->get_trace = params->get_trace;
+ decoder->walk_insn = params->walk_insn;
+ decoder->pgd_ip = params->pgd_ip;
+ decoder->data = params->data;
+ decoder->return_compression = params->return_compression;
+ decoder->branch_enable = params->branch_enable;
+
+ decoder->flags = params->flags;
+
+ decoder->period = params->period;
+ decoder->period_type = params->period_type;
+
+ decoder->max_non_turbo_ratio = params->max_non_turbo_ratio;
+ decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
+
+ intel_pt_setup_period(decoder);
+
+ decoder->mtc_shift = params->mtc_period;
+ decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1;
+
+ decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n;
+ decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d;
+
+ if (!decoder->tsc_ctc_ratio_n)
+ decoder->tsc_ctc_ratio_d = 0;
+
+ if (decoder->tsc_ctc_ratio_d) {
+ if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
+ decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
+ decoder->tsc_ctc_ratio_d;
+ }
+
+ /*
+ * A TSC packet can slip past MTC packets so that the timestamp appears
+ * to go backwards. One estimate is that can be up to about 40 CPU
+ * cycles, which is certainly less than 0x1000 TSC ticks, but accept
+ * slippage an order of magnitude more to be on the safe side.
+ */
+ decoder->tsc_slip = 0x10000;
+
+ intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
+ intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
+ intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d);
+ intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
+ intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
+
+ return decoder;
+}
+
+static void intel_pt_pop_blk(struct intel_pt_stack *stack)
+{
+ struct intel_pt_blk *blk = stack->blk;
+
+ stack->blk = blk->prev;
+ if (!stack->spare)
+ stack->spare = blk;
+ else
+ free(blk);
+}
+
+static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
+{
+ if (!stack->pos) {
+ if (!stack->blk)
+ return 0;
+ intel_pt_pop_blk(stack);
+ if (!stack->blk)
+ return 0;
+ stack->pos = INTEL_PT_BLK_SIZE;
+ }
+ return stack->blk->ip[--stack->pos];
+}
+
+static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
+{
+ struct intel_pt_blk *blk;
+
+ if (stack->spare) {
+ blk = stack->spare;
+ stack->spare = NULL;
+ } else {
+ blk = malloc(sizeof(struct intel_pt_blk));
+ if (!blk)
+ return -ENOMEM;
+ }
+
+ blk->prev = stack->blk;
+ stack->blk = blk;
+ stack->pos = 0;
+ return 0;
+}
+
+static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
+{
+ int err;
+
+ if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
+ err = intel_pt_alloc_blk(stack);
+ if (err)
+ return err;
+ }
+
+ stack->blk->ip[stack->pos++] = ip;
+ return 0;
+}
+
+static void intel_pt_clear_stack(struct intel_pt_stack *stack)
+{
+ while (stack->blk)
+ intel_pt_pop_blk(stack);
+ stack->pos = 0;
+}
+
+static void intel_pt_free_stack(struct intel_pt_stack *stack)
+{
+ intel_pt_clear_stack(stack);
+ zfree(&stack->blk);
+ zfree(&stack->spare);
+}
+
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
+{
+ intel_pt_free_stack(&decoder->stack);
+ free(decoder);
+}
+
+static int intel_pt_ext_err(int code)
+{
+ switch (code) {
+ case -ENOMEM:
+ return INTEL_PT_ERR_NOMEM;
+ case -ENOSYS:
+ return INTEL_PT_ERR_INTERN;
+ case -EBADMSG:
+ return INTEL_PT_ERR_BADPKT;
+ case -ENODATA:
+ return INTEL_PT_ERR_NODATA;
+ case -EILSEQ:
+ return INTEL_PT_ERR_NOINSN;
+ case -ENOENT:
+ return INTEL_PT_ERR_MISMAT;
+ case -EOVERFLOW:
+ return INTEL_PT_ERR_OVR;
+ case -ENOSPC:
+ return INTEL_PT_ERR_LOST;
+ case -ELOOP:
+ return INTEL_PT_ERR_NELOOP;
+ default:
+ return INTEL_PT_ERR_UNK;
+ }
+}
+
+static const char *intel_pt_err_msgs[] = {
+ [INTEL_PT_ERR_NOMEM] = "Memory allocation failed",
+ [INTEL_PT_ERR_INTERN] = "Internal error",
+ [INTEL_PT_ERR_BADPKT] = "Bad packet",
+ [INTEL_PT_ERR_NODATA] = "No more data",
+ [INTEL_PT_ERR_NOINSN] = "Failed to get instruction",
+ [INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction",
+ [INTEL_PT_ERR_OVR] = "Overflow packet",
+ [INTEL_PT_ERR_LOST] = "Lost trace data",
+ [INTEL_PT_ERR_UNK] = "Unknown error!",
+ [INTEL_PT_ERR_NELOOP] = "Never-ending loop",
+};
+
+int intel_pt__strerror(int code, char *buf, size_t buflen)
+{
+ if (code < 1 || code >= INTEL_PT_ERR_MAX)
+ code = INTEL_PT_ERR_UNK;
+ strlcpy(buf, intel_pt_err_msgs[code], buflen);
+ return 0;
+}
+
+static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
+ uint64_t last_ip)
+{
+ uint64_t ip;
+
+ switch (packet->count) {
+ case 1:
+ ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
+ packet->payload;
+ break;
+ case 2:
+ ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
+ packet->payload;
+ break;
+ case 3:
+ ip = packet->payload;
+ /* Sign-extend 6-byte ip */
+ if (ip & (uint64_t)0x800000000000ULL)
+ ip |= (uint64_t)0xffff000000000000ULL;
+ break;
+ case 4:
+ ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
+ packet->payload;
+ break;
+ case 6:
+ ip = packet->payload;
+ break;
+ default:
+ return 0;
+ }
+
+ return ip;
+}
+
+static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
+{
+ decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
+ decoder->have_last_ip = true;
+}
+
+static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
+{
+ intel_pt_set_last_ip(decoder);
+ decoder->ip = decoder->last_ip;
+}
+
+static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
+ decoder->buf);
+}
+
+static int intel_pt_bug(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log("ERROR: Internal error\n");
+ decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ return -ENOSYS;
+}
+
+static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
+{
+ decoder->tx_flags = 0;
+}
+
+static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
+{
+ decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
+}
+
+static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
+{
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ decoder->pkt_len = 1;
+ decoder->pkt_step = 1;
+ intel_pt_decoder_log_packet(decoder);
+ if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
+ intel_pt_log("ERROR: Bad packet\n");
+ decoder->pkt_state = INTEL_PT_STATE_ERR1;
+ }
+ return -EBADMSG;
+}
+
+static int intel_pt_get_data(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_buffer buffer = { .buf = 0, };
+ int ret;
+
+ decoder->pkt_step = 0;
+
+ intel_pt_log("Getting more data\n");
+ ret = decoder->get_trace(&buffer, decoder->data);
+ if (ret)
+ return ret;
+ decoder->buf = buffer.buf;
+ decoder->len = buffer.len;
+ if (!decoder->len) {
+ intel_pt_log("No more data\n");
+ return -ENODATA;
+ }
+ if (!buffer.consecutive) {
+ decoder->ip = 0;
+ decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ decoder->ref_timestamp = buffer.ref_timestamp;
+ decoder->timestamp = 0;
+ decoder->have_tma = false;
+ decoder->state.trace_nr = buffer.trace_nr;
+ intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
+ decoder->ref_timestamp);
+ return -ENOLINK;
+ }
+
+ return 0;
+}
+
+static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
+{
+ if (!decoder->next_buf)
+ return intel_pt_get_data(decoder);
+
+ decoder->buf = decoder->next_buf;
+ decoder->len = decoder->next_len;
+ decoder->next_buf = 0;
+ decoder->next_len = 0;
+ return 0;
+}
+
+static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
+{
+ unsigned char *buf = decoder->temp_buf;
+ size_t old_len, len, n;
+ int ret;
+
+ old_len = decoder->len;
+ len = decoder->len;
+ memcpy(buf, decoder->buf, len);
+
+ ret = intel_pt_get_data(decoder);
+ if (ret) {
+ decoder->pos += old_len;
+ return ret < 0 ? ret : -EINVAL;
+ }
+
+ n = INTEL_PT_PKT_MAX_SZ - len;
+ if (n > decoder->len)
+ n = decoder->len;
+ memcpy(buf + len, decoder->buf, n);
+ len += n;
+
+ ret = intel_pt_get_packet(buf, len, &decoder->packet);
+ if (ret < (int)old_len) {
+ decoder->next_buf = decoder->buf;
+ decoder->next_len = decoder->len;
+ decoder->buf = buf;
+ decoder->len = old_len;
+ return intel_pt_bad_packet(decoder);
+ }
+
+ decoder->next_buf = decoder->buf + (ret - old_len);
+ decoder->next_len = decoder->len - (ret - old_len);
+
+ decoder->buf = buf;
+ decoder->len = ret;
+
+ return ret;
+}
+
+struct intel_pt_pkt_info {
+ struct intel_pt_decoder *decoder;
+ struct intel_pt_pkt packet;
+ uint64_t pos;
+ int pkt_len;
+ int last_packet_type;
+ void *data;
+};
+
+typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info);
+
+/* Lookahead packets in current buffer */
+static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
+ intel_pt_pkt_cb_t cb, void *data)
+{
+ struct intel_pt_pkt_info pkt_info;
+ const unsigned char *buf = decoder->buf;
+ size_t len = decoder->len;
+ int ret;
+
+ pkt_info.decoder = decoder;
+ pkt_info.pos = decoder->pos;
+ pkt_info.pkt_len = decoder->pkt_step;
+ pkt_info.last_packet_type = decoder->last_packet_type;
+ pkt_info.data = data;
+
+ while (1) {
+ do {
+ pkt_info.pos += pkt_info.pkt_len;
+ buf += pkt_info.pkt_len;
+ len -= pkt_info.pkt_len;
+
+ if (!len)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
+ if (!ret)
+ return INTEL_PT_NEED_MORE_BYTES;
+ if (ret < 0)
+ return ret;
+
+ pkt_info.pkt_len = ret;
+ } while (pkt_info.packet.type == INTEL_PT_PAD);
+
+ ret = cb(&pkt_info);
+ if (ret)
+ return 0;
+
+ pkt_info.last_packet_type = pkt_info.packet.type;
+ }
+}
+
+struct intel_pt_calc_cyc_to_tsc_info {
+ uint64_t cycle_cnt;
+ unsigned int cbr;
+ uint32_t last_mtc;
+ uint64_t ctc_timestamp;
+ uint64_t ctc_delta;
+ uint64_t tsc_timestamp;
+ uint64_t timestamp;
+ bool have_tma;
+ bool fixup_last_mtc;
+ bool from_mtc;
+ double cbr_cyc_to_tsc;
+};
+
+/*
+ * MTC provides a 8-bit slice of CTC but the TMA packet only provides the lower
+ * 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC
+ * provided by the TMA packet. Fix-up the last_mtc calculated from the TMA
+ * packet by copying the missing bits from the current MTC assuming the least
+ * difference between the two, and that the current MTC comes after last_mtc.
+ */
+static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift,
+ uint32_t *last_mtc)
+{
+ uint32_t first_missing_bit = 1U << (16 - mtc_shift);
+ uint32_t mask = ~(first_missing_bit - 1);
+
+ *last_mtc |= mtc & mask;
+ if (*last_mtc >= mtc) {
+ *last_mtc -= first_missing_bit;
+ *last_mtc &= 0xff;
+ }
+}
+
+static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_decoder *decoder = pkt_info->decoder;
+ struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
+ uint64_t timestamp;
+ double cyc_to_tsc;
+ unsigned int cbr;
+ uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;
+
+ switch (pkt_info->packet.type) {
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_FUP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PIP:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PAD:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ return 0;
+
+ case INTEL_PT_MTC:
+ if (!data->have_tma)
+ return 0;
+
+ mtc = pkt_info->packet.payload;
+ if (decoder->mtc_shift > 8 && data->fixup_last_mtc) {
+ data->fixup_last_mtc = false;
+ intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+ &data->last_mtc);
+ }
+ if (mtc > data->last_mtc)
+ mtc_delta = mtc - data->last_mtc;
+ else
+ mtc_delta = mtc + 256 - data->last_mtc;
+ data->ctc_delta += mtc_delta << decoder->mtc_shift;
+ data->last_mtc = mtc;
+
+ if (decoder->tsc_ctc_mult) {
+ timestamp = data->ctc_timestamp +
+ data->ctc_delta * decoder->tsc_ctc_mult;
+ } else {
+ timestamp = data->ctc_timestamp +
+ multdiv(data->ctc_delta,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ if (timestamp < data->timestamp)
+ return 1;
+
+ if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+ data->timestamp = timestamp;
+ return 0;
+ }
+
+ break;
+
+ case INTEL_PT_TSC:
+ /*
+ * For now, do not support using TSC packets - refer
+ * intel_pt_calc_cyc_to_tsc().
+ */
+ if (data->from_mtc)
+ return 1;
+ timestamp = pkt_info->packet.payload |
+ (data->timestamp & (0xffULL << 56));
+ if (data->from_mtc && timestamp < data->timestamp &&
+ data->timestamp - timestamp < decoder->tsc_slip)
+ return 1;
+ if (timestamp < data->timestamp)
+ timestamp += (1ULL << 56);
+ if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+ if (data->from_mtc)
+ return 1;
+ data->tsc_timestamp = timestamp;
+ data->timestamp = timestamp;
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_TMA:
+ if (data->from_mtc)
+ return 1;
+
+ if (!decoder->tsc_ctc_ratio_d)
+ return 0;
+
+ ctc = pkt_info->packet.payload;
+ fc = pkt_info->packet.count;
+ ctc_rem = ctc & decoder->ctc_rem_mask;
+
+ data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+
+ data->ctc_timestamp = data->tsc_timestamp - fc;
+ if (decoder->tsc_ctc_mult) {
+ data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+ } else {
+ data->ctc_timestamp -=
+ multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ data->ctc_delta = 0;
+ data->have_tma = true;
+ data->fixup_last_mtc = true;
+
+ return 0;
+
+ case INTEL_PT_CYC:
+ data->cycle_cnt += pkt_info->packet.payload;
+ return 0;
+
+ case INTEL_PT_CBR:
+ cbr = pkt_info->packet.payload;
+ if (data->cbr && data->cbr != cbr)
+ return 1;
+ data->cbr = cbr;
+ data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+ return 0;
+
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_OVF:
+ case INTEL_PT_BAD: /* Does not happen */
+ default:
+ return 1;
+ }
+
+ if (!data->cbr && decoder->cbr) {
+ data->cbr = decoder->cbr;
+ data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
+ }
+
+ if (!data->cycle_cnt)
+ return 1;
+
+ cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt;
+
+ if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
+ cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
+ cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+ return 1;
+ }
+
+ decoder->calc_cyc_to_tsc = cyc_to_tsc;
+ decoder->have_calc_cyc_to_tsc = true;
+
+ if (data->cbr) {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
+ cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+ } else {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
+ cyc_to_tsc, pkt_info->pos);
+ }
+
+ return 1;
+}
+
+static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
+ bool from_mtc)
+{
+ struct intel_pt_calc_cyc_to_tsc_info data = {
+ .cycle_cnt = 0,
+ .cbr = 0,
+ .last_mtc = decoder->last_mtc,
+ .ctc_timestamp = decoder->ctc_timestamp,
+ .ctc_delta = decoder->ctc_delta,
+ .tsc_timestamp = decoder->tsc_timestamp,
+ .timestamp = decoder->timestamp,
+ .have_tma = decoder->have_tma,
+ .fixup_last_mtc = decoder->fixup_last_mtc,
+ .from_mtc = from_mtc,
+ .cbr_cyc_to_tsc = 0,
+ };
+
+ /*
+ * For now, do not support using TSC packets for at least the reasons:
+ * 1) timing might have stopped
+ * 2) TSC packets within PSB+ can slip against CYC packets
+ */
+ if (!from_mtc)
+ return;
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
+}
+
+static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
+{
+ int ret;
+
+ decoder->last_packet_type = decoder->packet.type;
+
+ do {
+ decoder->pos += decoder->pkt_step;
+ decoder->buf += decoder->pkt_step;
+ decoder->len -= decoder->pkt_step;
+
+ if (!decoder->len) {
+ ret = intel_pt_get_next_data(decoder);
+ if (ret)
+ return ret;
+ }
+
+ ret = intel_pt_get_packet(decoder->buf, decoder->len,
+ &decoder->packet);
+ if (ret == INTEL_PT_NEED_MORE_BYTES &&
+ decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
+ ret = intel_pt_get_split_packet(decoder);
+ if (ret < 0)
+ return ret;
+ }
+ if (ret <= 0)
+ return intel_pt_bad_packet(decoder);
+
+ decoder->pkt_len = ret;
+ decoder->pkt_step = ret;
+ intel_pt_decoder_log_packet(decoder);
+ } while (decoder->packet.type == INTEL_PT_PAD);
+
+ return 0;
+}
+
+static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp, masked_timestamp;
+
+ timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
+ masked_timestamp = timestamp & decoder->period_mask;
+ if (decoder->continuous_period) {
+ if (masked_timestamp > decoder->last_masked_timestamp)
+ return 1;
+ } else {
+ timestamp += 1;
+ masked_timestamp = timestamp & decoder->period_mask;
+ if (masked_timestamp > decoder->last_masked_timestamp) {
+ decoder->last_masked_timestamp = masked_timestamp;
+ decoder->continuous_period = true;
+ }
+ }
+
+ if (masked_timestamp < decoder->last_masked_timestamp)
+ return decoder->period_ticks;
+
+ return decoder->period_ticks - (timestamp - masked_timestamp);
+}
+
+static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
+{
+ switch (decoder->period_type) {
+ case INTEL_PT_PERIOD_INSTRUCTIONS:
+ return decoder->period - decoder->period_insn_cnt;
+ case INTEL_PT_PERIOD_TICKS:
+ return intel_pt_next_period(decoder);
+ case INTEL_PT_PERIOD_NONE:
+ case INTEL_PT_PERIOD_MTC:
+ default:
+ return 0;
+ }
+}
+
+static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp, masked_timestamp;
+
+ switch (decoder->period_type) {
+ case INTEL_PT_PERIOD_INSTRUCTIONS:
+ decoder->period_insn_cnt = 0;
+ break;
+ case INTEL_PT_PERIOD_TICKS:
+ timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
+ masked_timestamp = timestamp & decoder->period_mask;
+ if (masked_timestamp > decoder->last_masked_timestamp)
+ decoder->last_masked_timestamp = masked_timestamp;
+ else
+ decoder->last_masked_timestamp += decoder->period_ticks;
+ break;
+ case INTEL_PT_PERIOD_NONE:
+ case INTEL_PT_PERIOD_MTC:
+ default:
+ break;
+ }
+
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+}
+
+static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
+ struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+ uint64_t max_insn_cnt, insn_cnt = 0;
+ int err;
+
+ if (!decoder->mtc_insn)
+ decoder->mtc_insn = true;
+
+ max_insn_cnt = intel_pt_next_sample(decoder);
+
+ err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
+ max_insn_cnt, decoder->data);
+
+ decoder->tot_insn_cnt += insn_cnt;
+ decoder->timestamp_insn_cnt += insn_cnt;
+ decoder->sample_insn_cnt += insn_cnt;
+ decoder->period_insn_cnt += insn_cnt;
+
+ if (err) {
+ decoder->no_progress = 0;
+ decoder->pkt_state = INTEL_PT_STATE_ERR2;
+ intel_pt_log_at("ERROR: Failed to get instruction",
+ decoder->ip);
+ if (err == -ENOENT)
+ return -ENOLINK;
+ return -EILSEQ;
+ }
+
+ if (ip && decoder->ip == ip) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ if (max_insn_cnt && insn_cnt >= max_insn_cnt)
+ intel_pt_sample_insn(decoder);
+
+ if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->ip += intel_pt_insn->length;
+ err = INTEL_PT_RETURN;
+ goto out;
+ }
+
+ if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
+ /* Zero-length calls are excluded */
+ if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
+ intel_pt_insn->rel) {
+ err = intel_pt_push(&decoder->stack, decoder->ip +
+ intel_pt_insn->length);
+ if (err)
+ goto out;
+ }
+ } else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
+ decoder->ret_addr = intel_pt_pop(&decoder->stack);
+ }
+
+ if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
+ int cnt = decoder->no_progress++;
+
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip += intel_pt_insn->length +
+ intel_pt_insn->rel;
+ decoder->state.to_ip = decoder->ip;
+ err = INTEL_PT_RETURN;
+
+ /*
+ * Check for being stuck in a loop. This can happen if a
+ * decoder error results in the decoder erroneously setting the
+ * ip to an address that is itself in an infinite loop that
+ * consumes no packets. When that happens, there must be an
+ * unconditional branch.
+ */
+ if (cnt) {
+ if (cnt == 1) {
+ decoder->stuck_ip = decoder->state.to_ip;
+ decoder->stuck_ip_prd = 1;
+ decoder->stuck_ip_cnt = 1;
+ } else if (cnt > INTEL_PT_MAX_LOOPS ||
+ decoder->state.to_ip == decoder->stuck_ip) {
+ intel_pt_log_at("ERROR: Never-ending loop",
+ decoder->state.to_ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ err = -ELOOP;
+ goto out;
+ } else if (!--decoder->stuck_ip_cnt) {
+ decoder->stuck_ip_prd += 1;
+ decoder->stuck_ip_cnt = decoder->stuck_ip_prd;
+ decoder->stuck_ip = decoder->state.to_ip;
+ }
+ }
+ goto out_no_progress;
+ }
+out:
+ decoder->no_progress = 0;
+out_no_progress:
+ decoder->state.insn_op = intel_pt_insn->op;
+ decoder->state.insn_len = intel_pt_insn->length;
+ memcpy(decoder->state.insn, intel_pt_insn->buf,
+ INTEL_PT_INSN_BUF_SZ);
+
+ if (decoder->tx_flags & INTEL_PT_IN_TX)
+ decoder->state.flags |= INTEL_PT_IN_TX;
+
+ return err;
+}
+
+static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
+{
+ bool ret = false;
+
+ if (decoder->set_fup_tx_flags) {
+ decoder->set_fup_tx_flags = false;
+ decoder->tx_flags = decoder->fup_tx_flags;
+ decoder->state.type = INTEL_PT_TRANSACTION;
+ if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
+ decoder->state.type |= INTEL_PT_BRANCH;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.flags = decoder->fup_tx_flags;
+ return true;
+ }
+ if (decoder->set_fup_ptw) {
+ decoder->set_fup_ptw = false;
+ decoder->state.type = INTEL_PT_PTW;
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.ptw_payload = decoder->fup_ptw_payload;
+ return true;
+ }
+ if (decoder->set_fup_mwait) {
+ decoder->set_fup_mwait = false;
+ decoder->state.type = INTEL_PT_MWAIT_OP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.mwait_payload = decoder->fup_mwait_payload;
+ ret = true;
+ }
+ if (decoder->set_fup_pwre) {
+ decoder->set_fup_pwre = false;
+ decoder->state.type |= INTEL_PT_PWR_ENTRY;
+ decoder->state.type &= ~INTEL_PT_BRANCH;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.pwre_payload = decoder->fup_pwre_payload;
+ ret = true;
+ }
+ if (decoder->set_fup_exstop) {
+ decoder->set_fup_exstop = false;
+ decoder->state.type |= INTEL_PT_EX_STOP;
+ decoder->state.type &= ~INTEL_PT_BRANCH;
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ ret = true;
+ }
+ return ret;
+}
+
+static inline bool intel_pt_fup_with_nlip(struct intel_pt_decoder *decoder,
+ struct intel_pt_insn *intel_pt_insn,
+ uint64_t ip, int err)
+{
+ return decoder->flags & INTEL_PT_FUP_WITH_NLIP && !err &&
+ intel_pt_insn->branch == INTEL_PT_BR_INDIRECT &&
+ ip == decoder->ip + intel_pt_insn->length;
+}
+
+static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ uint64_t ip;
+ int err;
+
+ ip = decoder->last_ip;
+
+ while (1) {
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
+ if (err == INTEL_PT_RETURN)
+ return 0;
+ if (err == -EAGAIN ||
+ intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+ bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
+
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ if (intel_pt_fup_event(decoder) && no_tip)
+ return 0;
+ return -EAGAIN;
+ }
+ decoder->set_fup_tx_flags = false;
+ if (err)
+ return err;
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ intel_pt_log_at("ERROR: Unexpected indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ intel_pt_log_at("ERROR: Unexpected conditional branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ intel_pt_bug(decoder);
+ }
+}
+
+static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ int err;
+
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+ if (err == INTEL_PT_RETURN &&
+ decoder->pgd_ip &&
+ decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+ (decoder->state.type & INTEL_PT_BRANCH) &&
+ decoder->pgd_ip(decoder->state.to_ip, decoder->data)) {
+ /* Unconditional branch leaving filter region */
+ decoder->no_progress = 0;
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.to_ip = 0;
+ return 0;
+ }
+ if (err == INTEL_PT_RETURN)
+ return 0;
+ if (err)
+ return err;
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ if (decoder->packet.count != 0)
+ decoder->ip = decoder->last_ip;
+ } else {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.from_ip = decoder->ip;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ decoder->state.to_ip = decoder->last_ip;
+ decoder->ip = decoder->last_ip;
+ }
+ }
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ uint64_t to_ip = decoder->ip + intel_pt_insn.length +
+ intel_pt_insn.rel;
+
+ if (decoder->pgd_ip &&
+ decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+ decoder->pgd_ip(to_ip, decoder->data)) {
+ /* Conditional branch leaving filter region */
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->ip = to_ip;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+ }
+ intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ return intel_pt_bug(decoder);
+}
+
+static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ int err;
+
+ while (1) {
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+ if (err == INTEL_PT_RETURN)
+ return 0;
+ if (err)
+ return err;
+
+ if (intel_pt_insn.op == INTEL_PT_OP_RET) {
+ if (!decoder->return_compression) {
+ intel_pt_log_at("ERROR: RET when expecting conditional branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ if (!decoder->ret_addr) {
+ intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ if (!(decoder->tnt.payload & BIT63)) {
+ intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ decoder->tnt.count -= 1;
+ if (decoder->tnt.count)
+ decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->tnt.payload <<= 1;
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip = decoder->ret_addr;
+ decoder->state.to_ip = decoder->ip;
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ /* Handle deferred TIPs */
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type != INTEL_PT_TIP ||
+ decoder->packet.count == 0) {
+ intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ decoder->pkt_step = 0;
+ return -ENOENT;
+ }
+ intel_pt_set_last_ip(decoder);
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = decoder->last_ip;
+ decoder->ip = decoder->last_ip;
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ decoder->tnt.count -= 1;
+ if (decoder->tnt.count)
+ decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ if (decoder->tnt.payload & BIT63) {
+ decoder->tnt.payload <<= 1;
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip += intel_pt_insn.length +
+ intel_pt_insn.rel;
+ decoder->state.to_ip = decoder->ip;
+ return 0;
+ }
+ /* Instruction sample for a non-taken branch */
+ if (decoder->state.type & INTEL_PT_INSTRUCTION) {
+ decoder->tnt.payload <<= 1;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->ip += intel_pt_insn.length;
+ return 0;
+ }
+ decoder->ip += intel_pt_insn.length;
+ if (!decoder->tnt.count) {
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ return -EAGAIN;
+ }
+ decoder->tnt.payload <<= 1;
+ continue;
+ }
+
+ return intel_pt_bug(decoder);
+ }
+}
+
+static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
+{
+ unsigned int fup_tx_flags;
+ int err;
+
+ fup_tx_flags = decoder->packet.payload &
+ (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->fup_tx_flags = fup_tx_flags;
+ decoder->set_fup_tx_flags = true;
+ if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
+ *no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
+ decoder->pos);
+ intel_pt_update_in_tx(decoder);
+ }
+ return 0;
+}
+
+static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp;
+
+ decoder->have_tma = false;
+
+ if (decoder->ref_timestamp) {
+ timestamp = decoder->packet.payload |
+ (decoder->ref_timestamp & (0xffULL << 56));
+ if (timestamp < decoder->ref_timestamp) {
+ if (decoder->ref_timestamp - timestamp > (1ULL << 55))
+ timestamp += (1ULL << 56);
+ } else {
+ if (timestamp - decoder->ref_timestamp > (1ULL << 55))
+ timestamp -= (1ULL << 56);
+ }
+ decoder->tsc_timestamp = timestamp;
+ decoder->timestamp = timestamp;
+ decoder->ref_timestamp = 0;
+ decoder->timestamp_insn_cnt = 0;
+ } else if (decoder->timestamp) {
+ timestamp = decoder->packet.payload |
+ (decoder->timestamp & (0xffULL << 56));
+ decoder->tsc_timestamp = timestamp;
+ if (timestamp < decoder->timestamp &&
+ decoder->timestamp - timestamp < decoder->tsc_slip) {
+ intel_pt_log_to("Suppressing backwards timestamp",
+ timestamp);
+ timestamp = decoder->timestamp;
+ }
+ if (timestamp < decoder->timestamp) {
+ intel_pt_log_to("Wraparound timestamp", timestamp);
+ timestamp += (1ULL << 56);
+ decoder->tsc_timestamp = timestamp;
+ }
+ decoder->timestamp = timestamp;
+ decoder->timestamp_insn_cnt = 0;
+ }
+
+ if (decoder->last_packet_type == INTEL_PT_CYC) {
+ decoder->cyc_ref_timestamp = decoder->timestamp;
+ decoder->cycle_cnt = 0;
+ decoder->have_calc_cyc_to_tsc = false;
+ intel_pt_calc_cyc_to_tsc(decoder, false);
+ }
+
+ intel_pt_log_to("Setting timestamp", decoder->timestamp);
+}
+
+static int intel_pt_overflow(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log("ERROR: Buffer overflow\n");
+ intel_pt_clear_tx_flags(decoder);
+ decoder->timestamp_insn_cnt = 0;
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ decoder->overflow = true;
+ return -EOVERFLOW;
+}
+
+static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
+{
+ uint32_t ctc = decoder->packet.payload;
+ uint32_t fc = decoder->packet.count;
+ uint32_t ctc_rem = ctc & decoder->ctc_rem_mask;
+
+ if (!decoder->tsc_ctc_ratio_d)
+ return;
+
+ decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+ decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
+ if (decoder->tsc_ctc_mult) {
+ decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+ } else {
+ decoder->ctc_timestamp -= multdiv(ctc_rem,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+ decoder->ctc_delta = 0;
+ decoder->have_tma = true;
+ decoder->fixup_last_mtc = true;
+ intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n",
+ decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
+}
+
+static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp;
+ uint32_t mtc, mtc_delta;
+
+ if (!decoder->have_tma)
+ return;
+
+ mtc = decoder->packet.payload;
+
+ if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) {
+ decoder->fixup_last_mtc = false;
+ intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+ &decoder->last_mtc);
+ }
+
+ if (mtc > decoder->last_mtc)
+ mtc_delta = mtc - decoder->last_mtc;
+ else
+ mtc_delta = mtc + 256 - decoder->last_mtc;
+
+ decoder->ctc_delta += mtc_delta << decoder->mtc_shift;
+
+ if (decoder->tsc_ctc_mult) {
+ timestamp = decoder->ctc_timestamp +
+ decoder->ctc_delta * decoder->tsc_ctc_mult;
+ } else {
+ timestamp = decoder->ctc_timestamp +
+ multdiv(decoder->ctc_delta,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ if (timestamp < decoder->timestamp)
+ intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+ timestamp, decoder->timestamp);
+ else
+ decoder->timestamp = timestamp;
+
+ decoder->timestamp_insn_cnt = 0;
+ decoder->last_mtc = mtc;
+
+ if (decoder->last_packet_type == INTEL_PT_CYC) {
+ decoder->cyc_ref_timestamp = decoder->timestamp;
+ decoder->cycle_cnt = 0;
+ decoder->have_calc_cyc_to_tsc = false;
+ intel_pt_calc_cyc_to_tsc(decoder, true);
+ }
+}
+
+static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
+{
+ unsigned int cbr = decoder->packet.payload & 0xff;
+
+ decoder->cbr_payload = decoder->packet.payload;
+
+ if (decoder->cbr == cbr)
+ return;
+
+ decoder->cbr = cbr;
+ decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+}
+
+static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp = decoder->cyc_ref_timestamp;
+
+ decoder->have_cyc = true;
+
+ decoder->cycle_cnt += decoder->packet.payload;
+
+ if (!decoder->cyc_ref_timestamp)
+ return;
+
+ if (decoder->have_calc_cyc_to_tsc)
+ timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
+ else if (decoder->cbr)
+ timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
+ else
+ return;
+
+ if (timestamp < decoder->timestamp)
+ intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+ timestamp, decoder->timestamp);
+ else
+ decoder->timestamp = timestamp;
+
+ decoder->timestamp_insn_cnt = 0;
+}
+
+/* Walk PSB+ packets when already in sync. */
+static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_PSBEND:
+ return 0;
+
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_TNT:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_BAD:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ decoder->have_tma = false;
+ intel_pt_log("ERROR: Unexpected packet\n");
+ return -EAGAIN;
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_FUP:
+ decoder->pge = true;
+ if (decoder->packet.count)
+ intel_pt_set_last_ip(decoder);
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ default:
+ break;
+ }
+ }
+}
+
+static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
+ decoder->tx_flags = 0;
+ decoder->state.flags &= ~INTEL_PT_IN_TX;
+ decoder->state.flags |= INTEL_PT_ABORT_TX;
+ } else {
+ decoder->state.flags |= INTEL_PT_ASYNC;
+ }
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ case INTEL_PT_FUP:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_BAD:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ intel_pt_log("ERROR: Missing TIP after FUP\n");
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ decoder->pkt_step = 0;
+ return -ENOENT;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_TIP_PGD:
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ if (decoder->packet.count != 0) {
+ intel_pt_set_ip(decoder);
+ intel_pt_log("Omitting PGD ip " x64_fmt "\n",
+ decoder->ip);
+ }
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ return 0;
+
+ case INTEL_PT_TIP_PGE:
+ decoder->pge = true;
+ intel_pt_log("Omitting PGE ip " x64_fmt "\n",
+ decoder->ip);
+ decoder->state.from_ip = 0;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ intel_pt_set_ip(decoder);
+ decoder->state.to_ip = decoder->ip;
+ }
+ return 0;
+
+ case INTEL_PT_TIP:
+ decoder->state.from_ip = decoder->ip;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ intel_pt_set_ip(decoder);
+ decoder->state.to_ip = decoder->ip;
+ }
+ return 0;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ break;
+
+ default:
+ return intel_pt_bug(decoder);
+ }
+ }
+}
+
+static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
+{
+ bool no_tip = false;
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+next:
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ if (!decoder->packet.count)
+ break;
+ decoder->tnt = decoder->packet;
+ decoder->pkt_state = INTEL_PT_STATE_TNT;
+ err = intel_pt_walk_tnt(decoder);
+ if (err == -EAGAIN)
+ break;
+ return err;
+
+ case INTEL_PT_TIP_PGD:
+ if (decoder->packet.count != 0)
+ intel_pt_set_last_ip(decoder);
+ decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
+ return intel_pt_walk_tip(decoder);
+
+ case INTEL_PT_TIP_PGE: {
+ decoder->pge = true;
+ if (decoder->packet.count == 0) {
+ intel_pt_log_at("Skipping zero TIP.PGE",
+ decoder->pos);
+ break;
+ }
+ intel_pt_set_ip(decoder);
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ return 0;
+ }
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_TIP:
+ if (decoder->packet.count != 0)
+ intel_pt_set_last_ip(decoder);
+ decoder->pkt_state = INTEL_PT_STATE_TIP;
+ return intel_pt_walk_tip(decoder);
+
+ case INTEL_PT_FUP:
+ if (decoder->packet.count == 0) {
+ intel_pt_log_at("Skipping zero FUP",
+ decoder->pos);
+ no_tip = false;
+ break;
+ }
+ intel_pt_set_last_ip(decoder);
+ if (!decoder->branch_enable) {
+ decoder->ip = decoder->last_ip;
+ if (intel_pt_fup_event(decoder))
+ return 0;
+ no_tip = false;
+ break;
+ }
+ if (decoder->set_fup_mwait)
+ no_tip = true;
+ if (no_tip)
+ decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_FUP;
+ err = intel_pt_walk_fup(decoder);
+ if (err != -EAGAIN)
+ return err;
+ if (no_tip) {
+ no_tip = false;
+ break;
+ }
+ return intel_pt_walk_fup_tip(decoder);
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ break;
+
+ case INTEL_PT_PSB:
+ decoder->last_ip = 0;
+ decoder->have_last_ip = true;
+ intel_pt_clear_stack(&decoder->stack);
+ err = intel_pt_walk_psbend(decoder);
+ if (err == -EAGAIN)
+ goto next;
+ if (err)
+ return err;
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type != INTEL_PT_PERIOD_MTC)
+ break;
+ /*
+ * Ensure that there has been an instruction since the
+ * last MTC.
+ */
+ if (!decoder->mtc_insn)
+ break;
+ decoder->mtc_insn = false;
+ /* Ensure that there is a timestamp */
+ if (!decoder->timestamp)
+ break;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->mtc_insn = false;
+ return 0;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ if (!decoder->branch_enable &&
+ decoder->cbr != decoder->cbr_seen) {
+ decoder->cbr_seen = decoder->cbr;
+ decoder->state.type = INTEL_PT_CBR_CHG;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.cbr_payload =
+ decoder->packet.payload;
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ /* MODE_TSX need not be followed by FUP */
+ if (!decoder->pge) {
+ intel_pt_update_in_tx(decoder);
+ break;
+ }
+ err = intel_pt_mode_tsx(decoder, &no_tip);
+ if (err)
+ return err;
+ goto next;
+
+ case INTEL_PT_BAD: /* Does not happen */
+ return intel_pt_bug(decoder);
+
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ break;
+
+ case INTEL_PT_PTWRITE_IP:
+ decoder->fup_ptw_payload = decoder->packet.payload;
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_ptw = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_PTWRITE:
+ decoder->state.type = INTEL_PT_PTW;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.ptw_payload = decoder->packet.payload;
+ return 0;
+
+ case INTEL_PT_MWAIT:
+ decoder->fup_mwait_payload = decoder->packet.payload;
+ decoder->set_fup_mwait = true;
+ break;
+
+ case INTEL_PT_PWRE:
+ if (decoder->set_fup_mwait) {
+ decoder->fup_pwre_payload =
+ decoder->packet.payload;
+ decoder->set_fup_pwre = true;
+ break;
+ }
+ decoder->state.type = INTEL_PT_PWR_ENTRY;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.pwrx_payload = decoder->packet.payload;
+ return 0;
+
+ case INTEL_PT_EXSTOP_IP:
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_exstop = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_EXSTOP:
+ decoder->state.type = INTEL_PT_EX_STOP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+
+ case INTEL_PT_PWRX:
+ decoder->state.type = INTEL_PT_PWR_EXIT;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.pwrx_payload = decoder->packet.payload;
+ return 0;
+
+ default:
+ return intel_pt_bug(decoder);
+ }
+ }
+}
+
+static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
+{
+ return decoder->packet.count &&
+ (decoder->have_last_ip || decoder->packet.count == 3 ||
+ decoder->packet.count == 6);
+}
+
+/* Walk PSB+ packets to get in sync. */
+static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TIP_PGD:
+ decoder->continuous_period = false;
+ __fallthrough;
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ intel_pt_log("ERROR: Unexpected packet\n");
+ return -ENOENT;
+
+ case INTEL_PT_FUP:
+ decoder->pge = true;
+ if (intel_pt_have_ip(decoder)) {
+ uint64_t current_ip = decoder->ip;
+
+ intel_pt_set_ip(decoder);
+ if (current_ip)
+ intel_pt_log_to("Setting IP",
+ decoder->ip);
+ }
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ __fallthrough;
+
+ case INTEL_PT_TNT:
+ decoder->have_tma = false;
+ intel_pt_log("ERROR: Unexpected packet\n");
+ if (decoder->ip)
+ decoder->pkt_state = INTEL_PT_STATE_ERR4;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+
+ case INTEL_PT_BAD: /* Does not happen */
+ return intel_pt_bug(decoder);
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_PSBEND:
+ return 0;
+
+ case INTEL_PT_PSB:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ default:
+ break;
+ }
+ }
+}
+
+static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TIP_PGD:
+ decoder->continuous_period = false;
+ __fallthrough;
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (decoder->ip)
+ return 0;
+ break;
+
+ case INTEL_PT_FUP:
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (decoder->ip)
+ return 0;
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_BAD: /* Does not happen */
+ return intel_pt_bug(decoder);
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ break;
+
+ case INTEL_PT_PSB:
+ decoder->last_ip = 0;
+ decoder->have_last_ip = true;
+ intel_pt_clear_stack(&decoder->stack);
+ err = intel_pt_walk_psb(decoder);
+ if (err)
+ return err;
+ if (decoder->ip) {
+ /* Do not have a sample */
+ decoder->state.type = 0;
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_TNT:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ default:
+ break;
+ }
+ }
+}
+
+static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ decoder->set_fup_tx_flags = false;
+ decoder->set_fup_ptw = false;
+ decoder->set_fup_mwait = false;
+ decoder->set_fup_pwre = false;
+ decoder->set_fup_exstop = false;
+
+ if (!decoder->branch_enable) {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->overflow = false;
+ decoder->state.type = 0; /* Do not have a sample */
+ return 0;
+ }
+
+ intel_pt_log("Scanning for full IP\n");
+ err = intel_pt_walk_to_ip(decoder);
+ if (err)
+ return err;
+
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->overflow = false;
+
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ intel_pt_log_to("Setting IP", decoder->ip);
+
+ return 0;
+}
+
+static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
+{
+ const unsigned char *end = decoder->buf + decoder->len;
+ size_t i;
+
+ for (i = INTEL_PT_PSB_LEN - 1; i; i--) {
+ if (i > decoder->len)
+ continue;
+ if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
+ return i;
+ }
+ return 0;
+}
+
+static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
+{
+ size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
+ const char *psb = INTEL_PT_PSB_STR;
+
+ if (rest_psb > decoder->len ||
+ memcmp(decoder->buf, psb + part_psb, rest_psb))
+ return 0;
+
+ return rest_psb;
+}
+
+static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
+ int part_psb)
+{
+ int rest_psb, ret;
+
+ decoder->pos += decoder->len;
+ decoder->len = 0;
+
+ ret = intel_pt_get_next_data(decoder);
+ if (ret)
+ return ret;
+
+ rest_psb = intel_pt_rest_psb(decoder, part_psb);
+ if (!rest_psb)
+ return 0;
+
+ decoder->pos -= part_psb;
+ decoder->next_buf = decoder->buf + rest_psb;
+ decoder->next_len = decoder->len - rest_psb;
+ memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ decoder->buf = decoder->temp_buf;
+ decoder->len = INTEL_PT_PSB_LEN;
+
+ return 0;
+}
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
+{
+ unsigned char *next;
+ int ret;
+
+ intel_pt_log("Scanning for PSB\n");
+ while (1) {
+ if (!decoder->len) {
+ ret = intel_pt_get_next_data(decoder);
+ if (ret)
+ return ret;
+ }
+
+ next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
+ INTEL_PT_PSB_LEN);
+ if (!next) {
+ int part_psb;
+
+ part_psb = intel_pt_part_psb(decoder);
+ if (part_psb) {
+ ret = intel_pt_get_split_psb(decoder, part_psb);
+ if (ret)
+ return ret;
+ } else {
+ decoder->pos += decoder->len;
+ decoder->len = 0;
+ }
+ continue;
+ }
+
+ decoder->pkt_step = next - decoder->buf;
+ return intel_pt_get_next_packet(decoder);
+ }
+}
+
+static int intel_pt_sync(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->have_last_ip = false;
+ decoder->last_ip = 0;
+ decoder->ip = 0;
+ intel_pt_clear_stack(&decoder->stack);
+
+ err = intel_pt_scan_for_psb(decoder);
+ if (err)
+ return err;
+
+ decoder->have_last_ip = true;
+ decoder->pkt_state = INTEL_PT_STATE_NO_IP;
+
+ err = intel_pt_walk_psb(decoder);
+ if (err)
+ return err;
+
+ if (decoder->ip) {
+ decoder->state.type = 0; /* Do not have a sample */
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ } else {
+ return intel_pt_sync_ip(decoder);
+ }
+
+ return 0;
+}
+
+static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t est = decoder->sample_insn_cnt << 1;
+
+ if (!decoder->cbr || !decoder->max_non_turbo_ratio)
+ goto out;
+
+ est *= decoder->max_non_turbo_ratio;
+ est /= decoder->cbr;
+out:
+ return decoder->sample_timestamp + est;
+}
+
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ do {
+ decoder->state.type = INTEL_PT_BRANCH;
+ decoder->state.flags = 0;
+
+ switch (decoder->pkt_state) {
+ case INTEL_PT_STATE_NO_PSB:
+ err = intel_pt_sync(decoder);
+ break;
+ case INTEL_PT_STATE_NO_IP:
+ decoder->have_last_ip = false;
+ decoder->last_ip = 0;
+ decoder->ip = 0;
+ __fallthrough;
+ case INTEL_PT_STATE_ERR_RESYNC:
+ err = intel_pt_sync_ip(decoder);
+ break;
+ case INTEL_PT_STATE_IN_SYNC:
+ err = intel_pt_walk_trace(decoder);
+ break;
+ case INTEL_PT_STATE_TNT:
+ case INTEL_PT_STATE_TNT_CONT:
+ err = intel_pt_walk_tnt(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_trace(decoder);
+ break;
+ case INTEL_PT_STATE_TIP:
+ case INTEL_PT_STATE_TIP_PGD:
+ err = intel_pt_walk_tip(decoder);
+ break;
+ case INTEL_PT_STATE_FUP:
+ err = intel_pt_walk_fup(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_fup_tip(decoder);
+ break;
+ case INTEL_PT_STATE_FUP_NO_TIP:
+ err = intel_pt_walk_fup(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_trace(decoder);
+ break;
+ default:
+ err = intel_pt_bug(decoder);
+ break;
+ }
+ } while (err == -ENOLINK);
+
+ if (err) {
+ decoder->state.err = intel_pt_ext_err(err);
+ decoder->state.from_ip = decoder->ip;
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ } else {
+ decoder->state.err = 0;
+ if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
+ decoder->cbr_seen = decoder->cbr;
+ decoder->state.type |= INTEL_PT_CBR_CHG;
+ decoder->state.cbr_payload = decoder->cbr_payload;
+ }
+ if (intel_pt_sample_time(decoder->pkt_state)) {
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ }
+ }
+
+ decoder->state.timestamp = decoder->sample_timestamp;
+ decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
+ decoder->state.cr3 = decoder->cr3;
+ decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
+
+ return &decoder->state;
+}
+
+/**
+ * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
+ * @buf: pointer to buffer pointer
+ * @len: size of buffer
+ *
+ * Updates the buffer pointer to point to the start of the next PSB packet if
+ * there is one, otherwise the buffer pointer is unchanged. If @buf is updated,
+ * @len is adjusted accordingly.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
+{
+ unsigned char *next;
+
+ next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ if (next) {
+ *len -= next - *buf;
+ *buf = next;
+ return true;
+ }
+ return false;
+}
+
+/**
+ * intel_pt_step_psb - move buffer pointer to the start of the following PSB
+ * packet.
+ * @buf: pointer to buffer pointer
+ * @len: size of buffer
+ *
+ * Updates the buffer pointer to point to the start of the following PSB packet
+ * (skipping the PSB at @buf itself) if there is one, otherwise the buffer
+ * pointer is unchanged. If @buf is updated, @len is adjusted accordingly.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
+{
+ unsigned char *next;
+
+ if (!*len)
+ return false;
+
+ next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ if (next) {
+ *len -= next - *buf;
+ *buf = next;
+ return true;
+ }
+ return false;
+}
+
+/**
+ * intel_pt_last_psb - find the last PSB packet in a buffer.
+ * @buf: buffer
+ * @len: size of buffer
+ *
+ * This function finds the last PSB in a buffer.
+ *
+ * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
+ */
+static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
+{
+ const char *n = INTEL_PT_PSB_STR;
+ unsigned char *p;
+ size_t k;
+
+ if (len < INTEL_PT_PSB_LEN)
+ return NULL;
+
+ k = len - INTEL_PT_PSB_LEN + 1;
+ while (1) {
+ p = memrchr(buf, n[0], k);
+ if (!p)
+ return NULL;
+ if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
+ return p;
+ k = p - buf;
+ if (!k)
+ return NULL;
+ }
+}
+
+/**
+ * intel_pt_next_tsc - find and return next TSC.
+ * @buf: buffer
+ * @len: size of buffer
+ * @tsc: TSC value returned
+ * @rem: returns remaining size when TSC is found
+ *
+ * Find a TSC packet in @buf and return the TSC value. This function assumes
+ * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
+ * PSBEND packet is found.
+ *
+ * Return: %true if TSC is found, false otherwise.
+ */
+static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
+ size_t *rem)
+{
+ struct intel_pt_pkt packet;
+ int ret;
+
+ while (len) {
+ ret = intel_pt_get_packet(buf, len, &packet);
+ if (ret <= 0)
+ return false;
+ if (packet.type == INTEL_PT_TSC) {
+ *tsc = packet.payload;
+ *rem = len;
+ return true;
+ }
+ if (packet.type == INTEL_PT_PSBEND)
+ return false;
+ buf += ret;
+ len -= ret;
+ }
+ return false;
+}
+
+/**
+ * intel_pt_tsc_cmp - compare 7-byte TSCs.
+ * @tsc1: first TSC to compare
+ * @tsc2: second TSC to compare
+ *
+ * This function compares 7-byte TSC values allowing for the possibility that
+ * TSC wrapped around. Generally it is not possible to know if TSC has wrapped
+ * around so for that purpose this function assumes the absolute difference is
+ * less than half the maximum difference.
+ *
+ * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
+ * after @tsc2.
+ */
+static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
+{
+ const uint64_t halfway = (1ULL << 55);
+
+ if (tsc1 == tsc2)
+ return 0;
+
+ if (tsc1 < tsc2) {
+ if (tsc2 - tsc1 < halfway)
+ return -1;
+ else
+ return 1;
+ } else {
+ if (tsc1 - tsc2 < halfway)
+ return 1;
+ else
+ return -1;
+ }
+}
+
+#define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1)
+
+/**
+ * adj_for_padding - adjust overlap to account for padding.
+ * @buf_b: second buffer
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ *
+ * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap
+ * accordingly.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts
+ */
+static unsigned char *adj_for_padding(unsigned char *buf_b,
+ unsigned char *buf_a, size_t len_a)
+{
+ unsigned char *p = buf_b - MAX_PADDING;
+ unsigned char *q = buf_a + len_a - MAX_PADDING;
+ int i;
+
+ for (i = MAX_PADDING; i; i--, p++, q++) {
+ if (*p != *q)
+ break;
+ }
+
+ return p;
+}
+
+/**
+ * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
+ * using TSC.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ * to buf_a
+ *
+ * If the trace contains TSC we can look at the last TSC of @buf_a and the
+ * first TSC of @buf_b in order to determine if the buffers overlap, and then
+ * walk forward in @buf_b until a later TSC is found. A precondition is that
+ * @buf_a and @buf_b are positioned at a PSB.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
+ size_t len_a,
+ unsigned char *buf_b,
+ size_t len_b, bool *consecutive)
+{
+ uint64_t tsc_a, tsc_b;
+ unsigned char *p;
+ size_t len, rem_a, rem_b;
+
+ p = intel_pt_last_psb(buf_a, len_a);
+ if (!p)
+ return buf_b; /* No PSB in buf_a => no overlap */
+
+ len = len_a - (p - buf_a);
+ if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) {
+ /* The last PSB+ in buf_a is incomplete, so go back one more */
+ len_a -= len;
+ p = intel_pt_last_psb(buf_a, len_a);
+ if (!p)
+ return buf_b; /* No full PSB+ => assume no overlap */
+ len = len_a - (p - buf_a);
+ if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a))
+ return buf_b; /* No TSC in buf_a => assume no overlap */
+ }
+
+ while (1) {
+ /* Ignore PSB+ with no TSC */
+ if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) {
+ int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b);
+
+ /* Same TSC, so buffers are consecutive */
+ if (!cmp && rem_b >= rem_a) {
+ unsigned char *start;
+
+ *consecutive = true;
+ start = buf_b + len_b - (rem_b - rem_a);
+ return adj_for_padding(start, buf_a, len_a);
+ }
+ if (cmp < 0)
+ return buf_b; /* tsc_a < tsc_b => no overlap */
+ }
+
+ if (!intel_pt_step_psb(&buf_b, &len_b))
+ return buf_b + len_b; /* No PSB in buf_b => no data */
+ }
+}
+
+/**
+ * intel_pt_find_overlap - determine start of non-overlapped trace data.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ * @have_tsc: can use TSC packets to detect overlap
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ * to buf_a
+ *
+ * When trace samples or snapshots are recorded there is the possibility that
+ * the data overlaps. Note that, for the purposes of decoding, data is only
+ * useful if it begins with a PSB packet.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+ unsigned char *buf_b, size_t len_b,
+ bool have_tsc, bool *consecutive)
+{
+ unsigned char *found;
+
+ /* Buffer 'b' must start at PSB so throw away everything before that */
+ if (!intel_pt_next_psb(&buf_b, &len_b))
+ return buf_b + len_b; /* No PSB */
+
+ if (!intel_pt_next_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+
+ if (have_tsc) {
+ found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b,
+ consecutive);
+ if (found)
+ return found;
+ }
+
+ /*
+ * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
+ * we can ignore the first part of buffer 'a'.
+ */
+ while (len_b < len_a) {
+ if (!intel_pt_step_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+ }
+
+ /* Now len_b >= len_a */
+ while (1) {
+ /* Potential overlap so check the bytes */
+ found = memmem(buf_a, len_a, buf_b, len_a);
+ if (found) {
+ *consecutive = true;
+ return adj_for_padding(buf_b + len_a, buf_a, len_a);
+ }
+
+ /* Try again at next PSB in buffer 'a' */
+ if (!intel_pt_step_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+ }
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
new file mode 100644
index 000000000..51c18d67f
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -0,0 +1,133 @@
+/*
+ * intel_pt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_DECODER_H__
+#define INCLUDE__INTEL_PT_DECODER_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include "intel-pt-insn-decoder.h"
+
+#define INTEL_PT_IN_TX (1 << 0)
+#define INTEL_PT_ABORT_TX (1 << 1)
+#define INTEL_PT_ASYNC (1 << 2)
+#define INTEL_PT_FUP_IP (1 << 3)
+
+enum intel_pt_sample_type {
+ INTEL_PT_BRANCH = 1 << 0,
+ INTEL_PT_INSTRUCTION = 1 << 1,
+ INTEL_PT_TRANSACTION = 1 << 2,
+ INTEL_PT_PTW = 1 << 3,
+ INTEL_PT_MWAIT_OP = 1 << 4,
+ INTEL_PT_PWR_ENTRY = 1 << 5,
+ INTEL_PT_EX_STOP = 1 << 6,
+ INTEL_PT_PWR_EXIT = 1 << 7,
+ INTEL_PT_CBR_CHG = 1 << 8,
+};
+
+enum intel_pt_period_type {
+ INTEL_PT_PERIOD_NONE,
+ INTEL_PT_PERIOD_INSTRUCTIONS,
+ INTEL_PT_PERIOD_TICKS,
+ INTEL_PT_PERIOD_MTC,
+};
+
+enum {
+ INTEL_PT_ERR_NOMEM = 1,
+ INTEL_PT_ERR_INTERN,
+ INTEL_PT_ERR_BADPKT,
+ INTEL_PT_ERR_NODATA,
+ INTEL_PT_ERR_NOINSN,
+ INTEL_PT_ERR_MISMAT,
+ INTEL_PT_ERR_OVR,
+ INTEL_PT_ERR_LOST,
+ INTEL_PT_ERR_UNK,
+ INTEL_PT_ERR_NELOOP,
+ INTEL_PT_ERR_MAX,
+};
+
+enum intel_pt_param_flags {
+ /*
+ * FUP packet can contain next linear instruction pointer instead of
+ * current linear instruction pointer.
+ */
+ INTEL_PT_FUP_WITH_NLIP = 1 << 0,
+};
+
+struct intel_pt_state {
+ enum intel_pt_sample_type type;
+ int err;
+ uint64_t from_ip;
+ uint64_t to_ip;
+ uint64_t cr3;
+ uint64_t tot_insn_cnt;
+ uint64_t timestamp;
+ uint64_t est_timestamp;
+ uint64_t trace_nr;
+ uint64_t ptw_payload;
+ uint64_t mwait_payload;
+ uint64_t pwre_payload;
+ uint64_t pwrx_payload;
+ uint64_t cbr_payload;
+ uint32_t flags;
+ enum intel_pt_insn_op insn_op;
+ int insn_len;
+ char insn[INTEL_PT_INSN_BUF_SZ];
+};
+
+struct intel_pt_insn;
+
+struct intel_pt_buffer {
+ const unsigned char *buf;
+ size_t len;
+ bool consecutive;
+ uint64_t ref_timestamp;
+ uint64_t trace_nr;
+};
+
+struct intel_pt_params {
+ int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
+ int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+ uint64_t max_insn_cnt, void *data);
+ bool (*pgd_ip)(uint64_t ip, void *data);
+ void *data;
+ bool return_compression;
+ bool branch_enable;
+ uint64_t period;
+ enum intel_pt_period_type period_type;
+ unsigned max_non_turbo_ratio;
+ unsigned int mtc_period;
+ uint32_t tsc_ctc_ratio_n;
+ uint32_t tsc_ctc_ratio_d;
+ enum intel_pt_param_flags flags;
+};
+
+struct intel_pt_decoder;
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
+
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
+
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+ unsigned char *buf_b, size_t len_b,
+ bool have_tsc, bool *consecutive);
+
+int intel_pt__strerror(int code, char *buf, size_t buflen);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
new file mode 100644
index 000000000..548188280
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -0,0 +1,274 @@
+/*
+ * intel_pt_insn_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+
+#include "event.h"
+
+#include "insn.h"
+
+#include "inat.c"
+#include "insn.c"
+
+#include "intel-pt-insn-decoder.h"
+#include "dump-insn.h"
+
+#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN
+#error Instruction buffer size too small
+#endif
+
+/* Based on branch_type() from arch/x86/events/intel/lbr.c */
+static void intel_pt_insn_decoder(struct insn *insn,
+ struct intel_pt_insn *intel_pt_insn)
+{
+ enum intel_pt_insn_op op = INTEL_PT_OP_OTHER;
+ enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH;
+ int ext;
+
+ intel_pt_insn->rel = 0;
+
+ if (insn_is_avx(insn)) {
+ intel_pt_insn->op = INTEL_PT_OP_OTHER;
+ intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH;
+ intel_pt_insn->length = insn->length;
+ return;
+ }
+
+ switch (insn->opcode.bytes[0]) {
+ case 0xf:
+ switch (insn->opcode.bytes[1]) {
+ case 0x05: /* syscall */
+ case 0x34: /* sysenter */
+ op = INTEL_PT_OP_SYSCALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0x07: /* sysret */
+ case 0x35: /* sysexit */
+ op = INTEL_PT_OP_SYSRET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0x80 ... 0x8f: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ default:
+ break;
+ }
+ break;
+ case 0x70 ... 0x7f: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xc2: /* near ret */
+ case 0xc3: /* near ret */
+ case 0xca: /* far ret */
+ case 0xcb: /* far ret */
+ op = INTEL_PT_OP_RET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xcf: /* iret */
+ op = INTEL_PT_OP_IRET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xcc ... 0xce: /* int */
+ op = INTEL_PT_OP_INT;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xe8: /* call near rel */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_UNCONDITIONAL;
+ break;
+ case 0x9a: /* call far absolute */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xe0 ... 0xe2: /* loop */
+ op = INTEL_PT_OP_LOOP;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xe3: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xe9: /* jmp */
+ case 0xeb: /* jmp */
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_UNCONDITIONAL;
+ break;
+ case 0xea: /* far jmp */
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xff: /* call near absolute, call far absolute ind */
+ ext = (insn->modrm.bytes[0] >> 3) & 0x7;
+ switch (ext) {
+ case 2: /* near ind call */
+ case 3: /* far ind call */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 4:
+ case 5:
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ intel_pt_insn->op = op;
+ intel_pt_insn->branch = branch;
+ intel_pt_insn->length = insn->length;
+
+ if (branch == INTEL_PT_BR_CONDITIONAL ||
+ branch == INTEL_PT_BR_UNCONDITIONAL) {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ switch (insn->immediate.nbytes) {
+ case 1:
+ intel_pt_insn->rel = insn->immediate.value;
+ break;
+ case 2:
+ intel_pt_insn->rel =
+ bswap_16((short)insn->immediate.value);
+ break;
+ case 4:
+ intel_pt_insn->rel = bswap_32(insn->immediate.value);
+ break;
+ default:
+ intel_pt_insn->rel = 0;
+ break;
+ }
+#else
+ intel_pt_insn->rel = insn->immediate.value;
+#endif
+ }
+}
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+ struct intel_pt_insn *intel_pt_insn)
+{
+ struct insn insn;
+
+ insn_init(&insn, buf, len, x86_64);
+ insn_get_length(&insn);
+ if (!insn_complete(&insn) || insn.length > len)
+ return -1;
+ intel_pt_insn_decoder(&insn, intel_pt_insn);
+ if (insn.length < INTEL_PT_INSN_BUF_SZ)
+ memcpy(intel_pt_insn->buf, buf, insn.length);
+ else
+ memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_BUF_SZ);
+ return 0;
+}
+
+const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
+ u8 *inbuf, int inlen, int *lenp)
+{
+ struct insn insn;
+ int n, i;
+ int left;
+
+ insn_init(&insn, inbuf, inlen, x->is64bit);
+ insn_get_length(&insn);
+ if (!insn_complete(&insn) || insn.length > inlen)
+ return "<bad>";
+ if (lenp)
+ *lenp = insn.length;
+ left = sizeof(x->out);
+ n = snprintf(x->out, left, "insn: ");
+ left -= n;
+ for (i = 0; i < insn.length; i++) {
+ n += snprintf(x->out + n, left, "%02x ", inbuf[i]);
+ left -= n;
+ }
+ return x->out;
+}
+
+const char *branch_name[] = {
+ [INTEL_PT_OP_OTHER] = "Other",
+ [INTEL_PT_OP_CALL] = "Call",
+ [INTEL_PT_OP_RET] = "Ret",
+ [INTEL_PT_OP_JCC] = "Jcc",
+ [INTEL_PT_OP_JMP] = "Jmp",
+ [INTEL_PT_OP_LOOP] = "Loop",
+ [INTEL_PT_OP_IRET] = "IRet",
+ [INTEL_PT_OP_INT] = "Int",
+ [INTEL_PT_OP_SYSCALL] = "Syscall",
+ [INTEL_PT_OP_SYSRET] = "Sysret",
+};
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op)
+{
+ return branch_name[op];
+}
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+ size_t buf_len)
+{
+ switch (intel_pt_insn->branch) {
+ case INTEL_PT_BR_CONDITIONAL:
+ case INTEL_PT_BR_UNCONDITIONAL:
+ return snprintf(buf, buf_len, "%s %s%d",
+ intel_pt_insn_name(intel_pt_insn->op),
+ intel_pt_insn->rel > 0 ? "+" : "",
+ intel_pt_insn->rel);
+ case INTEL_PT_BR_NO_BRANCH:
+ case INTEL_PT_BR_INDIRECT:
+ return snprintf(buf, buf_len, "%s",
+ intel_pt_insn_name(intel_pt_insn->op));
+ default:
+ break;
+ }
+ return 0;
+}
+
+int intel_pt_insn_type(enum intel_pt_insn_op op)
+{
+ switch (op) {
+ case INTEL_PT_OP_OTHER:
+ return 0;
+ case INTEL_PT_OP_CALL:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL;
+ case INTEL_PT_OP_RET:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN;
+ case INTEL_PT_OP_JCC:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
+ case INTEL_PT_OP_JMP:
+ return PERF_IP_FLAG_BRANCH;
+ case INTEL_PT_OP_LOOP:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
+ case INTEL_PT_OP_IRET:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_INTERRUPT;
+ case INTEL_PT_OP_INT:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_INTERRUPT;
+ case INTEL_PT_OP_SYSCALL:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_SYSCALLRET;
+ case INTEL_PT_OP_SYSRET:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_SYSCALLRET;
+ default:
+ return 0;
+ }
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
new file mode 100644
index 000000000..37ec5627a
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -0,0 +1,63 @@
+/*
+ * intel_pt_insn_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__
+#define INCLUDE__INTEL_PT_INSN_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_INSN_DESC_MAX 32
+#define INTEL_PT_INSN_BUF_SZ 16
+
+enum intel_pt_insn_op {
+ INTEL_PT_OP_OTHER,
+ INTEL_PT_OP_CALL,
+ INTEL_PT_OP_RET,
+ INTEL_PT_OP_JCC,
+ INTEL_PT_OP_JMP,
+ INTEL_PT_OP_LOOP,
+ INTEL_PT_OP_IRET,
+ INTEL_PT_OP_INT,
+ INTEL_PT_OP_SYSCALL,
+ INTEL_PT_OP_SYSRET,
+};
+
+enum intel_pt_insn_branch {
+ INTEL_PT_BR_NO_BRANCH,
+ INTEL_PT_BR_INDIRECT,
+ INTEL_PT_BR_CONDITIONAL,
+ INTEL_PT_BR_UNCONDITIONAL,
+};
+
+struct intel_pt_insn {
+ enum intel_pt_insn_op op;
+ enum intel_pt_insn_branch branch;
+ int length;
+ int32_t rel;
+ unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+};
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+ struct intel_pt_insn *intel_pt_insn);
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op);
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+ size_t buf_len);
+
+int intel_pt_insn_type(enum intel_pt_insn_op op);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
new file mode 100644
index 000000000..e02bc7b16
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -0,0 +1,156 @@
+/*
+ * intel_pt_log.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "intel-pt-log.h"
+#include "intel-pt-insn-decoder.h"
+
+#include "intel-pt-pkt-decoder.h"
+
+#define MAX_LOG_NAME 256
+
+static FILE *f;
+static char log_name[MAX_LOG_NAME];
+bool intel_pt_enable_logging;
+
+void intel_pt_log_enable(void)
+{
+ intel_pt_enable_logging = true;
+}
+
+void intel_pt_log_disable(void)
+{
+ if (f)
+ fflush(f);
+ intel_pt_enable_logging = false;
+}
+
+void intel_pt_log_set_name(const char *name)
+{
+ strncpy(log_name, name, MAX_LOG_NAME - 5);
+ strcat(log_name, ".log");
+}
+
+static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos,
+ int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ fprintf(f, " ");
+
+ fprintf(f, " %08" PRIx64 ": ", pos);
+ for (i = 0; i < len; i++)
+ fprintf(f, " %02x", buf[i]);
+ for (; i < 16; i++)
+ fprintf(f, " ");
+ fprintf(f, " ");
+}
+
+static void intel_pt_print_no_data(uint64_t pos, int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ fprintf(f, " ");
+
+ fprintf(f, " %08" PRIx64 ": ", pos);
+ for (i = 0; i < 16; i++)
+ fprintf(f, " ");
+ fprintf(f, " ");
+}
+
+static int intel_pt_log_open(void)
+{
+ if (!intel_pt_enable_logging)
+ return -1;
+
+ if (f)
+ return 0;
+
+ if (!log_name[0])
+ return -1;
+
+ f = fopen(log_name, "w+");
+ if (!f) {
+ intel_pt_enable_logging = false;
+ return -1;
+ }
+
+ return 0;
+}
+
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+ uint64_t pos, const unsigned char *buf)
+{
+ char desc[INTEL_PT_PKT_DESC_MAX];
+
+ if (intel_pt_log_open())
+ return;
+
+ intel_pt_print_data(buf, pkt_len, pos, 0);
+ intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX);
+ fprintf(f, "%s\n", desc);
+}
+
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+ char desc[INTEL_PT_INSN_DESC_MAX];
+ size_t len = intel_pt_insn->length;
+
+ if (intel_pt_log_open())
+ return;
+
+ if (len > INTEL_PT_INSN_BUF_SZ)
+ len = INTEL_PT_INSN_BUF_SZ;
+ intel_pt_print_data(intel_pt_insn->buf, len, ip, 8);
+ if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
+ fprintf(f, "%s\n", desc);
+ else
+ fprintf(f, "Bad instruction!\n");
+}
+
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+ uint64_t ip)
+{
+ char desc[INTEL_PT_INSN_DESC_MAX];
+
+ if (intel_pt_log_open())
+ return;
+
+ intel_pt_print_no_data(ip, 8);
+ if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
+ fprintf(f, "%s\n", desc);
+ else
+ fprintf(f, "Bad instruction!\n");
+}
+
+void __intel_pt_log(const char *fmt, ...)
+{
+ va_list args;
+
+ if (intel_pt_log_open())
+ return;
+
+ va_start(args, fmt);
+ vfprintf(f, fmt, args);
+ va_end(args);
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
new file mode 100644
index 000000000..45b64f93f
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -0,0 +1,78 @@
+/*
+ * intel_pt_log.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_LOG_H__
+#define INCLUDE__INTEL_PT_LOG_H__
+
+#include <linux/compiler.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+struct intel_pt_pkt;
+
+void intel_pt_log_enable(void);
+void intel_pt_log_disable(void);
+void intel_pt_log_set_name(const char *name);
+
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+ uint64_t pos, const unsigned char *buf);
+
+struct intel_pt_insn;
+
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+ uint64_t ip);
+
+void __intel_pt_log(const char *fmt, ...) __printf(1, 2);
+
+#define intel_pt_log(fmt, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+#define intel_pt_log_packet(arg, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log_packet(arg, ##__VA_ARGS__); \
+ } while (0)
+
+#define intel_pt_log_insn(arg, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log_insn(arg, ##__VA_ARGS__); \
+ } while (0)
+
+#define intel_pt_log_insn_no_data(arg, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log_insn_no_data(arg, ##__VA_ARGS__); \
+ } while (0)
+
+#define x64_fmt "0x%" PRIx64
+
+extern bool intel_pt_enable_logging;
+
+static inline void intel_pt_log_at(const char *msg, uint64_t u)
+{
+ intel_pt_log("%s at " x64_fmt "\n", msg, u);
+}
+
+static inline void intel_pt_log_to(const char *msg, uint64_t u)
+{
+ intel_pt_log("%s to " x64_fmt "\n", msg, u);
+}
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
new file mode 100644
index 000000000..d426761a5
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -0,0 +1,638 @@
+/*
+ * intel_pt_pkt_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/compiler.h>
+
+#include "intel-pt-pkt-decoder.h"
+
+#define BIT(n) (1 << (n))
+
+#define BIT63 ((uint64_t)1 << 63)
+
+#define NR_FLAG BIT63
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define memcpy_le64(d, s, n) do { \
+ memcpy((d), (s), (n)); \
+ *(d) = le64_to_cpu(*(d)); \
+} while (0)
+#else
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define memcpy_le64 memcpy
+#endif
+
+static const char * const packet_name[] = {
+ [INTEL_PT_BAD] = "Bad Packet!",
+ [INTEL_PT_PAD] = "PAD",
+ [INTEL_PT_TNT] = "TNT",
+ [INTEL_PT_TIP_PGD] = "TIP.PGD",
+ [INTEL_PT_TIP_PGE] = "TIP.PGE",
+ [INTEL_PT_TSC] = "TSC",
+ [INTEL_PT_TMA] = "TMA",
+ [INTEL_PT_MODE_EXEC] = "MODE.Exec",
+ [INTEL_PT_MODE_TSX] = "MODE.TSX",
+ [INTEL_PT_MTC] = "MTC",
+ [INTEL_PT_TIP] = "TIP",
+ [INTEL_PT_FUP] = "FUP",
+ [INTEL_PT_CYC] = "CYC",
+ [INTEL_PT_VMCS] = "VMCS",
+ [INTEL_PT_PSB] = "PSB",
+ [INTEL_PT_PSBEND] = "PSBEND",
+ [INTEL_PT_CBR] = "CBR",
+ [INTEL_PT_TRACESTOP] = "TraceSTOP",
+ [INTEL_PT_PIP] = "PIP",
+ [INTEL_PT_OVF] = "OVF",
+ [INTEL_PT_MNT] = "MNT",
+ [INTEL_PT_PTWRITE] = "PTWRITE",
+ [INTEL_PT_PTWRITE_IP] = "PTWRITE",
+ [INTEL_PT_EXSTOP] = "EXSTOP",
+ [INTEL_PT_EXSTOP_IP] = "EXSTOP",
+ [INTEL_PT_MWAIT] = "MWAIT",
+ [INTEL_PT_PWRE] = "PWRE",
+ [INTEL_PT_PWRX] = "PWRX",
+};
+
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
+{
+ return packet_name[type];
+}
+
+static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ uint64_t payload;
+ int count;
+
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ payload = le64_to_cpu(*(uint64_t *)buf);
+
+ for (count = 47; count; count--) {
+ if (payload & BIT63)
+ break;
+ payload <<= 1;
+ }
+
+ packet->type = INTEL_PT_TNT;
+ packet->count = count;
+ packet->payload = payload << 1;
+ return 8;
+}
+
+static int intel_pt_get_pip(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ uint64_t payload = 0;
+
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_PIP;
+ memcpy_le64(&payload, buf + 2, 6);
+ packet->payload = payload >> 1;
+ if (payload & 1)
+ packet->payload |= NR_FLAG;
+
+ return 8;
+}
+
+static int intel_pt_get_tracestop(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_TRACESTOP;
+ return 2;
+}
+
+static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_CBR;
+ packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2));
+ return 4;
+}
+
+static int intel_pt_get_vmcs(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ unsigned int count = (52 - 5) >> 3;
+
+ if (count < 1 || count > 7)
+ return INTEL_PT_BAD_PACKET;
+
+ if (len < count + 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_VMCS;
+ packet->count = count;
+ memcpy_le64(&packet->payload, buf + 2, count);
+
+ return count + 2;
+}
+
+static int intel_pt_get_ovf(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_OVF;
+ return 2;
+}
+
+static int intel_pt_get_psb(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ int i;
+
+ if (len < 16)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ for (i = 2; i < 16; i += 2) {
+ if (buf[i] != 2 || buf[i + 1] != 0x82)
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ packet->type = INTEL_PT_PSB;
+ return 16;
+}
+
+static int intel_pt_get_psbend(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_PSBEND;
+ return 2;
+}
+
+static int intel_pt_get_tma(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_TMA;
+ packet->payload = buf[2] | (buf[3] << 8);
+ packet->count = buf[5] | ((buf[6] & BIT(0)) << 8);
+ return 7;
+}
+
+static int intel_pt_get_pad(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_PAD;
+ return 1;
+}
+
+static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 11)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MNT;
+ memcpy_le64(&packet->payload, buf + 3, 8);
+ return 11
+;
+}
+
+static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ switch (buf[2]) {
+ case 0x88: /* MNT */
+ return intel_pt_get_mnt(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ packet->count = (buf[1] >> 5) & 0x3;
+ packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP :
+ INTEL_PT_PTWRITE;
+
+ switch (packet->count) {
+ case 0:
+ if (len < 6)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2));
+ return 6;
+ case 1:
+ if (len < 10)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+ return 10;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_exstop(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_EXSTOP;
+ return 2;
+}
+
+static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_EXSTOP_IP;
+ return 2;
+}
+
+static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 10)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MWAIT;
+ packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+ return 10;
+}
+
+static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_PWRE;
+ memcpy_le64(&packet->payload, buf + 2, 2);
+ return 4;
+}
+
+static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_PWRX;
+ memcpy_le64(&packet->payload, buf + 2, 5);
+ return 7;
+}
+
+static int intel_pt_get_ext(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ if ((buf[1] & 0x1f) == 0x12)
+ return intel_pt_get_ptwrite(buf, len, packet);
+
+ switch (buf[1]) {
+ case 0xa3: /* Long TNT */
+ return intel_pt_get_long_tnt(buf, len, packet);
+ case 0x43: /* PIP */
+ return intel_pt_get_pip(buf, len, packet);
+ case 0x83: /* TraceStop */
+ return intel_pt_get_tracestop(packet);
+ case 0x03: /* CBR */
+ return intel_pt_get_cbr(buf, len, packet);
+ case 0xc8: /* VMCS */
+ return intel_pt_get_vmcs(buf, len, packet);
+ case 0xf3: /* OVF */
+ return intel_pt_get_ovf(packet);
+ case 0x82: /* PSB */
+ return intel_pt_get_psb(buf, len, packet);
+ case 0x23: /* PSBEND */
+ return intel_pt_get_psbend(packet);
+ case 0x73: /* TMA */
+ return intel_pt_get_tma(buf, len, packet);
+ case 0xC3: /* 3-byte header */
+ return intel_pt_get_3byte(buf, len, packet);
+ case 0x62: /* EXSTOP no IP */
+ return intel_pt_get_exstop(packet);
+ case 0xE2: /* EXSTOP with IP */
+ return intel_pt_get_exstop_ip(packet);
+ case 0xC2: /* MWAIT */
+ return intel_pt_get_mwait(buf, len, packet);
+ case 0x22: /* PWRE */
+ return intel_pt_get_pwre(buf, len, packet);
+ case 0xA2: /* PWRX */
+ return intel_pt_get_pwrx(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_short_tnt(unsigned int byte,
+ struct intel_pt_pkt *packet)
+{
+ int count;
+
+ for (count = 6; count; count--) {
+ if (byte & BIT(7))
+ break;
+ byte <<= 1;
+ }
+
+ packet->type = INTEL_PT_TNT;
+ packet->count = count;
+ packet->payload = (uint64_t)byte << 57;
+
+ return 1;
+}
+
+static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
+ size_t len, struct intel_pt_pkt *packet)
+{
+ unsigned int offs = 1, shift;
+ uint64_t payload = byte >> 3;
+
+ byte >>= 2;
+ len -= 1;
+ for (shift = 5; byte & 1; shift += 7) {
+ if (offs > 9)
+ return INTEL_PT_BAD_PACKET;
+ if (len < offs)
+ return INTEL_PT_NEED_MORE_BYTES;
+ byte = buf[offs++];
+ payload |= ((uint64_t)byte >> 1) << shift;
+ }
+
+ packet->type = INTEL_PT_CYC;
+ packet->payload = payload;
+ return offs;
+}
+
+static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
+ const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ int ip_len;
+
+ packet->count = byte >> 5;
+
+ switch (packet->count) {
+ case 0:
+ ip_len = 0;
+ break;
+ case 1:
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 2;
+ packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
+ break;
+ case 2:
+ if (len < 5)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 4;
+ packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
+ break;
+ case 3:
+ case 4:
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 6;
+ memcpy_le64(&packet->payload, buf + 1, 6);
+ break;
+ case 6:
+ if (len < 9)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 8;
+ packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1));
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ packet->type = type;
+
+ return ip_len + 1;
+}
+
+static int intel_pt_get_mode(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ switch (buf[1] >> 5) {
+ case 0:
+ packet->type = INTEL_PT_MODE_EXEC;
+ switch (buf[1] & 3) {
+ case 0:
+ packet->payload = 16;
+ break;
+ case 1:
+ packet->payload = 64;
+ break;
+ case 2:
+ packet->payload = 32;
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+ break;
+ case 1:
+ packet->type = INTEL_PT_MODE_TSX;
+ if ((buf[1] & 3) == 3)
+ return INTEL_PT_BAD_PACKET;
+ packet->payload = buf[1] & 3;
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ return 2;
+}
+
+static int intel_pt_get_tsc(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_TSC;
+ memcpy_le64(&packet->payload, buf + 1, 7);
+ return 8;
+}
+
+static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MTC;
+ packet->payload = buf[1];
+ return 2;
+}
+
+static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ unsigned int byte;
+
+ memset(packet, 0, sizeof(struct intel_pt_pkt));
+
+ if (!len)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ byte = buf[0];
+ if (!(byte & BIT(0))) {
+ if (byte == 0)
+ return intel_pt_get_pad(packet);
+ if (byte == 2)
+ return intel_pt_get_ext(buf, len, packet);
+ return intel_pt_get_short_tnt(byte, packet);
+ }
+
+ if ((byte & 2))
+ return intel_pt_get_cyc(byte, buf, len, packet);
+
+ switch (byte & 0x1f) {
+ case 0x0D:
+ return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet);
+ case 0x11:
+ return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len,
+ packet);
+ case 0x01:
+ return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len,
+ packet);
+ case 0x1D:
+ return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet);
+ case 0x19:
+ switch (byte) {
+ case 0x99:
+ return intel_pt_get_mode(buf, len, packet);
+ case 0x19:
+ return intel_pt_get_tsc(buf, len, packet);
+ case 0x59:
+ return intel_pt_get_mtc(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ int ret;
+
+ ret = intel_pt_do_get_packet(buf, len, packet);
+ if (ret > 0) {
+ while (ret < 8 && len > (size_t)ret && !buf[ret])
+ ret += 1;
+ }
+ return ret;
+}
+
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
+ size_t buf_len)
+{
+ int ret, i, nr;
+ unsigned long long payload = packet->payload;
+ const char *name = intel_pt_pkt_name(packet->type);
+
+ switch (packet->type) {
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_OVF:
+ return snprintf(buf, buf_len, "%s", name);
+ case INTEL_PT_TNT: {
+ size_t blen = buf_len;
+
+ ret = snprintf(buf, blen, "%s ", name);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ blen -= ret;
+ for (i = 0; i < packet->count; i++) {
+ if (payload & BIT63)
+ ret = snprintf(buf, blen, "T");
+ else
+ ret = snprintf(buf, blen, "N");
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ blen -= ret;
+ payload <<= 1;
+ }
+ ret = snprintf(buf, blen, " (%d)", packet->count);
+ if (ret < 0)
+ return ret;
+ blen -= ret;
+ return buf_len - blen;
+ }
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_FUP:
+ if (!(packet->count))
+ return snprintf(buf, buf_len, "%s no ip", name);
+ __fallthrough;
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MTC:
+ case INTEL_PT_MNT:
+ case INTEL_PT_CBR:
+ case INTEL_PT_TSC:
+ return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
+ case INTEL_PT_TMA:
+ return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
+ (unsigned)payload, packet->count);
+ case INTEL_PT_MODE_EXEC:
+ return snprintf(buf, buf_len, "%s %lld", name, payload);
+ case INTEL_PT_MODE_TSX:
+ return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
+ name, (unsigned)(payload >> 1) & 1,
+ (unsigned)payload & 1);
+ case INTEL_PT_PIP:
+ nr = packet->payload & NR_FLAG ? 1 : 0;
+ payload &= ~NR_FLAG;
+ ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
+ name, payload, nr);
+ return ret;
+ case INTEL_PT_PTWRITE:
+ return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
+ case INTEL_PT_PTWRITE_IP:
+ return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
+ case INTEL_PT_EXSTOP:
+ return snprintf(buf, buf_len, "%s IP:0", name);
+ case INTEL_PT_EXSTOP_IP:
+ return snprintf(buf, buf_len, "%s IP:1", name);
+ case INTEL_PT_MWAIT:
+ return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
+ name, payload, (unsigned int)(payload & 0xff),
+ (unsigned int)((payload >> 32) & 0x3));
+ case INTEL_PT_PWRE:
+ return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
+ name, payload, !!(payload & 0x80),
+ (unsigned int)((payload >> 12) & 0xf),
+ (unsigned int)((payload >> 8) & 0xf));
+ case INTEL_PT_PWRX:
+ return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
+ name, payload,
+ (unsigned int)((payload >> 4) & 0xf),
+ (unsigned int)(payload & 0xf),
+ (unsigned int)((payload >> 8) & 0xf));
+ default:
+ break;
+ }
+ return snprintf(buf, buf_len, "%s 0x%llx (%d)",
+ name, payload, packet->count);
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
new file mode 100644
index 000000000..73ddc3a88
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -0,0 +1,77 @@
+/*
+ * intel_pt_pkt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__
+#define INCLUDE__INTEL_PT_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_PKT_DESC_MAX 256
+
+#define INTEL_PT_NEED_MORE_BYTES -1
+#define INTEL_PT_BAD_PACKET -2
+
+#define INTEL_PT_PSB_STR "\002\202\002\202\002\202\002\202" \
+ "\002\202\002\202\002\202\002\202"
+#define INTEL_PT_PSB_LEN 16
+
+#define INTEL_PT_PKT_MAX_SZ 16
+
+enum intel_pt_pkt_type {
+ INTEL_PT_BAD,
+ INTEL_PT_PAD,
+ INTEL_PT_TNT,
+ INTEL_PT_TIP_PGD,
+ INTEL_PT_TIP_PGE,
+ INTEL_PT_TSC,
+ INTEL_PT_TMA,
+ INTEL_PT_MODE_EXEC,
+ INTEL_PT_MODE_TSX,
+ INTEL_PT_MTC,
+ INTEL_PT_TIP,
+ INTEL_PT_FUP,
+ INTEL_PT_CYC,
+ INTEL_PT_VMCS,
+ INTEL_PT_PSB,
+ INTEL_PT_PSBEND,
+ INTEL_PT_CBR,
+ INTEL_PT_TRACESTOP,
+ INTEL_PT_PIP,
+ INTEL_PT_OVF,
+ INTEL_PT_MNT,
+ INTEL_PT_PTWRITE,
+ INTEL_PT_PTWRITE_IP,
+ INTEL_PT_EXSTOP,
+ INTEL_PT_EXSTOP_IP,
+ INTEL_PT_MWAIT,
+ INTEL_PT_PWRE,
+ INTEL_PT_PWRX,
+};
+
+struct intel_pt_pkt {
+ enum intel_pt_pkt_type type;
+ int count;
+ uint64_t payload;
+};
+
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
+
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet);
+
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
new file mode 100644
index 000000000..e0b85930d
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -0,0 +1,1072 @@
+# x86 Opcode Maps
+#
+# This is (mostly) based on following documentations.
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
+# (#326018-047US, June 2013)
+#
+#<Opcode maps>
+# Table: table-name
+# Referrer: escaped-name
+# AVXcode: avx-code
+# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# (or)
+# opcode: escape # escaped-name
+# EndTable
+#
+# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix
+# mnemonics that begin with lowercase 'k' accept a VEX prefix
+#
+#<group maps>
+# GrpTable: GrpXXX
+# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# EndTable
+#
+# AVX Superscripts
+# (ev): this opcode requires EVEX prefix.
+# (evo): this opcode is changed by EVEX prefix (EVEX opcode)
+# (v): this opcode requires VEX prefix.
+# (v1): this opcode only supports 128bit VEX.
+#
+# Last Prefix Superscripts
+# - (66): the last prefix is 0x66
+# - (F3): the last prefix is 0xF3
+# - (F2): the last prefix is 0xF2
+# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
+# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
+
+Table: one byte opcode
+Referrer:
+AVXcode:
+# 0x00 - 0x0f
+00: ADD Eb,Gb
+01: ADD Ev,Gv
+02: ADD Gb,Eb
+03: ADD Gv,Ev
+04: ADD AL,Ib
+05: ADD rAX,Iz
+06: PUSH ES (i64)
+07: POP ES (i64)
+08: OR Eb,Gb
+09: OR Ev,Gv
+0a: OR Gb,Eb
+0b: OR Gv,Ev
+0c: OR AL,Ib
+0d: OR rAX,Iz
+0e: PUSH CS (i64)
+0f: escape # 2-byte escape
+# 0x10 - 0x1f
+10: ADC Eb,Gb
+11: ADC Ev,Gv
+12: ADC Gb,Eb
+13: ADC Gv,Ev
+14: ADC AL,Ib
+15: ADC rAX,Iz
+16: PUSH SS (i64)
+17: POP SS (i64)
+18: SBB Eb,Gb
+19: SBB Ev,Gv
+1a: SBB Gb,Eb
+1b: SBB Gv,Ev
+1c: SBB AL,Ib
+1d: SBB rAX,Iz
+1e: PUSH DS (i64)
+1f: POP DS (i64)
+# 0x20 - 0x2f
+20: AND Eb,Gb
+21: AND Ev,Gv
+22: AND Gb,Eb
+23: AND Gv,Ev
+24: AND AL,Ib
+25: AND rAx,Iz
+26: SEG=ES (Prefix)
+27: DAA (i64)
+28: SUB Eb,Gb
+29: SUB Ev,Gv
+2a: SUB Gb,Eb
+2b: SUB Gv,Ev
+2c: SUB AL,Ib
+2d: SUB rAX,Iz
+2e: SEG=CS (Prefix)
+2f: DAS (i64)
+# 0x30 - 0x3f
+30: XOR Eb,Gb
+31: XOR Ev,Gv
+32: XOR Gb,Eb
+33: XOR Gv,Ev
+34: XOR AL,Ib
+35: XOR rAX,Iz
+36: SEG=SS (Prefix)
+37: AAA (i64)
+38: CMP Eb,Gb
+39: CMP Ev,Gv
+3a: CMP Gb,Eb
+3b: CMP Gv,Ev
+3c: CMP AL,Ib
+3d: CMP rAX,Iz
+3e: SEG=DS (Prefix)
+3f: AAS (i64)
+# 0x40 - 0x4f
+40: INC eAX (i64) | REX (o64)
+41: INC eCX (i64) | REX.B (o64)
+42: INC eDX (i64) | REX.X (o64)
+43: INC eBX (i64) | REX.XB (o64)
+44: INC eSP (i64) | REX.R (o64)
+45: INC eBP (i64) | REX.RB (o64)
+46: INC eSI (i64) | REX.RX (o64)
+47: INC eDI (i64) | REX.RXB (o64)
+48: DEC eAX (i64) | REX.W (o64)
+49: DEC eCX (i64) | REX.WB (o64)
+4a: DEC eDX (i64) | REX.WX (o64)
+4b: DEC eBX (i64) | REX.WXB (o64)
+4c: DEC eSP (i64) | REX.WR (o64)
+4d: DEC eBP (i64) | REX.WRB (o64)
+4e: DEC eSI (i64) | REX.WRX (o64)
+4f: DEC eDI (i64) | REX.WRXB (o64)
+# 0x50 - 0x5f
+50: PUSH rAX/r8 (d64)
+51: PUSH rCX/r9 (d64)
+52: PUSH rDX/r10 (d64)
+53: PUSH rBX/r11 (d64)
+54: PUSH rSP/r12 (d64)
+55: PUSH rBP/r13 (d64)
+56: PUSH rSI/r14 (d64)
+57: PUSH rDI/r15 (d64)
+58: POP rAX/r8 (d64)
+59: POP rCX/r9 (d64)
+5a: POP rDX/r10 (d64)
+5b: POP rBX/r11 (d64)
+5c: POP rSP/r12 (d64)
+5d: POP rBP/r13 (d64)
+5e: POP rSI/r14 (d64)
+5f: POP rDI/r15 (d64)
+# 0x60 - 0x6f
+60: PUSHA/PUSHAD (i64)
+61: POPA/POPAD (i64)
+62: BOUND Gv,Ma (i64) | EVEX (Prefix)
+63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
+64: SEG=FS (Prefix)
+65: SEG=GS (Prefix)
+66: Operand-Size (Prefix)
+67: Address-Size (Prefix)
+68: PUSH Iz (d64)
+69: IMUL Gv,Ev,Iz
+6a: PUSH Ib (d64)
+6b: IMUL Gv,Ev,Ib
+6c: INS/INSB Yb,DX
+6d: INS/INSW/INSD Yz,DX
+6e: OUTS/OUTSB DX,Xb
+6f: OUTS/OUTSW/OUTSD DX,Xz
+# 0x70 - 0x7f
+70: JO Jb
+71: JNO Jb
+72: JB/JNAE/JC Jb
+73: JNB/JAE/JNC Jb
+74: JZ/JE Jb
+75: JNZ/JNE Jb
+76: JBE/JNA Jb
+77: JNBE/JA Jb
+78: JS Jb
+79: JNS Jb
+7a: JP/JPE Jb
+7b: JNP/JPO Jb
+7c: JL/JNGE Jb
+7d: JNL/JGE Jb
+7e: JLE/JNG Jb
+7f: JNLE/JG Jb
+# 0x80 - 0x8f
+80: Grp1 Eb,Ib (1A)
+81: Grp1 Ev,Iz (1A)
+82: Grp1 Eb,Ib (1A),(i64)
+83: Grp1 Ev,Ib (1A)
+84: TEST Eb,Gb
+85: TEST Ev,Gv
+86: XCHG Eb,Gb
+87: XCHG Ev,Gv
+88: MOV Eb,Gb
+89: MOV Ev,Gv
+8a: MOV Gb,Eb
+8b: MOV Gv,Ev
+8c: MOV Ev,Sw
+8d: LEA Gv,M
+8e: MOV Sw,Ew
+8f: Grp1A (1A) | POP Ev (d64)
+# 0x90 - 0x9f
+90: NOP | PAUSE (F3) | XCHG r8,rAX
+91: XCHG rCX/r9,rAX
+92: XCHG rDX/r10,rAX
+93: XCHG rBX/r11,rAX
+94: XCHG rSP/r12,rAX
+95: XCHG rBP/r13,rAX
+96: XCHG rSI/r14,rAX
+97: XCHG rDI/r15,rAX
+98: CBW/CWDE/CDQE
+99: CWD/CDQ/CQO
+9a: CALLF Ap (i64)
+9b: FWAIT/WAIT
+9c: PUSHF/D/Q Fv (d64)
+9d: POPF/D/Q Fv (d64)
+9e: SAHF
+9f: LAHF
+# 0xa0 - 0xaf
+a0: MOV AL,Ob
+a1: MOV rAX,Ov
+a2: MOV Ob,AL
+a3: MOV Ov,rAX
+a4: MOVS/B Yb,Xb
+a5: MOVS/W/D/Q Yv,Xv
+a6: CMPS/B Xb,Yb
+a7: CMPS/W/D Xv,Yv
+a8: TEST AL,Ib
+a9: TEST rAX,Iz
+aa: STOS/B Yb,AL
+ab: STOS/W/D/Q Yv,rAX
+ac: LODS/B AL,Xb
+ad: LODS/W/D/Q rAX,Xv
+ae: SCAS/B AL,Yb
+# Note: The May 2011 Intel manual shows Xv for the second parameter of the
+# next instruction but Yv is correct
+af: SCAS/W/D/Q rAX,Yv
+# 0xb0 - 0xbf
+b0: MOV AL/R8L,Ib
+b1: MOV CL/R9L,Ib
+b2: MOV DL/R10L,Ib
+b3: MOV BL/R11L,Ib
+b4: MOV AH/R12L,Ib
+b5: MOV CH/R13L,Ib
+b6: MOV DH/R14L,Ib
+b7: MOV BH/R15L,Ib
+b8: MOV rAX/r8,Iv
+b9: MOV rCX/r9,Iv
+ba: MOV rDX/r10,Iv
+bb: MOV rBX/r11,Iv
+bc: MOV rSP/r12,Iv
+bd: MOV rBP/r13,Iv
+be: MOV rSI/r14,Iv
+bf: MOV rDI/r15,Iv
+# 0xc0 - 0xcf
+c0: Grp2 Eb,Ib (1A)
+c1: Grp2 Ev,Ib (1A)
+c2: RETN Iw (f64)
+c3: RETN
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
+c6: Grp11A Eb,Ib (1A)
+c7: Grp11B Ev,Iz (1A)
+c8: ENTER Iw,Ib
+c9: LEAVE (d64)
+ca: RETF Iw
+cb: RETF
+cc: INT3
+cd: INT Ib
+ce: INTO (i64)
+cf: IRET/D/Q
+# 0xd0 - 0xdf
+d0: Grp2 Eb,1 (1A)
+d1: Grp2 Ev,1 (1A)
+d2: Grp2 Eb,CL (1A)
+d3: Grp2 Ev,CL (1A)
+d4: AAM Ib (i64)
+d5: AAD Ib (i64)
+d6:
+d7: XLAT/XLATB
+d8: ESC
+d9: ESC
+da: ESC
+db: ESC
+dc: ESC
+dd: ESC
+de: ESC
+df: ESC
+# 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
+e0: LOOPNE/LOOPNZ Jb (f64)
+e1: LOOPE/LOOPZ Jb (f64)
+e2: LOOP Jb (f64)
+e3: JrCXZ Jb (f64)
+e4: IN AL,Ib
+e5: IN eAX,Ib
+e6: OUT Ib,AL
+e7: OUT Ib,eAX
+# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
+# in "near" jumps and calls is 16-bit. For CALL,
+# push of return address is 16-bit wide, RSP is decremented by 2
+# but is not truncated to 16 bits, unlike RIP.
+e8: CALL Jz (f64)
+e9: JMP-near Jz (f64)
+ea: JMP-far Ap (i64)
+eb: JMP-short Jb (f64)
+ec: IN AL,DX
+ed: IN eAX,DX
+ee: OUT DX,AL
+ef: OUT DX,eAX
+# 0xf0 - 0xff
+f0: LOCK (Prefix)
+f1:
+f2: REPNE (Prefix) | XACQUIRE (Prefix)
+f3: REP/REPE (Prefix) | XRELEASE (Prefix)
+f4: HLT
+f5: CMC
+f6: Grp3_1 Eb (1A)
+f7: Grp3_2 Ev (1A)
+f8: CLC
+f9: STC
+fa: CLI
+fb: STI
+fc: CLD
+fd: STD
+fe: Grp4 (1A)
+ff: Grp5 (1A)
+EndTable
+
+Table: 2-byte opcode (0x0f)
+Referrer: 2-byte escape
+AVXcode: 1
+# 0x0f 0x00-0x0f
+00: Grp6 (1A)
+01: Grp7 (1A)
+02: LAR Gv,Ew
+03: LSL Gv,Ew
+04:
+05: SYSCALL (o64)
+06: CLTS
+07: SYSRET (o64)
+08: INVD
+09: WBINVD
+0a:
+0b: UD2 (1B)
+0c:
+# AMD's prefetch group. Intel supports prefetchw(/1) only.
+0d: GrpP
+0e: FEMMS
+# 3DNow! uses the last imm byte as opcode extension.
+0f: 3DNow! Pq,Qq,Ib
+# 0x0f 0x10-0x1f
+# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands
+# but it actually has operands. And also, vmovss and vmovsd only accept 128bit.
+# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form.
+# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming
+# Reference A.1
+10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
+11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
+12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
+13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
+14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
+15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
+16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
+17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
+18: Grp16 (1A)
+19:
+# Intel SDM opcode map does not list MPX instructions. For now using Gv for
+# bnd registers and Ev for everything else is OK because the instruction
+# decoder does not use the information except as an indication that there is
+# a ModR/M byte.
+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
+1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
+1c:
+1d:
+1e:
+1f: NOP Ev
+# 0x0f 0x20-0x2f
+20: MOV Rd,Cd
+21: MOV Rd,Dd
+22: MOV Cd,Rd
+23: MOV Dd,Rd
+24:
+25:
+26:
+27:
+28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
+29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
+2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
+2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
+2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
+2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
+2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
+2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
+# 0x0f 0x30-0x3f
+30: WRMSR
+31: RDTSC
+32: RDMSR
+33: RDPMC
+34: SYSENTER
+35: SYSEXIT
+36:
+37: GETSEC
+38: escape # 3-byte escape 1
+39:
+3a: escape # 3-byte escape 2
+3b:
+3c:
+3d:
+3e:
+3f:
+# 0x0f 0x40-0x4f
+40: CMOVO Gv,Ev
+41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66)
+42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66)
+43: CMOVAE/NB/NC Gv,Ev
+44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66)
+45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66)
+46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66)
+47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66)
+48: CMOVS Gv,Ev
+49: CMOVNS Gv,Ev
+4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66)
+4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk
+4c: CMOVL/NGE Gv,Ev
+4d: CMOVNL/GE Gv,Ev
+4e: CMOVLE/NG Gv,Ev
+4f: CMOVNLE/G Gv,Ev
+# 0x0f 0x50-0x5f
+50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
+51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
+52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
+53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
+54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
+55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
+56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
+57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
+58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
+59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
+5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
+5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
+5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
+5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
+5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
+5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
+# 0x0f 0x60-0x6f
+60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
+61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
+62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
+63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
+64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
+65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
+66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
+67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
+68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
+69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
+6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
+6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
+6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
+6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
+6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev)
+# 0x0f 0x70-0x7f
+70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
+71: Grp12 (1A)
+72: Grp13 (1A)
+73: Grp14 (1A)
+74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
+75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
+76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
+# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
+77: emms | vzeroupper | vzeroall
+78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev)
+79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev)
+7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev)
+7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev)
+7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
+7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
+7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
+# 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+80: JO Jz (f64)
+81: JNO Jz (f64)
+82: JB/JC/JNAE Jz (f64)
+83: JAE/JNB/JNC Jz (f64)
+84: JE/JZ Jz (f64)
+85: JNE/JNZ Jz (f64)
+86: JBE/JNA Jz (f64)
+87: JA/JNBE Jz (f64)
+88: JS Jz (f64)
+89: JNS Jz (f64)
+8a: JP/JPE Jz (f64)
+8b: JNP/JPO Jz (f64)
+8c: JL/JNGE Jz (f64)
+8d: JNL/JGE Jz (f64)
+8e: JLE/JNG Jz (f64)
+8f: JNLE/JG Jz (f64)
+# 0x0f 0x90-0x9f
+90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
+91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
+92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2)
+93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2)
+94: SETE/Z Eb
+95: SETNE/NZ Eb
+96: SETBE/NA Eb
+97: SETA/NBE Eb
+98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66)
+99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66)
+9a: SETP/PE Eb
+9b: SETNP/PO Eb
+9c: SETL/NGE Eb
+9d: SETNL/GE Eb
+9e: SETLE/NG Eb
+9f: SETNLE/G Eb
+# 0x0f 0xa0-0xaf
+a0: PUSH FS (d64)
+a1: POP FS (d64)
+a2: CPUID
+a3: BT Ev,Gv
+a4: SHLD Ev,Gv,Ib
+a5: SHLD Ev,Gv,CL
+a6: GrpPDLK
+a7: GrpRNG
+a8: PUSH GS (d64)
+a9: POP GS (d64)
+aa: RSM
+ab: BTS Ev,Gv
+ac: SHRD Ev,Gv,Ib
+ad: SHRD Ev,Gv,CL
+ae: Grp15 (1A),(1C)
+af: IMUL Gv,Ev
+# 0x0f 0xb0-0xbf
+b0: CMPXCHG Eb,Gb
+b1: CMPXCHG Ev,Gv
+b2: LSS Gv,Mp
+b3: BTR Ev,Gv
+b4: LFS Gv,Mp
+b5: LGS Gv,Mp
+b6: MOVZX Gv,Eb
+b7: MOVZX Gv,Ew
+b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
+b9: Grp10 (1A)
+ba: Grp8 Ev,Ib (1A)
+bb: BTC Ev,Gv
+bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
+be: MOVSX Gv,Eb
+bf: MOVSX Gv,Ew
+# 0x0f 0xc0-0xcf
+c0: XADD Eb,Gb
+c1: XADD Ev,Gv
+c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
+c3: movnti My,Gy
+c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
+c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
+c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
+c7: Grp9 (1A)
+c8: BSWAP RAX/EAX/R8/R8D
+c9: BSWAP RCX/ECX/R9/R9D
+ca: BSWAP RDX/EDX/R10/R10D
+cb: BSWAP RBX/EBX/R11/R11D
+cc: BSWAP RSP/ESP/R12/R12D
+cd: BSWAP RBP/EBP/R13/R13D
+ce: BSWAP RSI/ESI/R14/R14D
+cf: BSWAP RDI/EDI/R15/R15D
+# 0x0f 0xd0-0xdf
+d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
+d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
+d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
+d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
+d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
+d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
+d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
+d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
+d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
+d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
+da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo)
+dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
+dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
+de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0xe0-0xef
+e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
+e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
+e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
+e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
+e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
+e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2)
+e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
+e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
+e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
+ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo)
+ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
+ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
+ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0xf0-0xff
+f0: vlddqu Vx,Mx (F2)
+f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
+f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
+f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
+f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
+f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
+f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
+f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
+f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
+f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
+fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
+fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
+fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
+fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
+fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
+ff: UD0
+EndTable
+
+Table: 3-byte opcode 1 (0x0f 0x38)
+Referrer: 3-byte escape 1
+AVXcode: 2
+# 0x0f 0x38 0x00-0x0f
+00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
+01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
+02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
+03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
+04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
+05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
+06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
+07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
+08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
+09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
+0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
+0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
+0c: vpermilps Vx,Hx,Wx (66),(v)
+0d: vpermilpd Vx,Hx,Wx (66),(v)
+0e: vtestps Vx,Wx (66),(v)
+0f: vtestpd Vx,Wx (66),(v)
+# 0x0f 0x38 0x10-0x1f
+10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev)
+11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev)
+12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev)
+13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev)
+14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo)
+15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo)
+16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo)
+17: vptest Vx,Wx (66)
+18: vbroadcastss Vx,Wd (66),(v)
+19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo)
+1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo)
+1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev)
+1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
+1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
+1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
+1f: vpabsq Vx,Wx (66),(ev)
+# 0x0f 0x38 0x20-0x2f
+20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev)
+21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev)
+22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev)
+23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev)
+24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev)
+25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev)
+26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev)
+27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev)
+28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev)
+29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev)
+2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev)
+2b: vpackusdw Vx,Hx,Wx (66),(v1)
+2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo)
+2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo)
+2e: vmaskmovps Mx,Hx,Vx (66),(v)
+2f: vmaskmovpd Mx,Hx,Vx (66),(v)
+# 0x0f 0x38 0x30-0x3f
+30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev)
+31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev)
+32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev)
+33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev)
+34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev)
+35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev)
+36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo)
+37: vpcmpgtq Vx,Hx,Wx (66),(v1)
+38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev)
+39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev)
+3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev)
+3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo)
+3c: vpmaxsb Vx,Hx,Wx (66),(v1)
+3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo)
+3e: vpmaxuw Vx,Hx,Wx (66),(v1)
+3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0x38 0x40-0x8f
+40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo)
+41: vphminposuw Vdq,Wdq (66),(v1)
+42: vgetexpps/d Vx,Wx (66),(ev)
+43: vgetexpss/d Vx,Hx,Wx (66),(ev)
+44: vplzcntd/q Vx,Wx (66),(ev)
+45: vpsrlvd/q Vx,Hx,Wx (66),(v)
+46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
+47: vpsllvd/q Vx,Hx,Wx (66),(v)
+# Skip 0x48-0x4b
+4c: vrcp14ps/d Vpd,Wpd (66),(ev)
+4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
+4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
+4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
+# Skip 0x50-0x57
+58: vpbroadcastd Vx,Wx (66),(v)
+59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
+5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
+5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
+# Skip 0x5c-0x63
+64: vpblendmd/q Vx,Hx,Wx (66),(ev)
+65: vblendmps/d Vx,Hx,Wx (66),(ev)
+66: vpblendmb/w Vx,Hx,Wx (66),(ev)
+# Skip 0x67-0x74
+75: vpermi2b/w Vx,Hx,Wx (66),(ev)
+76: vpermi2d/q Vx,Hx,Wx (66),(ev)
+77: vpermi2ps/d Vx,Hx,Wx (66),(ev)
+78: vpbroadcastb Vx,Wx (66),(v)
+79: vpbroadcastw Vx,Wx (66),(v)
+7a: vpbroadcastb Vx,Rv (66),(ev)
+7b: vpbroadcastw Vx,Rv (66),(ev)
+7c: vpbroadcastd/q Vx,Rv (66),(ev)
+7d: vpermt2b/w Vx,Hx,Wx (66),(ev)
+7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
+7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
+80: INVEPT Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
+82: INVPCID Gy,Mdq (66)
+83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
+88: vexpandps/d Vpd,Wpd (66),(ev)
+89: vpexpandd/q Vx,Wx (66),(ev)
+8a: vcompressps/d Wx,Vx (66),(ev)
+8b: vpcompressd/q Wx,Vx (66),(ev)
+8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
+8d: vpermb/w Vx,Hx,Wx (66),(ev)
+8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
+# 0x0f 0x38 0x90-0xbf (FMA)
+90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo)
+91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo)
+92: vgatherdps/d Vx,Hx,Wx (66),(v)
+93: vgatherqps/d Vx,Hx,Wx (66),(v)
+94:
+95:
+96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
+97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
+98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
+99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
+9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
+9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
+9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+a0: vpscatterdd/q Wx,Vx (66),(ev)
+a1: vpscatterqd/q Wx,Vx (66),(ev)
+a2: vscatterdps/d Wx,Vx (66),(ev)
+a3: vscatterqps/d Wx,Vx (66),(ev)
+a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
+a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
+a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
+a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
+ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
+ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
+af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+b4: vpmadd52luq Vx,Hx,Wx (66),(ev)
+b5: vpmadd52huq Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
+b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
+b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
+b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
+bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
+bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
+bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+# 0x0f 0x38 0xc0-0xff
+c4: vpconflictd/q Vx,Wx (66),(ev)
+c6: Grp18 (1A)
+c7: Grp19 (1A)
+c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev)
+c9: sha1msg1 Vdq,Wdq
+ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev)
+cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev)
+cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev)
+cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev)
+db: VAESIMC Vdq,Wdq (66),(v1)
+dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
+dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
+de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
+df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
+f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
+f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
+f2: ANDN Gy,By,Ey (v)
+f3: Grp17 (1A)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
+f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
+EndTable
+
+Table: 3-byte opcode 2 (0x0f 0x3a)
+Referrer: 3-byte escape 2
+AVXcode: 3
+# 0x0f 0x3a 0x00-0xff
+00: vpermq Vqq,Wqq,Ib (66),(v)
+01: vpermpd Vqq,Wqq,Ib (66),(v)
+02: vpblendd Vx,Hx,Wx,Ib (66),(v)
+03: valignd/q Vx,Hx,Wx,Ib (66),(ev)
+04: vpermilps Vx,Wx,Ib (66),(v)
+05: vpermilpd Vx,Wx,Ib (66),(v)
+06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
+07:
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
+0c: vblendps Vx,Hx,Wx,Ib (66)
+0d: vblendpd Vx,Hx,Wx,Ib (66)
+0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
+0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
+14: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
+15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
+16: vpextrd/q Ey,Vdq,Ib (66),(v1)
+17: vextractps Ed,Vdq,Ib (66),(v1)
+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev)
+1d: vcvtps2ph Wx,Vx,Ib (66),(v)
+1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev)
+1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev)
+20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
+21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
+22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
+23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
+25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+30: kshiftrb/w Vk,Uk,Ib (66),(v)
+31: kshiftrd/q Vk,Uk,Ib (66),(v)
+32: kshiftlb/w Vk,Uk,Ib (66),(v)
+33: kshiftld/q Vk,Uk,Ib (66),(v)
+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev)
+3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev)
+3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev)
+40: vdpps Vx,Hx,Wx,Ib (66)
+41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo)
+43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
+44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
+46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
+4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
+4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
+4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
+50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev)
+51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
+54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
+55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
+61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
+62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
+63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+cc: sha1rnds4 Vdq,Wdq,Ib
+df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
+f0: RORX Gy,Ey,Ib (F2),(v)
+EndTable
+
+GrpTable: Grp1
+0: ADD
+1: OR
+2: ADC
+3: SBB
+4: AND
+5: SUB
+6: XOR
+7: CMP
+EndTable
+
+GrpTable: Grp1A
+0: POP
+EndTable
+
+GrpTable: Grp2
+0: ROL
+1: ROR
+2: RCL
+3: RCR
+4: SHL/SAL
+5: SHR
+6:
+7: SAR
+EndTable
+
+GrpTable: Grp3_1
+0: TEST Eb,Ib
+1: TEST Eb,Ib
+2: NOT Eb
+3: NEG Eb
+4: MUL AL,Eb
+5: IMUL AL,Eb
+6: DIV AL,Eb
+7: IDIV AL,Eb
+EndTable
+
+GrpTable: Grp3_2
+0: TEST Ev,Iz
+1:
+2: NOT Ev
+3: NEG Ev
+4: MUL rAX,Ev
+5: IMUL rAX,Ev
+6: DIV rAX,Ev
+7: IDIV rAX,Ev
+EndTable
+
+GrpTable: Grp4
+0: INC Eb
+1: DEC Eb
+EndTable
+
+GrpTable: Grp5
+0: INC Ev
+1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+2: CALLN Ev (f64)
+3: CALLF Ep
+4: JMPN Ev (f64)
+5: JMPF Mp
+6: PUSH Ev (d64)
+7:
+EndTable
+
+GrpTable: Grp6
+0: SLDT Rv/Mw
+1: STR Rv/Mw
+2: LLDT Ew
+3: LTR Ew
+4: VERR Ew
+5: VERW Ew
+EndTable
+
+GrpTable: Grp7
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
+3: LIDT Ms
+4: SMSW Mw/Rv
+5: rdpkru (110),(11B) | wrpkru (111),(11B)
+6: LMSW Ew
+7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
+EndTable
+
+GrpTable: Grp8
+4: BT
+5: BTS
+6: BTR
+7: BTC
+EndTable
+
+GrpTable: Grp9
+1: CMPXCHG8B/16B Mq/Mdq
+3: xrstors
+4: xsavec
+5: xsaves
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
+EndTable
+
+GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
+EndTable
+
+# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
+GrpTable: Grp11A
+0: MOV Eb,Ib
+7: XABORT Ib (000),(11B)
+EndTable
+
+GrpTable: Grp11B
+0: MOV Eb,Iz
+7: XBEGIN Jz (000),(11B)
+EndTable
+
+GrpTable: Grp12
+2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
+4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
+6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp13
+0: vprord/q Hx,Wx,Ib (66),(ev)
+1: vprold/q Hx,Wx,Ib (66),(ev)
+2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo)
+6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp14
+2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
+3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
+6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
+7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp15
+0: fxsave | RDFSBASE Ry (F3),(11B)
+1: fxstor | RDGSBASE Ry (F3),(11B)
+2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
+3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
+4: XSAVE | ptwrite Ey (F3),(11B)
+5: XRSTOR | lfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B)
+7: clflush | clflushopt (66) | sfence (11B)
+EndTable
+
+GrpTable: Grp16
+0: prefetch NTA
+1: prefetch T0
+2: prefetch T1
+3: prefetch T2
+EndTable
+
+GrpTable: Grp17
+1: BLSR By,Ey (v)
+2: BLSMSK By,Ey (v)
+3: BLSI By,Ey (v)
+EndTable
+
+GrpTable: Grp18
+1: vgatherpf0dps/d Wx (66),(ev)
+2: vgatherpf1dps/d Wx (66),(ev)
+5: vscatterpf0dps/d Wx (66),(ev)
+6: vscatterpf1dps/d Wx (66),(ev)
+EndTable
+
+GrpTable: Grp19
+1: vgatherpf0qps/d Wx (66),(ev)
+2: vgatherpf1qps/d Wx (66),(ev)
+5: vscatterpf0qps/d Wx (66),(ev)
+6: vscatterpf1qps/d Wx (66),(ev)
+EndTable
+
+# AMD's Prefetch Group
+GrpTable: GrpP
+0: PREFETCH
+1: PREFETCHW
+EndTable
+
+GrpTable: GrpPDLK
+0: MONTMUL
+1: XSHA1
+2: XSHA2
+EndTable
+
+GrpTable: GrpRNG
+0: xstore-rng
+1: xcrypt-ecb
+2: xcrypt-cbc
+4: xcrypt-cfb
+5: xcrypt-ofb
+EndTable