| field | value | date |
|---|---|---|
| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 07:25:46 +0000 |
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 07:25:46 +0000 |
| commit | 5277429a362a9cf4ce649557bf4c8fe0e20c05c0 (patch) | |
| tree | 52cc97728c6fbb7393984dc3db05c5836107fe44 /plugins/plugin_kvm.c | |
| parent | Initial commit. (diff) | |
| download | libtraceevent-5277429a362a9cf4ce649557bf4c8fe0e20c05c0.tar.xz, libtraceevent-5277429a362a9cf4ce649557bf4c8fe0e20c05c0.zip | |
Adding upstream version 1:1.7.1. (upstream/1%1.7.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'plugins/plugin_kvm.c')
-rw-r--r-- | plugins/plugin_kvm.c | 591 |
1 file changed, 591 insertions, 0 deletions
diff --git a/plugins/plugin_kvm.c b/plugins/plugin_kvm.c
new file mode 100644
index 0000000..9852c35
--- /dev/null
+++ b/plugins/plugin_kvm.c
@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+#define __weak __attribute__((weak))
+
+#ifdef HAVE_UDIS86
+
+#include <udis86.h>
+
+static ud_t ud;
+
+static void init_disassembler(void)
+{
+	ud_init(&ud);
+	ud_set_syntax(&ud, UD_SYN_ATT);
+}
+
+static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
+			       int cr0_pe, int eflags_vm,
+			       int cs_d, int cs_l)
+{
+	int mode;
+
+	if (!cr0_pe)
+		mode = 16;
+	else if (eflags_vm)
+		mode = 16;
+	else if (cs_l)
+		mode = 64;
+	else if (cs_d)
+		mode = 32;
+	else
+		mode = 16;
+
+	ud_set_pc(&ud, rip);
+	ud_set_mode(&ud, mode);
+	ud_set_input_buffer(&ud, insn, len);
+	ud_disassemble(&ud);
+	return ud_insn_asm(&ud);
+}
+
+#else
+
+static void init_disassembler(void)
+{
+}
+
+static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
+			       int cr0_pe, int eflags_vm,
+			       int cs_d, int cs_l)
+{
+	static char out[15*3+1];
+	int i;
+
+	for (i = 0; i < len; ++i)
+		sprintf(out + i * 3, "%02x ", insn[i]);
+	out[len*3-1] = '\0';
+	return out;
+}
+
+#endif
+
+
+#define VMX_EXIT_REASONS \
+	_ER(EXCEPTION_NMI, 0) \
+	_ER(EXTERNAL_INTERRUPT, 1) \
+	_ER(TRIPLE_FAULT, 2) \
+	_ER(PENDING_INTERRUPT, 7) \
+	_ER(NMI_WINDOW, 8) \
+	_ER(TASK_SWITCH, 9) \
+	_ER(CPUID, 10) \
+	_ER(HLT, 12) \
+	_ER(INVD, 13) \
+	_ER(INVLPG, 14) \
+	_ER(RDPMC, 15) \
+	_ER(RDTSC, 16) \
+	_ER(VMCALL, 18) \
+	_ER(VMCLEAR, 19) \
+	_ER(VMLAUNCH, 20) \
+	_ER(VMPTRLD, 21) \
+	_ER(VMPTRST, 22) \
+	_ER(VMREAD, 23) \
+	_ER(VMRESUME, 24) \
+	_ER(VMWRITE, 25) \
+	_ER(VMOFF, 26) \
+	_ER(VMON, 27) \
+	_ER(CR_ACCESS, 28) \
+	_ER(DR_ACCESS, 29) \
+	_ER(IO_INSTRUCTION, 30) \
+	_ER(MSR_READ, 31) \
+	_ER(MSR_WRITE, 32) \
+	_ER(MWAIT_INSTRUCTION, 36) \
+	_ER(MONITOR_INSTRUCTION, 39) \
+	_ER(PAUSE_INSTRUCTION, 40) \
+	_ER(MCE_DURING_VMENTRY, 41) \
+	_ER(TPR_BELOW_THRESHOLD, 43) \
+	_ER(APIC_ACCESS, 44) \
+	_ER(EOI_INDUCED, 45) \
+	_ER(EPT_VIOLATION, 48) \
+	_ER(EPT_MISCONFIG, 49) \
+	_ER(INVEPT, 50) \
+	_ER(PREEMPTION_TIMER, 52) \
+	_ER(WBINVD, 54) \
+	_ER(XSETBV, 55) \
+	_ER(APIC_WRITE, 56) \
+	_ER(INVPCID, 58) \
+	_ER(PML_FULL, 62) \
+	_ER(XSAVES, 63) \
+	_ER(XRSTORS, 64)
+
+#define SVM_EXIT_REASONS \
+	_ER(EXIT_READ_CR0, 0x000) \
+	_ER(EXIT_READ_CR3, 0x003) \
+	_ER(EXIT_READ_CR4, 0x004) \
+	_ER(EXIT_READ_CR8, 0x008) \
+	_ER(EXIT_WRITE_CR0, 0x010) \
+	_ER(EXIT_WRITE_CR3, 0x013) \
+	_ER(EXIT_WRITE_CR4, 0x014) \
+	_ER(EXIT_WRITE_CR8, 0x018) \
+	_ER(EXIT_READ_DR0, 0x020) \
+	_ER(EXIT_READ_DR1, 0x021) \
+	_ER(EXIT_READ_DR2, 0x022) \
+	_ER(EXIT_READ_DR3, 0x023) \
+	_ER(EXIT_READ_DR4, 0x024) \
+	_ER(EXIT_READ_DR5, 0x025) \
+	_ER(EXIT_READ_DR6, 0x026) \
+	_ER(EXIT_READ_DR7, 0x027) \
+	_ER(EXIT_WRITE_DR0, 0x030) \
+	_ER(EXIT_WRITE_DR1, 0x031) \
+	_ER(EXIT_WRITE_DR2, 0x032) \
+	_ER(EXIT_WRITE_DR3, 0x033) \
+	_ER(EXIT_WRITE_DR4, 0x034) \
+	_ER(EXIT_WRITE_DR5, 0x035) \
+	_ER(EXIT_WRITE_DR6, 0x036) \
+	_ER(EXIT_WRITE_DR7, 0x037) \
+	_ER(EXIT_EXCP_DE, 0x040) \
+	_ER(EXIT_EXCP_DB, 0x041) \
+	_ER(EXIT_EXCP_BP, 0x043) \
+	_ER(EXIT_EXCP_OF, 0x044) \
+	_ER(EXIT_EXCP_BR, 0x045) \
+	_ER(EXIT_EXCP_UD, 0x046) \
+	_ER(EXIT_EXCP_NM, 0x047) \
+	_ER(EXIT_EXCP_DF, 0x048) \
+	_ER(EXIT_EXCP_TS, 0x04a) \
+	_ER(EXIT_EXCP_NP, 0x04b) \
+	_ER(EXIT_EXCP_SS, 0x04c) \
+	_ER(EXIT_EXCP_GP, 0x04d) \
+	_ER(EXIT_EXCP_PF, 0x04e) \
+	_ER(EXIT_EXCP_MF, 0x050) \
+	_ER(EXIT_EXCP_AC, 0x051) \
+	_ER(EXIT_EXCP_MC, 0x052) \
+	_ER(EXIT_EXCP_XF, 0x053) \
+	_ER(EXIT_INTR, 0x060) \
+	_ER(EXIT_NMI, 0x061) \
+	_ER(EXIT_SMI, 0x062) \
+	_ER(EXIT_INIT, 0x063) \
+	_ER(EXIT_VINTR, 0x064) \
+	_ER(EXIT_CR0_SEL_WRITE, 0x065) \
+	_ER(EXIT_IDTR_READ, 0x066) \
+	_ER(EXIT_GDTR_READ, 0x067) \
+	_ER(EXIT_LDTR_READ, 0x068) \
+	_ER(EXIT_TR_READ, 0x069) \
+	_ER(EXIT_IDTR_WRITE, 0x06a) \
+	_ER(EXIT_GDTR_WRITE, 0x06b) \
+	_ER(EXIT_LDTR_WRITE, 0x06c) \
+	_ER(EXIT_TR_WRITE, 0x06d) \
+	_ER(EXIT_RDTSC, 0x06e) \
+	_ER(EXIT_RDPMC, 0x06f) \
+	_ER(EXIT_PUSHF, 0x070) \
+	_ER(EXIT_POPF, 0x071) \
+	_ER(EXIT_CPUID, 0x072) \
+	_ER(EXIT_RSM, 0x073) \
+	_ER(EXIT_IRET, 0x074) \
+	_ER(EXIT_SWINT, 0x075) \
+	_ER(EXIT_INVD, 0x076) \
+	_ER(EXIT_PAUSE, 0x077) \
+	_ER(EXIT_HLT, 0x078) \
+	_ER(EXIT_INVLPG, 0x079) \
+	_ER(EXIT_INVLPGA, 0x07a) \
+	_ER(EXIT_IOIO, 0x07b) \
+	_ER(EXIT_MSR, 0x07c) \
+	_ER(EXIT_TASK_SWITCH, 0x07d) \
+	_ER(EXIT_FERR_FREEZE, 0x07e) \
+	_ER(EXIT_SHUTDOWN, 0x07f) \
+	_ER(EXIT_VMRUN, 0x080) \
+	_ER(EXIT_VMMCALL, 0x081) \
+	_ER(EXIT_VMLOAD, 0x082) \
+	_ER(EXIT_VMSAVE, 0x083) \
+	_ER(EXIT_STGI, 0x084) \
+	_ER(EXIT_CLGI, 0x085) \
+	_ER(EXIT_SKINIT, 0x086) \
+	_ER(EXIT_RDTSCP, 0x087) \
+	_ER(EXIT_ICEBP, 0x088) \
+	_ER(EXIT_WBINVD, 0x089) \
+	_ER(EXIT_MONITOR, 0x08a) \
+	_ER(EXIT_MWAIT, 0x08b) \
+	_ER(EXIT_MWAIT_COND, 0x08c) \
+	_ER(EXIT_XSETBV, 0x08d) \
+	_ER(EXIT_NPF, 0x400) \
+	_ER(EXIT_AVIC_INCOMPLETE_IPI, 0x401) \
+	_ER(EXIT_AVIC_UNACCELERATED_ACCESS, 0x402) \
+	_ER(EXIT_ERR, -1)
+
+#define _ER(reason, val) { #reason, val },
+struct str_values {
+	const char *str;
+	int val;
+};
+
+static struct str_values vmx_exit_reasons[] = {
+	VMX_EXIT_REASONS
+	{ NULL, -1}
+};
+
+static struct str_values svm_exit_reasons[] = {
+	SVM_EXIT_REASONS
+	{ NULL, -1}
+};
+
+static struct isa_exit_reasons {
+	unsigned isa;
+	struct str_values *strings;
+} isa_exit_reasons[] = {
+	{ .isa = 1, .strings = vmx_exit_reasons },
+	{ .isa = 2, .strings = svm_exit_reasons },
+	{ }
+};
+
+static const char *find_exit_reason(unsigned isa, int val)
+{
+	struct str_values *strings = NULL;
+	int i;
+
+	for (i = 0; isa_exit_reasons[i].strings; ++i)
+		if (isa_exit_reasons[i].isa == isa) {
+			strings = isa_exit_reasons[i].strings;
+			break;
+		}
+	if (!strings)
+		return "UNKNOWN-ISA";
+	for (i = 0; strings[i].str; i++)
+		if (strings[i].val == val)
+			break;
+
+	return strings[i].str;
+}
+
+static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
+			     struct tep_event *event, const char *field)
+{
+	unsigned long long isa;
+	unsigned long long val;
+	const char *reason;
+
+	if (tep_get_field_val(s, event, field, record, &val, 1) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "isa", record, &isa, 0) < 0)
+		isa = 1;
+
+	reason = find_exit_reason(isa, val);
+	if (reason)
+		trace_seq_printf(s, "reason %s", reason);
+	else
+		trace_seq_printf(s, "reason UNKNOWN (%llu)", val);
+	return 0;
+}
+
+__weak const char *tep_plugin_kvm_get_func(struct tep_event *event,
+					   struct tep_record *record,
+					   unsigned long long *val)
+{
+	return NULL;
+}
+
+__weak void tep_plugin_kvm_put_func(const char *func)
+{
+}
+
+
+static void add_rip_function(struct trace_seq *s, struct tep_record *record,
+			     struct tep_event *event, unsigned long long rip)
+{
+	unsigned long long ip = rip;
+	const char *func;
+
+	func = tep_plugin_kvm_get_func(event, record, &ip);
+	if (func) {
+		trace_seq_printf(s, " %s", func);
+		/* The application may update ip to the start of the function */
+		if (ip != rip)
+			trace_seq_printf(s, "+0x%0llx", rip - ip);
+		tep_plugin_kvm_put_func(func);
+	}
+}
+
+static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
+			    struct tep_event *event, void *context)
+{
+	unsigned long long info1 = 0, info2 = 0;
+	unsigned long long rip;
+
+	if (print_exit_reason(s, record, event, "exit_reason") < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "guest_rip", record, &rip, 1) < 0)
+		return -1;
+
+	trace_seq_printf(s, " rip 0x%llx", rip);
+
+	add_rip_function(s, record, event, rip);
+
+	if (tep_get_field_val(s, event, "info1", record, &info1, 0) >= 0
+	    && tep_get_field_val(s, event, "info2", record, &info2, 0) >= 0)
+		trace_seq_printf(s, " info %llx %llx", info1, info2);
+
+	return 0;
+}
+
+static int kvm_entry_handler(struct trace_seq *s, struct tep_record *record,
+			     struct tep_event *event, void *context)
+{
+	unsigned long long rip;
+
+	tep_print_num_field(s, " vcpu %u", event, "vcpu_id", record, 1);
+
+	if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0)
+		return -1;
+
+	trace_seq_printf(s, " rip 0x%llx", rip);
+	add_rip_function(s, record, event, rip);
+
+	return 0;
+}
+
+#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
+#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
+#define KVM_EMUL_INSN_F_CS_D   (1 << 2)
+#define KVM_EMUL_INSN_F_CS_L   (1 << 3)
+
+static int kvm_emulate_insn_handler(struct trace_seq *s,
+				    struct tep_record *record,
+				    struct tep_event *event, void *context)
+{
+	unsigned long long rip, csbase, len, flags, failed;
+	int llen;
+	uint8_t *insn;
+	const char *disasm;
+
+	if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "csbase", record, &csbase, 1) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "len", record, &len, 1) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "flags", record, &flags, 1) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "failed", record, &failed, 1) < 0)
+		return -1;
+
+	insn = tep_get_field_raw(s, event, "insn", record, &llen, 1);
+	if (!insn)
+		return -1;
+
+	disasm = disassemble(insn, len, rip,
+			     flags & KVM_EMUL_INSN_F_CR0_PE,
+			     flags & KVM_EMUL_INSN_F_EFL_VM,
+			     flags & KVM_EMUL_INSN_F_CS_D,
+			     flags & KVM_EMUL_INSN_F_CS_L);
+
+	trace_seq_printf(s, "%llx:%llx", csbase, rip);
+	add_rip_function(s, record, event, rip);
+	trace_seq_printf(s, ": %s%s", disasm, failed ? " FAIL" : "");
+	return 0;
+}
+
+static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record,
+					    struct tep_event *event, void *context)
+{
+	if (print_exit_reason(s, record, event, "exit_code") < 0)
+		return -1;
+
+	tep_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1);
+	tep_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1);
+	tep_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1);
+	tep_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1);
+
+	return 0;
+}
+
+static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record,
+				     struct tep_event *event, void *context)
+{
+	unsigned long long rip;
+
+	if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0)
+		return -1;
+
+	trace_seq_printf(s, " rip %llx", rip);
+	add_rip_function(s, record, event, rip);
+
+	return kvm_nested_vmexit_inject_handler(s, record, event, context);
+}
+
+union kvm_mmu_page_role {
+	unsigned word;
+	struct {
+		unsigned level:4;
+		unsigned cr4_pae:1;
+		unsigned quadrant:2;
+		unsigned direct:1;
+		unsigned access:3;
+		unsigned invalid:1;
+		unsigned nxe:1;
+		unsigned cr0_wp:1;
+		unsigned smep_and_not_wp:1;
+		unsigned smap_and_not_wp:1;
+		unsigned pad_for_nice_hex_output:8;
+		unsigned smm:8;
+	};
+};
+
+static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
+			      struct tep_event *event, void *context)
+{
+	unsigned long long val;
+	static const char *access_str[] = {
+		"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"
+	};
+	union kvm_mmu_page_role role;
+
+	if (tep_get_field_val(s, event, "role", record, &val, 1) < 0)
+		return -1;
+
+	role.word = (int)val;
+
+	/*
+	 * We can only use the structure if the file is of the same
+	 * endianness.
+	 */
+	if (tep_is_file_bigendian(event->tep) ==
+	    tep_is_local_bigendian(event->tep)) {
+
+		trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s",
+				 role.level,
+				 role.quadrant,
+				 role.direct ? " direct" : "",
+				 access_str[role.access],
+				 role.invalid ? " invalid" : "",
+				 role.cr4_pae ? "" : "!",
+				 role.nxe ? "" : "!",
+				 role.cr0_wp ? "" : "!",
+				 role.smep_and_not_wp ? " smep" : "",
+				 role.smap_and_not_wp ? " smap" : "",
+				 role.smm ? " smm" : "");
+	} else
+		trace_seq_printf(s, "WORD: %08x", role.word);
+
+	tep_print_num_field(s, " root %u ", event,
+			    "root_count", record, 1);
+
+	if (tep_get_field_val(s, event, "unsync", record, &val, 1) < 0)
+		return -1;
+
+	trace_seq_printf(s, "%s%c", val ? "unsync" : "sync", 0);
+	return 0;
+}
+
+static int kvm_mmu_get_page_handler(struct trace_seq *s,
+				    struct tep_record *record,
+				    struct tep_event *event, void *context)
+{
+	unsigned long long val;
+
+	if (tep_get_field_val(s, event, "created", record, &val, 1) < 0)
+		return -1;
+
+	trace_seq_printf(s, "%s ", val ? "new" : "existing");
+
+	if (tep_get_field_val(s, event, "gfn", record, &val, 1) < 0)
+		return -1;
+
+	trace_seq_printf(s, "sp gfn %llx ", val);
+	return kvm_mmu_print_role(s, record, event, context);
+}
+
+#define PT_WRITABLE_SHIFT 1
+#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
+
+static unsigned long long
+process_is_writable_pte(struct trace_seq *s, unsigned long long *args)
+{
+	unsigned long pte = args[0];
+	return pte & PT_WRITABLE_MASK;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	init_disassembler();
+
+	tep_register_event_handler(tep, -1, "kvm", "kvm_exit",
+				   kvm_exit_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kvm", "kvm_entry",
+				   kvm_entry_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
+				   kvm_emulate_insn_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
+				   kvm_nested_vmexit_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
+				   kvm_nested_vmexit_inject_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
+				   kvm_mmu_get_page_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
+				   kvm_mmu_print_role, NULL);
+
+	tep_register_event_handler(tep, -1,
+				   "kvmmmu", "kvm_mmu_unsync_page",
+				   kvm_mmu_print_role, NULL);
+
+	tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
+				   kvm_mmu_print_role, NULL);
+
+	tep_register_event_handler(tep, -1, "kvmmmu",
+				   "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
+				   NULL);
+
+	tep_register_print_function(tep,
+				    process_is_writable_pte,
+				    TEP_FUNC_ARG_INT,
+				    "is_writable_pte",
+				    TEP_FUNC_ARG_LONG,
+				    TEP_FUNC_ARG_VOID);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_event_handler(tep, -1, "kvm", "kvm_exit",
+				     kvm_exit_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvm", "kvm_entry",
+				     kvm_entry_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
+				     kvm_emulate_insn_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
+				     kvm_nested_vmexit_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
+				     kvm_nested_vmexit_inject_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
+				     kvm_mmu_get_page_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
+				     kvm_mmu_print_role, NULL);
+
+	tep_unregister_event_handler(tep, -1,
+				     "kvmmmu", "kvm_mmu_unsync_page",
+				     kvm_mmu_print_role, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
+				     kvm_mmu_print_role, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvmmmu",
+				     "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
+				     NULL);
+
+	tep_unregister_print_function(tep, process_is_writable_pte,
+				      "is_writable_pte");
+}