From 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 12:05:51 +0200 Subject: Adding upstream version 5.10.209. Signed-off-by: Daniel Baumann --- tools/testing/selftests/kvm/lib/x86_64/handlers.S | 81 ++ tools/testing/selftests/kvm/lib/x86_64/processor.c | 1258 ++++++++++++++++++++ tools/testing/selftests/kvm/lib/x86_64/svm.c | 177 +++ tools/testing/selftests/kvm/lib/x86_64/ucall.c | 59 + tools/testing/selftests/kvm/lib/x86_64/vmx.c | 553 +++++++++ 5 files changed, 2128 insertions(+) create mode 100644 tools/testing/selftests/kvm/lib/x86_64/handlers.S create mode 100644 tools/testing/selftests/kvm/lib/x86_64/processor.c create mode 100644 tools/testing/selftests/kvm/lib/x86_64/svm.c create mode 100644 tools/testing/selftests/kvm/lib/x86_64/ucall.c create mode 100644 tools/testing/selftests/kvm/lib/x86_64/vmx.c (limited to 'tools/testing/selftests/kvm/lib/x86_64') diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S new file mode 100644 index 000000000..aaf7bc7d2 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S @@ -0,0 +1,81 @@ +handle_exception: + push %r15 + push %r14 + push %r13 + push %r12 + push %r11 + push %r10 + push %r9 + push %r8 + + push %rdi + push %rsi + push %rbp + push %rbx + push %rdx + push %rcx + push %rax + mov %rsp, %rdi + + call route_exception + + pop %rax + pop %rcx + pop %rdx + pop %rbx + pop %rbp + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + pop %r12 + pop %r13 + pop %r14 + pop %r15 + + /* Discard vector and error code. */ + add $16, %rsp + iretq + +/* + * Build the handle_exception wrappers which push the vector/error code on the + * stack and an array of pointers to those wrappers. + */ +.pushsection .rodata +.globl idt_handlers +idt_handlers: +.popsection + +.macro HANDLERS has_error from to + vector = \from + .rept \to - \from + 1 + .align 8 + + /* Fetch current address and append it to idt_handlers. */ + current_handler = . +.pushsection .rodata +.quad current_handler +.popsection + + .if ! \has_error + pushq $0 + .endif + pushq $vector + jmp handle_exception + vector = vector + 1 + .endr +.endm + +.global idt_handler_code +idt_handler_code: + HANDLERS has_error=0 from=0 to=7 + HANDLERS has_error=1 from=8 to=8 + HANDLERS has_error=0 from=9 to=9 + HANDLERS has_error=1 from=10 to=14 + HANDLERS has_error=0 from=15 to=16 + HANDLERS has_error=1 from=17 to=17 + HANDLERS has_error=0 from=18 to=255 + +.section .note.GNU-stack, "", %progbits diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c new file mode 100644 index 000000000..f5d2d27be --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -0,0 +1,1258 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * tools/testing/selftests/kvm/lib/x86_64/processor.c + * + * Copyright (C) 2018, Google LLC. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "../kvm_util_internal.h" +#include "processor.h" + +#ifndef NUM_INTERRUPTS +#define NUM_INTERRUPTS 256 +#endif + +#define DEFAULT_CODE_SELECTOR 0x8 +#define DEFAULT_DATA_SELECTOR 0x10 + +/* Minimum physical address used for virtual translation tables. */ +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +vm_vaddr_t exception_handlers; + +/* Virtual translation table structure declarations */ +struct pageMapL4Entry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageDirectoryPointerEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageDirectoryEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageTableEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t dirty:1; + uint64_t reserved_07:1; + uint64_t global:1; + uint64_t ignored_11_09:3; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +void regs_dump(FILE *stream, struct kvm_regs *regs, + uint8_t indent) +{ + fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " + "rcx: 0x%.16llx rdx: 0x%.16llx\n", + indent, "", + regs->rax, regs->rbx, regs->rcx, regs->rdx); + fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " + "rsp: 0x%.16llx rbp: 0x%.16llx\n", + indent, "", + regs->rsi, regs->rdi, regs->rsp, regs->rbp); + fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " + "r10: 0x%.16llx r11: 0x%.16llx\n", + indent, "", + regs->r8, regs->r9, regs->r10, regs->r11); + fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " + "r14: 0x%.16llx r15: 0x%.16llx\n", + indent, "", + regs->r12, regs->r13, regs->r14, regs->r15); + fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", + indent, "", + regs->rip, regs->rflags); +} + +/* + * Segment Dump + * + * Input Args: + * stream - Output FILE stream + * segment - KVM segment + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps the state of the KVM segment given by @segment, to the FILE stream + * given by @stream. + */ +static void segment_dump(FILE *stream, struct kvm_segment *segment, + uint8_t indent) +{ + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " + "selector: 0x%.4x type: 0x%.2x\n", + indent, "", segment->base, segment->limit, + segment->selector, segment->type); + fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " + "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", + indent, "", segment->present, segment->dpl, + segment->db, segment->s, segment->l); + fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " + "unusable: 0x%.2x padding: 0x%.2x\n", + indent, "", segment->g, segment->avl, + segment->unusable, segment->padding); +} + +/* + * dtable Dump + * + * Input Args: + * stream - Output FILE stream + * dtable - KVM dtable + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps the state of the KVM dtable given by @dtable, to the FILE stream + * given by @stream. + */ +static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, + uint8_t indent) +{ + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " + "padding: 0x%.4x 0x%.4x 0x%.4x\n", + indent, "", dtable->base, dtable->limit, + dtable->padding[0], dtable->padding[1], dtable->padding[2]); +} + +void sregs_dump(FILE *stream, struct kvm_sregs *sregs, + uint8_t indent) +{ + unsigned int i; + + fprintf(stream, "%*scs:\n", indent, ""); + segment_dump(stream, &sregs->cs, indent + 2); + fprintf(stream, "%*sds:\n", indent, ""); + segment_dump(stream, &sregs->ds, indent + 2); + fprintf(stream, "%*ses:\n", indent, ""); + segment_dump(stream, &sregs->es, indent + 2); + fprintf(stream, "%*sfs:\n", indent, ""); + segment_dump(stream, &sregs->fs, indent + 2); + fprintf(stream, "%*sgs:\n", indent, ""); + segment_dump(stream, &sregs->gs, indent + 2); + fprintf(stream, "%*sss:\n", indent, ""); + segment_dump(stream, &sregs->ss, indent + 2); + fprintf(stream, "%*str:\n", indent, ""); + segment_dump(stream, &sregs->tr, indent + 2); + fprintf(stream, "%*sldt:\n", indent, ""); + segment_dump(stream, &sregs->ldt, indent + 2); + + fprintf(stream, "%*sgdt:\n", indent, ""); + dtable_dump(stream, &sregs->gdt, indent + 2); + fprintf(stream, "%*sidt:\n", indent, ""); + dtable_dump(stream, &sregs->idt, indent + 2); + + fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " + "cr3: 0x%.16llx cr4: 0x%.16llx\n", + indent, "", + sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); + fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " + "apic_base: 0x%.16llx\n", + indent, "", + sregs->cr8, sregs->efer, sregs->apic_base); + + fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { + fprintf(stream, "%*s%.16llx\n", indent + 2, "", + sregs->interrupt_bitmap[i]); + } +} + +void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) +{ + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + /* If needed, create page map l4 table. */ + if (!vm->pgd_created) { + vm_paddr_t paddr = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + vm->pgd = paddr; + vm->pgd_created = true; + } +} + +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint32_t pgd_memslot) +{ + uint16_t index[4]; + struct pageMapL4Entry *pml4e; + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", + vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", + vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + index[0] = (vaddr >> 12) & 0x1ffu; + index[1] = (vaddr >> 21) & 0x1ffu; + index[2] = (vaddr >> 30) & 0x1ffu; + index[3] = (vaddr >> 39) & 0x1ffu; + + /* Allocate page directory pointer table if not present. */ + pml4e = addr_gpa2hva(vm, vm->pgd); + if (!pml4e[index[3]].present) { + pml4e[index[3]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pml4e[index[3]].writable = true; + pml4e[index[3]].present = true; + } + + /* Allocate page directory table if not present. */ + struct pageDirectoryPointerEntry *pdpe; + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].present) { + pdpe[index[2]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pdpe[index[2]].writable = true; + pdpe[index[2]].present = true; + } + + /* Allocate page table if not present. */ + struct pageDirectoryEntry *pde; + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].present) { + pde[index[1]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pde[index[1]].writable = true; + pde[index[1]].present = true; + } + + /* Fill in page table entry. */ + struct pageTableEntry *pte; + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + pte[index[0]].address = paddr >> vm->page_shift; + pte[index[0]].writable = true; + pte[index[0]].present = 1; +} + +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + struct pageMapL4Entry *pml4e, *pml4e_start; + struct pageDirectoryPointerEntry *pdpe, *pdpe_start; + struct pageDirectoryEntry *pde, *pde_start; + struct pageTableEntry *pte, *pte_start; + + if (!vm->pgd_created) + return; + + fprintf(stream, "%*s " + " no\n", indent, ""); + fprintf(stream, "%*s index hvaddr gpaddr " + "addr w exec dirty\n", + indent, ""); + pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm, + vm->pgd); + for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { + pml4e = &pml4e_start[n1]; + if (!pml4e->present) + continue; + fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u " + " %u\n", + indent, "", + pml4e - pml4e_start, pml4e, + addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address, + pml4e->writable, pml4e->execute_disable); + + pdpe_start = addr_gpa2hva(vm, pml4e->address + * vm->page_size); + for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { + pdpe = &pdpe_start[n2]; + if (!pdpe->present) + continue; + fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx " + "%u %u\n", + indent, "", + pdpe - pdpe_start, pdpe, + addr_hva2gpa(vm, pdpe), + (uint64_t) pdpe->address, pdpe->writable, + pdpe->execute_disable); + + pde_start = addr_gpa2hva(vm, + pdpe->address * vm->page_size); + for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { + pde = &pde_start[n3]; + if (!pde->present) + continue; + fprintf(stream, "%*spde 0x%-3zx %p " + "0x%-12lx 0x%-10lx %u %u\n", + indent, "", pde - pde_start, pde, + addr_hva2gpa(vm, pde), + (uint64_t) pde->address, pde->writable, + pde->execute_disable); + + pte_start = addr_gpa2hva(vm, + pde->address * vm->page_size); + for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { + pte = &pte_start[n4]; + if (!pte->present) + continue; + fprintf(stream, "%*spte 0x%-3zx %p " + "0x%-12lx 0x%-10lx %u %u " + " %u 0x%-10lx\n", + indent, "", + pte - pte_start, pte, + addr_hva2gpa(vm, pte), + (uint64_t) pte->address, + pte->writable, + pte->execute_disable, + pte->dirty, + ((uint64_t) n1 << 27) + | ((uint64_t) n2 << 18) + | ((uint64_t) n3 << 9) + | ((uint64_t) n4)); + } + } + } + } +} + +/* + * Set Unusable Segment + * + * Input Args: None + * + * Output Args: + * segp - Pointer to segment register + * + * Return: None + * + * Sets the segment register pointed to by @segp to an unusable state. + */ +static void kvm_seg_set_unusable(struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->unusable = true; +} + +static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) +{ + void *gdt = addr_gva2hva(vm, vm->gdt); + struct desc64 *desc = gdt + (segp->selector >> 3) * 8; + + desc->limit0 = segp->limit & 0xFFFF; + desc->base0 = segp->base & 0xFFFF; + desc->base1 = segp->base >> 16; + desc->type = segp->type; + desc->s = segp->s; + desc->dpl = segp->dpl; + desc->p = segp->present; + desc->limit1 = segp->limit >> 16; + desc->avl = segp->avl; + desc->l = segp->l; + desc->db = segp->db; + desc->g = segp->g; + desc->base2 = segp->base >> 24; + if (!segp->s) + desc->base3 = segp->base >> 32; +} + + +/* + * Set Long Mode Flat Kernel Code Segment + * + * Input Args: + * vm - VM whose GDT is being filled, or NULL to only write segp + * selector - selector value + * + * Output Args: + * segp - Pointer to KVM segment + * + * Return: None + * + * Sets up the KVM segment pointed to by @segp, to be a code segment + * with the selector value given by @selector. + */ +static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, + struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->selector = selector; + segp->limit = 0xFFFFFFFFu; + segp->s = 0x1; /* kTypeCodeData */ + segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed + * | kFlagCodeReadable + */ + segp->g = true; + segp->l = true; + segp->present = 1; + if (vm) + kvm_seg_fill_gdt_64bit(vm, segp); +} + +/* + * Set Long Mode Flat Kernel Data Segment + * + * Input Args: + * vm - VM whose GDT is being filled, or NULL to only write segp + * selector - selector value + * + * Output Args: + * segp - Pointer to KVM segment + * + * Return: None + * + * Sets up the KVM segment pointed to by @segp, to be a data segment + * with the selector value given by @selector. + */ +static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, + struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->selector = selector; + segp->limit = 0xFFFFFFFFu; + segp->s = 0x1; /* kTypeCodeData */ + segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed + * | kFlagDataWritable + */ + segp->g = true; + segp->present = true; + if (vm) + kvm_seg_fill_gdt_64bit(vm, segp); +} + +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint16_t index[4]; + struct pageMapL4Entry *pml4e; + struct pageDirectoryPointerEntry *pdpe; + struct pageDirectoryEntry *pde; + struct pageTableEntry *pte; + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + index[0] = (gva >> 12) & 0x1ffu; + index[1] = (gva >> 21) & 0x1ffu; + index[2] = (gva >> 30) & 0x1ffu; + index[3] = (gva >> 39) & 0x1ffu; + + if (!vm->pgd_created) + goto unmapped_gva; + pml4e = addr_gpa2hva(vm, vm->pgd); + if (!pml4e[index[3]].present) + goto unmapped_gva; + + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].present) + goto unmapped_gva; + + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].present) + goto unmapped_gva; + + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + if (!pte[index[0]].present) + goto unmapped_gva; + + return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu); + +unmapped_gva: + TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); + exit(EXIT_FAILURE); +} + +static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot, + int pgd_memslot) +{ + if (!vm->gdt) + vm->gdt = vm_vaddr_alloc(vm, getpagesize(), + KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot); + + dt->base = vm->gdt; + dt->limit = getpagesize(); +} + +static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, + int selector, int gdt_memslot, + int pgd_memslot) +{ + if (!vm->tss) + vm->tss = vm_vaddr_alloc(vm, getpagesize(), + KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot); + + memset(segp, 0, sizeof(*segp)); + segp->base = vm->tss; + segp->limit = 0x67; + segp->selector = selector; + segp->type = 0xb; + segp->present = 1; + kvm_seg_fill_gdt_64bit(vm, segp); +} + +static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) +{ + struct kvm_sregs sregs; + + /* Set mode specific system register values. */ + vcpu_sregs_get(vm, vcpuid, &sregs); + + sregs.idt.limit = 0; + + kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot); + + switch (vm->mode) { + case VM_MODE_PXXV48_4K: + sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; + sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; + sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); + + kvm_seg_set_unusable(&sregs.ldt); + kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); + kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot); + break; + + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + sregs.cr3 = vm->pgd; + vcpu_sregs_set(vm, vcpuid, &sregs); +} + +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +{ + struct kvm_mp_state mp_state; + struct kvm_regs regs; + vm_vaddr_t stack_vaddr; + stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); + + /* Create VCPU */ + vm_vcpu_add(vm, vcpuid); + vcpu_setup(vm, vcpuid, 0, 0); + + /* Setup guest general purpose registers */ + vcpu_regs_get(vm, vcpuid, ®s); + regs.rflags = regs.rflags | 0x2; + regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()); + regs.rip = (unsigned long) guest_code; + vcpu_regs_set(vm, vcpuid, ®s); + + /* Setup the MP state */ + mp_state.mp_state = 0; + vcpu_set_mp_state(vm, vcpuid, &mp_state); +} + +/* + * Allocate an instance of struct kvm_cpuid2 + * + * Input Args: None + * + * Output Args: None + * + * Return: A pointer to the allocated struct. The caller is responsible + * for freeing this struct. + * + * Since kvm_cpuid2 uses a 0-length array to allow a the size of the + * array to be decided at allocation time, allocation is slightly + * complicated. This function uses a reasonable default length for + * the array and performs the appropriate allocation. + */ +static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) +{ + struct kvm_cpuid2 *cpuid; + int nent = 100; + size_t size; + + size = sizeof(*cpuid); + size += nent * sizeof(struct kvm_cpuid_entry2); + cpuid = malloc(size); + if (!cpuid) { + perror("malloc"); + abort(); + } + + cpuid->nent = nent; + + return cpuid; +} + +/* + * KVM Supported CPUID Get + * + * Input Args: None + * + * Output Args: + * + * Return: The supported KVM CPUID + * + * Get the guest CPUID supported by KVM. + */ +struct kvm_cpuid2 *kvm_get_supported_cpuid(void) +{ + static struct kvm_cpuid2 *cpuid; + int ret; + int kvm_fd; + + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(); + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); + TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", + ret, errno); + + close(kvm_fd); + return cpuid; +} + +/* + * Locate a cpuid entry. + * + * Input Args: + * function: The function of the cpuid entry to find. + * index: The index of the cpuid entry. + * + * Output Args: None + * + * Return: A pointer to the cpuid entry. Never returns NULL. + */ +struct kvm_cpuid_entry2 * +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) +{ + struct kvm_cpuid2 *cpuid; + struct kvm_cpuid_entry2 *entry = NULL; + int i; + + cpuid = kvm_get_supported_cpuid(); + for (i = 0; i < cpuid->nent; i++) { + if (cpuid->entries[i].function == function && + cpuid->entries[i].index == index) { + entry = &cpuid->entries[i]; + break; + } + } + + TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).", + function, index); + return entry; +} + +/* + * VM VCPU CPUID Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU id + * cpuid - The CPUID values to set. + * + * Output Args: None + * + * Return: void + * + * Set the VCPU's CPUID. + */ +void vcpu_set_cpuid(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_cpuid2 *cpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int rc; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); + TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i", + rc, errno); + +} + +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code) +{ + struct kvm_vm *vm; + /* + * For x86 the maximum page table size for a memory region + * will be when only 4K pages are used. In that case the + * total extra size for page tables (for extra N pages) will + * be: N/512+N/512^2+N/512^3+... which is definitely smaller + * than N/512*2. + */ + uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; + + /* Create VM */ + vm = vm_create(VM_MODE_DEFAULT, + DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, + O_RDWR); + + /* Setup guest code */ + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + + /* Setup IRQ Chip */ + vm_create_irqchip(vm); + + /* Add the first vCPU. */ + vm_vcpu_add_default(vm, vcpuid, guest_code); + + return vm; +} + +/* + * VCPU Get MSR + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * msr_index - Index of MSR + * + * Output Args: None + * + * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced. + * + * Get value of MSR for VCPU. + */ +uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + int r; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header); + TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n" + " rc: %i errno: %i", r, errno); + + return buffer.entry.data; +} + +/* + * _VCPU Set MSR + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * msr_index - Index of MSR + * msr_value - New value of MSR + * + * Output Args: None + * + * Return: The result of KVM_SET_MSRS. + * + * Sets the value of an MSR for the given VCPU. + */ +int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, + uint64_t msr_value) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + int r; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + memset(&buffer, 0, sizeof(buffer)); + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + buffer.entry.data = msr_value; + r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header); + return r; +} + +/* + * VCPU Set MSR + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * msr_index - Index of MSR + * msr_value - New value of MSR + * + * Output Args: None + * + * Return: On success, nothing. On failure a TEST_ASSERT is produced. + * + * Set value of MSR for VCPU. + */ +void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, + uint64_t msr_value) +{ + int r; + + r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value); + TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n" + " rc: %i errno: %i", r, errno); +} + +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +{ + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" + " num: %u\n", + num); + + va_start(ap, num); + vcpu_regs_get(vm, vcpuid, ®s); + + if (num >= 1) + regs.rdi = va_arg(ap, uint64_t); + + if (num >= 2) + regs.rsi = va_arg(ap, uint64_t); + + if (num >= 3) + regs.rdx = va_arg(ap, uint64_t); + + if (num >= 4) + regs.rcx = va_arg(ap, uint64_t); + + if (num >= 5) + regs.r8 = va_arg(ap, uint64_t); + + if (num >= 6) + regs.r9 = va_arg(ap, uint64_t); + + vcpu_regs_set(vm, vcpuid, ®s); + va_end(ap); +} + +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +{ + struct kvm_regs regs; + struct kvm_sregs sregs; + + fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid); + + fprintf(stream, "%*sregs:\n", indent + 2, ""); + vcpu_regs_get(vm, vcpuid, ®s); + regs_dump(stream, ®s, indent + 4); + + fprintf(stream, "%*ssregs:\n", indent + 2, ""); + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs_dump(stream, &sregs, indent + 4); +} + +struct kvm_x86_state { + struct kvm_vcpu_events events; + struct kvm_mp_state mp_state; + struct kvm_regs regs; + struct kvm_xsave xsave; + struct kvm_xcrs xcrs; + struct kvm_sregs sregs; + struct kvm_debugregs debugregs; + union { + struct kvm_nested_state nested; + char nested_[16384]; + }; + struct kvm_msrs msrs; +}; + +static int kvm_get_num_msrs_fd(int kvm_fd) +{ + struct kvm_msr_list nmsrs; + int r; + + nmsrs.nmsrs = 0; + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); + TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", + r); + + return nmsrs.nmsrs; +} + +static int kvm_get_num_msrs(struct kvm_vm *vm) +{ + return kvm_get_num_msrs_fd(vm->kvm_fd); +} + +struct kvm_msr_list *kvm_get_msr_index_list(void) +{ + struct kvm_msr_list *list; + int nmsrs, r, kvm_fd; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + nmsrs = kvm_get_num_msrs_fd(kvm_fd); + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + close(kvm_fd); + + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", + r); + + return list; +} + +struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct kvm_msr_list *list; + struct kvm_x86_state *state; + int nmsrs, r, i; + static int nested_size = -1; + + if (nested_size == -1) { + nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE); + TEST_ASSERT(nested_size <= sizeof(state->nested_), + "Nested state size too big, %i > %zi", + nested_size, sizeof(state->nested_)); + } + + /* + * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees + * guest state is consistent only after userspace re-enters the + * kernel with KVM_RUN. Complete IO prior to migrating state + * to a new VM. + */ + vcpu_run_complete_io(vm, vcpuid); + + nmsrs = kvm_get_num_msrs(vm); + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", + r); + + state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0])); + r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", + r); + + if (kvm_check_cap(KVM_CAP_XCRS)) { + r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i", + r); + } + + r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", + r); + + if (nested_size) { + state->nested.size = sizeof(state->nested_); + r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i", + r); + TEST_ASSERT(state->nested.size <= nested_size, + "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", + state->nested.size, nested_size); + } else + state->nested.size = 0; + + state->msrs.nmsrs = nmsrs; + for (i = 0; i < nmsrs; i++) + state->msrs.entries[i].index = list->indices[i]; + r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); + TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", + r, r == nmsrs ? -1 : list->indices[r]); + + r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i", + r); + + free(list); + return state; +} + +void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int r; + + r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", + r); + + if (kvm_check_cap(KVM_CAP_XCRS)) { + r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i", + r); + } + + r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); + TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", + r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index); + + r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", + r); + + if (state->nested.size) { + r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", + r); + } +} + +bool is_intel_cpu(void) +{ + int eax, ebx, ecx, edx; + const uint32_t *chunk; + const int leaf = 0; + + __asm__ __volatile__( + "cpuid" + : /* output */ "=a"(eax), "=b"(ebx), + "=c"(ecx), "=d"(edx) + : /* input */ "0"(leaf), "2"(0)); + + chunk = (const uint32_t *)("GenuineIntel"); + return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); +} + +uint32_t kvm_get_cpuid_max_basic(void) +{ + return kvm_get_supported_cpuid_entry(0)->eax; +} + +uint32_t kvm_get_cpuid_max_extended(void) +{ + return kvm_get_supported_cpuid_entry(0x80000000)->eax; +} + +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) +{ + struct kvm_cpuid_entry2 *entry; + bool pae; + + /* SDM 4.1.4 */ + if (kvm_get_cpuid_max_extended() < 0x80000008) { + pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6); + *pa_bits = pae ? 36 : 32; + *va_bits = 32; + } else { + entry = kvm_get_supported_cpuid_entry(0x80000008); + *pa_bits = entry->eax & 0xff; + *va_bits = (entry->eax >> 8) & 0xff; + } +} + +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + +static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, + int dpl, unsigned short selector) +{ + struct idt_entry *base = + (struct idt_entry *)addr_gva2hva(vm, vm->idt); + struct idt_entry *e = &base[vector]; + + memset(e, 0, sizeof(*e)); + e->offset0 = addr; + e->selector = selector; + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = addr >> 16; + e->offset2 = addr >> 32; +} + +void kvm_exit_unexpected_vector(uint32_t value) +{ + outl(UNEXPECTED_VECTOR_PORT, value); +} + +void route_exception(struct ex_regs *regs) +{ + typedef void(*handler)(struct ex_regs *); + handler *handlers = (handler *)exception_handlers; + + if (handlers && handlers[regs->vector]) { + handlers[regs->vector](regs); + return; + } + + kvm_exit_unexpected_vector(regs->vector); +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + extern void *idt_handlers; + int i; + + vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0); + vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0); + /* Handlers have the same address in both address spaces.*/ + for (i = 0; i < NUM_INTERRUPTS; i++) + set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, + DEFAULT_CODE_SELECTOR); +} + +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_sregs sregs; + + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs.idt.base = vm->idt; + sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; + sregs.gdt.base = vm->gdt; + sregs.gdt.limit = getpagesize() - 1; + kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); + vcpu_sregs_set(vm, vcpuid, &sregs); + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; +} + +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + + handlers[vector] = (vm_vaddr_t)handler; +} + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ + if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO + && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT + && vcpu_state(vm, vcpuid)->io.size == 4) { + /* Grab pointer to io data */ + uint32_t *data = (void *)vcpu_state(vm, vcpuid) + + vcpu_state(vm, vcpuid)->io.data_offset; + + TEST_ASSERT(false, + "Unexpected vectored event in guest (vector:0x%x)", + *data); + } +} + +bool set_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 *ent) +{ + int i; + + for (i = 0; i < cpuid->nent; i++) { + struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; + + if (cur->function != ent->function || cur->index != ent->index) + continue; + + memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2)); + return true; + } + + return false; +} + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3) +{ + uint64_t r; + + asm volatile("vmcall" + : "=a"(r) + : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3)); + return r; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c new file mode 100644 index 000000000..a58507a7b --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * tools/testing/selftests/kvm/lib/x86_64/svm.c + * Helpers used for nested SVM testing + * Largely inspired from KVM unit test svm.c + * + * Copyright (C) 2020, Red Hat, Inc. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "../kvm_util_internal.h" +#include "processor.h" +#include "svm_util.h" + +struct gpr64_regs guest_regs; +u64 rflags; + +/* Allocate memory regions for nested SVM tests. + * + * Input Args: + * vm - The VM to allocate guest-virtual addresses in. + * + * Output Args: + * p_svm_gva - The guest virtual address for the struct svm_test_data. + * + * Return: + * Pointer to structure with the addresses of the SVM areas. + */ +struct svm_test_data * +vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva) +{ + vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + struct svm_test_data *svm = addr_gva2hva(vm, svm_gva); + + svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb); + svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb); + + svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area); + svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area); + + *p_svm_gva = svm_gva; + return svm; +} + +static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, + u64 base, u32 limit, u32 attr) +{ + seg->selector = selector; + seg->attrib = attr; + seg->limit = limit; + seg->base = base; +} + +/* + * Avoid using memset to clear the vmcb, since libc may not be + * available in L1 (and, even if it is, features that libc memset may + * want to use, like AVX, may not be enabled). + */ +static void clear_vmcb(struct vmcb *vmcb) +{ + int n = sizeof(*vmcb) / sizeof(u32); + + asm volatile ("rep stosl" : "+c"(n), "+D"(vmcb) : "a"(0) : "memory"); +} + +void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) +{ + struct vmcb *vmcb = svm->vmcb; + uint64_t vmcb_gpa = svm->vmcb_gpa; + struct vmcb_save_area *save = &vmcb->save; + struct vmcb_control_area *ctrl = &vmcb->control; + u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK + | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK; + u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK + | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK; + uint64_t efer; + + efer = rdmsr(MSR_EFER); + wrmsr(MSR_EFER, efer | EFER_SVME); + wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa); + + clear_vmcb(vmcb); + asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory"); + vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr); + vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr); + vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr); + vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr); + vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0); + vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0); + + ctrl->asid = 1; + save->cpl = 0; + save->efer = rdmsr(MSR_EFER); + asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory"); + asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory"); + asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory"); + asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory"); + asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory"); + asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory"); + save->g_pat = rdmsr(MSR_IA32_CR_PAT); + save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR); + ctrl->intercept = (1ULL << INTERCEPT_VMRUN) | + (1ULL << INTERCEPT_VMMCALL); + + vmcb->save.rip = (u64)guest_rip; + vmcb->save.rsp = (u64)guest_rsp; + guest_regs.rdi = (u64)svm; +} + +/* + * save/restore 64-bit general registers except rax, rip, rsp + * which are directly handed through the VMCB guest processor state + */ +#define SAVE_GPR_C \ + "xchg %%rbx, guest_regs+0x20\n\t" \ + "xchg %%rcx, guest_regs+0x10\n\t" \ + "xchg %%rdx, guest_regs+0x18\n\t" \ + "xchg %%rbp, guest_regs+0x30\n\t" \ + "xchg %%rsi, guest_regs+0x38\n\t" \ + "xchg %%rdi, guest_regs+0x40\n\t" \ + "xchg %%r8, guest_regs+0x48\n\t" \ + "xchg %%r9, guest_regs+0x50\n\t" \ + "xchg %%r10, guest_regs+0x58\n\t" \ + "xchg %%r11, guest_regs+0x60\n\t" \ + "xchg %%r12, guest_regs+0x68\n\t" \ + "xchg %%r13, guest_regs+0x70\n\t" \ + "xchg %%r14, guest_regs+0x78\n\t" \ + "xchg %%r15, guest_regs+0x80\n\t" + +#define LOAD_GPR_C SAVE_GPR_C + +/* + * selftests do not use interrupts so we dropped clgi/sti/cli/stgi + * for now. registers involved in LOAD/SAVE_GPR_C are eventually + * unmodified so they do not need to be in the clobber list. + */ +void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa) +{ + asm volatile ( + "vmload %[vmcb_gpa]\n\t" + "mov rflags, %%r15\n\t" // rflags + "mov %%r15, 0x170(%[vmcb])\n\t" + "mov guest_regs, %%r15\n\t" // rax + "mov %%r15, 0x1f8(%[vmcb])\n\t" + LOAD_GPR_C + "vmrun %[vmcb_gpa]\n\t" + SAVE_GPR_C + "mov 0x170(%[vmcb]), %%r15\n\t" // rflags + "mov %%r15, rflags\n\t" + "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax + "mov %%r15, guest_regs\n\t" + "vmsave %[vmcb_gpa]\n\t" + : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa) + : "r15", "memory"); +} + +bool nested_svm_supported(void) +{ + struct kvm_cpuid_entry2 *entry = + kvm_get_supported_cpuid_entry(0x80000001); + + return entry->ecx & CPUID_SVM; +} + +void nested_svm_check_supported(void) +{ + if (!nested_svm_supported()) { + print_skip("nested SVM not enabled"); + exit(KSFT_SKIP); + } +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c new file mode 100644 index 000000000..a3489973e --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#include "kvm_util.h" + +#define UCALL_PIO_PORT ((uint16_t)0x1000) + +void ucall_init(struct kvm_vm *vm, void *arg) +{ +} + +void ucall_uninit(struct kvm_vm *vm) +{ +} + +void ucall(uint64_t cmd, int nargs, ...) +{ + struct ucall uc = { + .cmd = cmd, + }; + va_list va; + int i; + + nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + uc.args[i] = va_arg(va, uint64_t); + va_end(va); + + asm volatile("in %[port], %%al" + : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory"); +} + +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +{ + struct kvm_run *run = vcpu_state(vm, vcpu_id); + struct ucall ucall = {}; + + if (uc) + memset(uc, 0, sizeof(*uc)); + + if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { + struct kvm_regs regs; + + vcpu_regs_get(vm, vcpu_id, ®s); + memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), + sizeof(ucall)); + + vcpu_run_complete_io(vm, vcpu_id); + if (uc) + memcpy(uc, &ucall, sizeof(ucall)); + } + + return ucall.cmd; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c new file mode 100644 index 000000000..2448b30e8 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -0,0 +1,553 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * tools/testing/selftests/kvm/lib/x86_64/vmx.c + * + * Copyright (C) 2018, Google LLC. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "../kvm_util_internal.h" +#include "processor.h" +#include "vmx.h" + +#define PAGE_SHIFT_4K 12 + +#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 + +bool enable_evmcs; + +struct hv_enlightened_vmcs *current_evmcs; +struct hv_vp_assist_page *current_vp_assist; + +struct eptPageTableEntry { + uint64_t readable:1; + uint64_t writable:1; + uint64_t executable:1; + uint64_t memory_type:3; + uint64_t ignore_pat:1; + uint64_t page_size:1; + uint64_t accessed:1; + uint64_t dirty:1; + uint64_t ignored_11_10:2; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t suppress_ve:1; +}; + +struct eptPageTablePointer { + uint64_t memory_type:3; + uint64_t page_walk_length:3; + uint64_t ad_enabled:1; + uint64_t reserved_11_07:5; + uint64_t address:40; + uint64_t reserved_63_52:12; +}; +int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id) +{ + uint16_t evmcs_ver; + + struct kvm_enable_cap enable_evmcs_cap = { + .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS, + .args[0] = (unsigned long)&evmcs_ver + }; + + vcpu_ioctl(vm, vcpu_id, KVM_ENABLE_CAP, &enable_evmcs_cap); + + /* KVM should return supported EVMCS version range */ + TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && + (evmcs_ver & 0xff) > 0, + "Incorrect EVMCS version range: %x:%x\n", + evmcs_ver & 0xff, evmcs_ver >> 8); + + return evmcs_ver; +} + +/* Allocate memory regions for nested VMX tests. + * + * Input Args: + * vm - The VM to allocate guest-virtual addresses in. + * + * Output Args: + * p_vmx_gva - The guest virtual address for the struct vmx_pages. + * + * Return: + * Pointer to structure with the addresses of the VMX areas. + */ +struct vmx_pages * +vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) +{ + vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva); + + /* Setup of a region of guest memory for the vmxon region. */ + vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon); + vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon); + + /* Setup of a region of guest memory for a vmcs. */ + vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs); + vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs); + + /* Setup of a region of guest memory for the MSR bitmap. */ + vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr); + vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr); + memset(vmx->msr_hva, 0, getpagesize()); + + /* Setup of a region of guest memory for the shadow VMCS. */ + vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs); + vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs); + + /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */ + vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread); + vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread); + memset(vmx->vmread_hva, 0, getpagesize()); + + vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite); + vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); + memset(vmx->vmwrite_hva, 0, getpagesize()); + + /* Setup of a region of guest memory for the VP Assist page. */ + vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist); + vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist); + + /* Setup of a region of guest memory for the enlightened VMCS. */ + vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + vmx->enlightened_vmcs_hva = + addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs); + vmx->enlightened_vmcs_gpa = + addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs); + + *p_vmx_gva = vmx_gva; + return vmx; +} + +bool prepare_for_vmx_operation(struct vmx_pages *vmx) +{ + uint64_t feature_control; + uint64_t required; + unsigned long cr0; + unsigned long cr4; + + /* + * Ensure bits in CR0 and CR4 are valid in VMX operation: + * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. + * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. + */ + __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); + cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); + cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); + __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); + + __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); + cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); + cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); + /* Enable VMX operation */ + cr4 |= X86_CR4_VMXE; + __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); + + /* + * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: + * Bit 0: Lock bit. If clear, VMXON causes a #GP. + * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON + * outside of SMX causes a #GP. + */ + required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; + required |= FEAT_CTL_LOCKED; + feature_control = rdmsr(MSR_IA32_FEAT_CTL); + if ((feature_control & required) != required) + wrmsr(MSR_IA32_FEAT_CTL, feature_control | required); + + /* Enter VMX root operation. */ + *(uint32_t *)(vmx->vmxon) = vmcs_revision(); + if (vmxon(vmx->vmxon_gpa)) + return false; + + return true; +} + +bool load_vmcs(struct vmx_pages *vmx) +{ + if (!enable_evmcs) { + /* Load a VMCS. */ + *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + if (vmclear(vmx->vmcs_gpa)) + return false; + + if (vmptrld(vmx->vmcs_gpa)) + return false; + + /* Setup shadow VMCS, do not load it yet. */ + *(uint32_t *)(vmx->shadow_vmcs) = + vmcs_revision() | 0x80000000ul; + if (vmclear(vmx->shadow_vmcs_gpa)) + return false; + } else { + if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa, + vmx->enlightened_vmcs)) + return false; + current_evmcs->revision_id = EVMCS_VERSION; + } + + return true; +} + +/* + * Initialize the control fields to the most basic settings possible. + */ +static inline void init_vmcs_control_fields(struct vmx_pages *vmx) +{ + uint32_t sec_exec_ctl = 0; + + vmwrite(VIRTUAL_PROCESSOR_ID, 0); + vmwrite(POSTED_INTR_NV, 0); + + vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); + + if (vmx->eptp_gpa) { + uint64_t ept_paddr; + struct eptPageTablePointer eptp = { + .memory_type = VMX_BASIC_MEM_TYPE_WB, + .page_walk_length = 3, /* + 1 */ + .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS), + .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, + }; + + memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); + vmwrite(EPT_POINTER, ept_paddr); + sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; + } + + if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl)) + vmwrite(CPU_BASED_VM_EXEC_CONTROL, + rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); + else { + vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS)); + GUEST_ASSERT(!sec_exec_ctl); + } + + vmwrite(EXCEPTION_BITMAP, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ + vmwrite(CR3_TARGET_COUNT, 0); + vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | + VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ + vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); + vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | + VM_ENTRY_IA32E_MODE); /* 64-bit guest */ + vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); + vmwrite(TPR_THRESHOLD, 0); + + vmwrite(CR0_GUEST_HOST_MASK, 0); + vmwrite(CR4_GUEST_HOST_MASK, 0); + vmwrite(CR0_READ_SHADOW, get_cr0()); + vmwrite(CR4_READ_SHADOW, get_cr4()); + + vmwrite(MSR_BITMAP, vmx->msr_gpa); + vmwrite(VMREAD_BITMAP, vmx->vmread_gpa); + vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa); +} + +/* + * Initialize the host state fields based on the current host state, with + * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch + * or vmresume. + */ +static inline void init_vmcs_host_state(void) +{ + uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + + vmwrite(HOST_ES_SELECTOR, get_es()); + vmwrite(HOST_CS_SELECTOR, get_cs()); + vmwrite(HOST_SS_SELECTOR, get_ss()); + vmwrite(HOST_DS_SELECTOR, get_ds()); + vmwrite(HOST_FS_SELECTOR, get_fs()); + vmwrite(HOST_GS_SELECTOR, get_gs()); + vmwrite(HOST_TR_SELECTOR, get_tr()); + + if (exit_controls & VM_EXIT_LOAD_IA32_PAT) + vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); + if (exit_controls & VM_EXIT_LOAD_IA32_EFER) + vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); + if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, + rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); + + vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); + + vmwrite(HOST_CR0, get_cr0()); + vmwrite(HOST_CR3, get_cr3()); + vmwrite(HOST_CR4, get_cr4()); + vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); + vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); + vmwrite(HOST_TR_BASE, + get_desc64_base((struct desc64 *)(get_gdt().address + get_tr()))); + vmwrite(HOST_GDTR_BASE, get_gdt().address); + vmwrite(HOST_IDTR_BASE, get_idt().address); + vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); + vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); +} + +/* + * Initialize the guest state fields essentially as a clone of + * the host state fields. Some host state fields have fixed + * values, and we set the corresponding guest state fields accordingly. + */ +static inline void init_vmcs_guest_state(void *rip, void *rsp) +{ + vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); + vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); + vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); + vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); + vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); + vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); + vmwrite(GUEST_LDTR_SELECTOR, 0); + vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); + vmwrite(GUEST_INTR_STATUS, 0); + vmwrite(GUEST_PML_INDEX, 0); + + vmwrite(VMCS_LINK_POINTER, -1ll); + vmwrite(GUEST_IA32_DEBUGCTL, 0); + vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); + vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); + vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, + vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); + + vmwrite(GUEST_ES_LIMIT, -1); + vmwrite(GUEST_CS_LIMIT, -1); + vmwrite(GUEST_SS_LIMIT, -1); + vmwrite(GUEST_DS_LIMIT, -1); + vmwrite(GUEST_FS_LIMIT, -1); + vmwrite(GUEST_GS_LIMIT, -1); + vmwrite(GUEST_LDTR_LIMIT, -1); + vmwrite(GUEST_TR_LIMIT, 0x67); + vmwrite(GUEST_GDTR_LIMIT, 0xffff); + vmwrite(GUEST_IDTR_LIMIT, 0xffff); + vmwrite(GUEST_ES_AR_BYTES, + vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_CS_AR_BYTES, 0xa09b); + vmwrite(GUEST_SS_AR_BYTES, 0xc093); + vmwrite(GUEST_DS_AR_BYTES, + vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_FS_AR_BYTES, + vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_GS_AR_BYTES, + vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); + vmwrite(GUEST_TR_AR_BYTES, 0x8b); + vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); + vmwrite(GUEST_ACTIVITY_STATE, 0); + vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); + vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); + + vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); + vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); + vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); + vmwrite(GUEST_ES_BASE, 0); + vmwrite(GUEST_CS_BASE, 0); + vmwrite(GUEST_SS_BASE, 0); + vmwrite(GUEST_DS_BASE, 0); + vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); + vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); + vmwrite(GUEST_LDTR_BASE, 0); + vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); + vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); + vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); + vmwrite(GUEST_DR7, 0x400); + vmwrite(GUEST_RSP, (uint64_t)rsp); + vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RFLAGS, 2); + vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); + vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); +} + +void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) +{ + init_vmcs_control_fields(vmx); + init_vmcs_host_state(); + init_vmcs_guest_state(guest_rip, guest_rsp); +} + +bool nested_vmx_supported(void) +{ + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + return entry->ecx & CPUID_VMX; +} + +void nested_vmx_check_supported(void) +{ + if (!nested_vmx_supported()) { + print_skip("nested VMX not enabled"); + exit(KSFT_SKIP); + } +} + +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot) +{ + uint16_t index[4]; + struct eptPageTableEntry *pml4e; + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((nested_paddr % vm->page_size) == 0, + "Nested physical address not on page boundary,\n" + " nested_paddr: 0x%lx vm->page_size: 0x%x", + nested_paddr, vm->page_size); + TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + index[0] = (nested_paddr >> 12) & 0x1ffu; + index[1] = (nested_paddr >> 21) & 0x1ffu; + index[2] = (nested_paddr >> 30) & 0x1ffu; + index[3] = (nested_paddr >> 39) & 0x1ffu; + + /* Allocate page directory pointer table if not present. */ + pml4e = vmx->eptp_hva; + if (!pml4e[index[3]].readable) { + pml4e[index[3]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pml4e[index[3]].writable = true; + pml4e[index[3]].readable = true; + pml4e[index[3]].executable = true; + } + + /* Allocate page directory table if not present. */ + struct eptPageTableEntry *pdpe; + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].readable) { + pdpe[index[2]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pdpe[index[2]].writable = true; + pdpe[index[2]].readable = true; + pdpe[index[2]].executable = true; + } + + /* Allocate page table if not present. */ + struct eptPageTableEntry *pde; + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].readable) { + pde[index[1]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pde[index[1]].writable = true; + pde[index[1]].readable = true; + pde[index[1]].executable = true; + } + + /* Fill in page table entry. */ + struct eptPageTableEntry *pte; + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + pte[index[0]].address = paddr >> vm->page_shift; + pte[index[0]].writable = true; + pte[index[0]].readable = true; + pte[index[0]].executable = true; + + /* + * For now mark these as accessed and dirty because the only + * testcase we have needs that. Can be reconsidered later. + */ + pte[index[0]].accessed = true; + pte[index[0]].dirty = true; +} + +/* + * Map a range of EPT guest physical addresses to the VM's physical address + * + * Input Args: + * vm - Virtual Machine + * nested_paddr - Nested guest physical address to map + * paddr - VM Physical Address + * size - The size of the range to map + * eptp_memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a nested guest translation for the + * page range starting at nested_paddr to the page range starting at paddr. + */ +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + uint32_t eptp_memslot) +{ + size_t page_size = vm->page_size; + size_t npages = size / page_size; + + TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); + TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + + while (npages--) { + nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot); + nested_paddr += page_size; + paddr += page_size; + } +} + +/* Prepare an identity extended page table that maps all the + * physical pages in VM. + */ +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot, uint32_t eptp_memslot) +{ + sparsebit_idx_t i, last; + struct userspace_mem_region *region = + memslot2region(vm, memslot); + + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; + last = i + (region->region.memory_size >> vm->page_shift); + for (;;) { + i = sparsebit_next_clear(region->unused_phy_pages, i); + if (i > last) + break; + + nested_map(vmx, vm, + (uint64_t)i << vm->page_shift, + (uint64_t)i << vm->page_shift, + 1 << vm->page_shift, + eptp_memslot); + } +} + +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); + vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); +} + +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); + vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); +} -- cgit v1.2.3