Adding upstream version 5.10.209. (ref: upstream/5.10.209)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 10:05:51 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 10:05:51 +0000
commit: 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
tree: a94efe259b9009378be6d90eb30d2b019d95c194 /tools/testing/selftests/kvm
parent: Initial commit. (diff)
download: linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz
linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip
62 files changed, 18345 insertions, 0 deletions
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
new file mode 100644
index 000000000..7a2c242b7
--- /dev/null
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Test binaries produced by the KVM selftests Makefile (TEST_GEN_PROGS_*).
+/aarch64/get-reg-list
+/aarch64/get-reg-list-sve
+/s390x/memop
+/s390x/resets
+/s390x/sync_regs_test
+/x86_64/cr4_cpuid_sync_test
+/x86_64/debug_regs
+/x86_64/evmcs_test
+/x86_64/kvm_pv_test
+/x86_64/hyperv_cpuid
+/x86_64/mmio_warning_test
+/x86_64/platform_info_test
+/x86_64/set_sregs_test
+/x86_64/smm_test
+/x86_64/state_test
+/x86_64/user_msr_test
+/x86_64/vmx_preemption_timer_test
+/x86_64/svm_vmcall_test
+/x86_64/sync_regs_test
+/x86_64/tsc_msrs_test
+/x86_64/vmx_apic_access_test
+/x86_64/vmx_close_while_nested_test
+/x86_64/vmx_dirty_log_test
+/x86_64/vmx_set_nested_state_test
+/x86_64/vmx_tsc_adjust_test
+/x86_64/xss_msr_test
+/clear_dirty_log_test
+/demand_paging_test
+/dirty_log_test
+/dirty_log_perf_test
+/kvm_create_max_vcpus
+/set_memory_region_test
+/steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
new file mode 100644
index 000000000..3d14ef777
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# NOTE(review): presumably supplies the try-run helper and $(comma) used by
+# the compiler probes further down -- confirm against scripts/Kbuild.include.
+include ../../../../scripts/Kbuild.include
+
+# Declared before anything else with no prerequisites; they are attached
+# later in this file (all: $(STATIC_LIBS)).
+all:
+
+top_srcdir = ../../../..
+KSFT_KHDR_INSTALL := 1
+
+# For cross-builds to work, UNAME_M has to map to ARCH and arch specific
+# directories and targets in this Makefile. "uname -m" doesn't map to
+# arch specific sub-directory names.
+#
+# UNAME_M variable is used to run the compiles pointing to the right arch
+# directories and build the right targets for these supported architectures.
+#
+# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable.
+# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable.
+#
+# x86_64 targets are named to include x86_64 as a suffix and directories
+# for includes are in x86_64 sub-directory. s390x and aarch64 follow the
+# same convention. "uname -m" doesn't result in the correct mapping for
+# s390x and aarch64.
+#
+# No change necessary for x86_64
+UNAME_M := $(shell uname -m)
+
+# Set UNAME_M for arm64 compile/install to work
+ifeq ($(ARCH),arm64)
+	UNAME_M := aarch64
+endif
+# Set UNAME_M for s390x compile/install to work
+ifeq ($(ARCH),s390)
+	UNAME_M := s390x
+endif
+
+# Sources of the shared test library: the common set, followed by one
+# per-architecture set that is appended below according to UNAME_M.
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
+LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
+LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
+LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
+
+# Test programs built when UNAME_M is x86_64.
+TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
+TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
+TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
+TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
+TEST_GEN_PROGS_x86_64 += x86_64/smm_test
+TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
+TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
+TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
+TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
+TEST_GEN_PROGS_x86_64 += demand_paging_test
+TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += set_memory_region_test
+TEST_GEN_PROGS_x86_64 += steal_time
+
+# Test programs built when UNAME_M is aarch64.
+TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
+TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
+TEST_GEN_PROGS_aarch64 += demand_paging_test
+TEST_GEN_PROGS_aarch64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_aarch64 += set_memory_region_test
+TEST_GEN_PROGS_aarch64 += steal_time
+
+# Test programs built when UNAME_M is s390x.
+TEST_GEN_PROGS_s390x = s390x/memop
+TEST_GEN_PROGS_s390x += s390x/resets
+TEST_GEN_PROGS_s390x += s390x/sync_regs_test
+TEST_GEN_PROGS_s390x += demand_paging_test
+TEST_GEN_PROGS_s390x += dirty_log_test
+TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390x += set_memory_region_test
+
+# Select the per-architecture lists by computed variable name; an
+# unrecognized UNAME_M expands to nothing.
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
+LIBKVM += $(LIBKVM_$(UNAME_M))
+
+# Header search locations, all relative to the kernel tree root.
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+# tools/arch uses "x86" as the directory name for ARCH=x86_64; every other
+# ARCH value is used as the directory name unchanged.
+ifeq ($(ARCH),x86_64)
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
+else
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+endif
+# -I$(<D) adds the directory of the first prerequisite of whichever rule
+# expands CFLAGS.
+CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
+	-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
+	-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
+	-I$(<D) -Iinclude/$(UNAME_M) -I..
+
+# Probe the compiler with a trivial program: expands to -no-pie only when
+# $(CC) accepts that option without error.
+no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
+        $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
+
+# On s390, build the testcases KVM-enabled
+# Same probe technique; recursively expanded (=), so it is evaluated where
+# LDFLAGS is expanded rather than here.
+pgste-option = $(call try-run, echo 'int main() { return 0; }' | \
+	$(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
+
+
+LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
+
+# After inclusion, $(OUTPUT) is defined and
+# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
+include ../lib.mk
+
+# Object files for the library, split by source type (.c and .S) and
+# placed under $(OUTPUT) with the same relative paths as their sources.
+STATIC_LIBS := $(OUTPUT)/libkvm.a
+LIBKVM_C := $(filter %.c,$(LIBKVM))
+LIBKVM_S := $(filter %.S,$(LIBKVM))
+LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
+LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
+EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
+
+# Throwaway variable: the := assignment runs mkdir while the Makefile is
+# being read, so the object directories exist before any rule executes.
+x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
+# Static pattern rules: compile each library source to its object file.
+$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+# Archive all library objects into libkvm.a.
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
+$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
+	$(AR) crs $@ $^
+
+# Same parse-time mkdir trick for the test program output directories.
+x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
+all: $(STATIC_LIBS)
+# Every test program depends on the library.
+# NOTE(review): the link recipe itself presumably comes from ../lib.mk -- confirm.
+$(TEST_GEN_PROGS): $(STATIC_LIBS)
+
+# Rebuild the cscope database: remove any old cscope.* files, write to
+# cscope.files the sorted, de-duplicated list of every *.h found under
+# include_paths plus every *.c found under the current directory (as printed
+# by realpath --relative-base=$(PWD)), then run "cscope -b".
+# include_paths is a target-specific variable, set only for this target.
+cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
+cscope:
+	$(RM) cscope.*
+	(find $(include_paths) -name '*.h' \
+		-exec realpath --relative-base=$(PWD) {} \;; \
+	find . -name '*.c' \
+		-exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files
+	cscope -b
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
new file mode 100644
index 000000000..efba76682
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SVE build of the get-reg-list test: the whole test is pulled in from
+ * get-reg-list.c with REG_LIST_SVE defined, which that file tests with
+ * #ifdef to select its SVE behaviour.
+ */
+#define REG_LIST_SVE
+#include "get-reg-list.c"
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
new file mode 100644
index 000000000..33218a395
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -0,0 +1,841 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * When attempting to migrate from a host with an older kernel to a host
+ * with a newer kernel we allow the newer kernel on the destination to
+ * list new registers with get-reg-list. We assume they'll be unused, at
+ * least until the guest reboots, and so they're relatively harmless.
+ * However, if the destination host with the newer kernel is missing
+ * registers which the source host with the older kernel has, then that's
+ * a regression in get-reg-list. This test checks for that regression by
+ * checking the current list against a blessed list. We should never have
+ * missing registers, but if new ones appear then they can probably be
+ * added to the blessed list. A completely new blessed list can be created
+ * by running the test with the --list command line argument.
+ *
+ * Note, the blessed list should be created from the oldest possible
+ * kernel. We can't go older than v4.15, though, because that's the first
+ * release to expose the ID system registers in KVM_GET_REG_LIST, see
+ * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features
+ * from guests"). Also, one must use the --core-reg-fixup command line
+ * option when running on an older kernel that doesn't include df205b5c6328
+ * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST")
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+/*
+ * reg_list_sve() is a compile-time constant: true when this file is built
+ * with REG_LIST_SVE defined, false otherwise.
+ */
+#ifdef REG_LIST_SVE
+#define reg_list_sve() (true)
+#else
+#define reg_list_sve() (false)
+#endif
+
+/*
+ * Architecture, size and coprocessor fields of a register id. Clearing
+ * these bits leaves the coprocessor-specific part of the id.
+ */
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
+/* Iterate @i over every index of the global reg_list. */
+#define for_each_reg(i)								\
+	for ((i) = 0; (i) < reg_list->n; ++(i))
+
+/*
+ * Iterate @i over the indices of blessed_reg[] whose register is absent
+ * from reg_list. Expands to an unbraced for + if, so the statement that
+ * follows the macro becomes the body of the if.
+ */
+#define for_each_missing_reg(i)							\
+	for ((i) = 0; (i) < blessed_n; ++(i))					\
+		if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))
+
+/*
+ * Iterate @i over the indices of reg_list whose register is absent from
+ * blessed_reg[]. Same unbraced for + if expansion as above.
+ */
+#define for_each_new_reg(i)							\
+	for ((i) = 0; (i) < reg_list->n; ++(i))					\
+		if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+
+/* Register list under test; main() assigns it from vcpu_get_reg_list(). */
+static struct kvm_reg_list *reg_list;
+
+/*
+ * Tentative definitions of the register tables and their element counts,
+ * so the functions below can refer to them before the initializers.
+ * NOTE(review): presumably all of them are fully defined later in the file.
+ */
+static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
+static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
+/* Blessed list for the current build and its length; built in main(). */
+static __u64 *blessed_reg, blessed_n;
+
+/*
+ * Linear search: report whether @reg occurs among the first @nr_regs
+ * entries of @regs.
+ */
+static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
+{
+	const __u64 *cur = regs;
+	__u64 remaining = nr_regs;
+
+	while (remaining--) {
+		if (*cur++ == reg)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Return a newly allocated copy of @template in which the first "##"
+ * placeholder is replaced by the decimal representation of @index.
+ *
+ * The buffer is sized from the formatted length, so an index with more
+ * digits than the two-character placeholder cannot overrun it. The string
+ * is heap allocated and owned by the caller; the callers visible in this
+ * file pass it on for printing and never free it.
+ */
+static const char *str_with_index(const char *template, __u64 index)
+{
+	const char *mark = strstr(template, "##");
+	int prefix_len, len;
+	char *str;
+
+	TEST_ASSERT(mark, "Template is missing the '##' placeholder: %s", template);
+	prefix_len = (int)(mark - template);
+
+	/* First pass only measures; the terminator is added below. */
+	len = snprintf(NULL, 0, "%.*s%llu%s", prefix_len, template, index, mark + 2);
+	TEST_ASSERT(len >= 0, "Failed to format template: %s", template);
+
+	str = malloc(len + 1);
+	TEST_ASSERT(str, "Failed to allocate %d bytes for a register name", len + 1);
+	snprintf(str, len + 1, "%.*s%llu%s", prefix_len, template, index, mark + 2);
+
+	return str;
+}
+
+/*
+ * Divisors that turn a core register offset delta into an array index.
+ * NOTE(review): presumably the number of 32-bit words per array element,
+ * i.e. KVM_REG_ARM_CORE_REG() counts in 32-bit words -- confirm against
+ * the uapi header.
+ */
+#define CORE_REGS_XX_NR_WORDS	2
+#define CORE_SPSR_XX_NR_WORDS	2
+#define CORE_FPREGS_XX_NR_WORDS	4
+
+/*
+ * Map a KVM_REG_ARM_CORE register @id to the source text of the
+ * KVM_REG_ARM_CORE_REG() expression that names it.
+ *
+ * Array members (regs.regs[], spsr[], fp_regs.vregs[]) yield a string
+ * allocated by str_with_index(); all others yield a string literal.
+ * An offset that matches no known member fails the test via TEST_FAIL().
+ */
+static const char *core_id_to_str(__u64 id)
+{
+	__u64 core_off = id & ~REG_MASK, idx;
+
+	/*
+	 * core_off is the offset into struct kvm_regs
+	 */
+	switch (core_off) {
+	/* Case ranges (GNU extension) cover every offset inside an array. */
+	case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+	     KVM_REG_ARM_CORE_REG(regs.regs[30]):
+		idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
+		TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx);
+		return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
+	case KVM_REG_ARM_CORE_REG(regs.sp):
+		return "KVM_REG_ARM_CORE_REG(regs.sp)";
+	case KVM_REG_ARM_CORE_REG(regs.pc):
+		return "KVM_REG_ARM_CORE_REG(regs.pc)";
+	case KVM_REG_ARM_CORE_REG(regs.pstate):
+		return "KVM_REG_ARM_CORE_REG(regs.pstate)";
+	case KVM_REG_ARM_CORE_REG(sp_el1):
+		return "KVM_REG_ARM_CORE_REG(sp_el1)";
+	case KVM_REG_ARM_CORE_REG(elr_el1):
+		return "KVM_REG_ARM_CORE_REG(elr_el1)";
+	case KVM_REG_ARM_CORE_REG(spsr[0]) ...
+	     KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
+		idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
+		TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx);
+		return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
+	case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+	     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+		idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
+		TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx);
+		return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
+	case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+		return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
+	case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+		return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
+	}
+
+	TEST_FAIL("Unknown core reg id: 0x%llx", id);
+	return NULL;
+}
+
+/*
+ * Map a KVM_REG_ARM64_SVE register @id to the source text of the macro
+ * expression that names it (VLS, ZREG, PREG or FFR).
+ *
+ * Only slice 0 is accepted, and each id must be exactly the value the
+ * corresponding macro produces; anything else fails the test. An id that
+ * matches none of the known SVE registers also fails the test, in line
+ * with core_id_to_str(), instead of handing NULL to the caller's printf().
+ */
+static const char *sve_id_to_str(__u64 id)
+{
+	__u64 sve_off, n, i;
+
+	if (id == KVM_REG_ARM64_SVE_VLS)
+		return "KVM_REG_ARM64_SVE_VLS";
+
+	/* Drop the common id fields and the low 5 bits holding the slice. */
+	sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
+	i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
+
+	TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id);
+
+	switch (sve_off) {
+	case KVM_REG_ARM64_SVE_ZREG_BASE ...
+	     KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
+		n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
+		TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
+			    "Unexpected bits set in SVE ZREG id: 0x%llx", id);
+		return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
+	case KVM_REG_ARM64_SVE_PREG_BASE ...
+	     KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
+		n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
+		TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
+			    "Unexpected bits set in SVE PREG id: 0x%llx", id);
+		return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
+	case KVM_REG_ARM64_SVE_FFR_BASE:
+		TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
+			    "Unexpected bits set in SVE FFR id: 0x%llx", id);
+		return "KVM_REG_ARM64_SVE_FFR(0)";
+	}
+
+	TEST_FAIL("Unknown SVE reg id: 0x%llx", id);
+	return NULL;
+}
+
+/*
+ * Print register @id to stdout as one tab-indented, comma-terminated line
+ * of C source, in the form used by the blessed-list initializers.
+ *
+ * The id is validated while it is decoded: a missing KVM_REG_ARM64 arch
+ * field, an unknown size or coprocessor type, or unexpected extra bits all
+ * fail the test.
+ */
+static void print_reg(__u64 id)
+{
+	unsigned op0, op1, crn, crm, op2;
+	const char *reg_size = NULL;
+
+	TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
+		    "KVM_REG_ARM64 missing in reg id: 0x%llx", id);
+
+	/* Name of the size field; only the CORE and DEMUX cases print it. */
+	switch (id & KVM_REG_SIZE_MASK) {
+	case KVM_REG_SIZE_U8:
+		reg_size = "KVM_REG_SIZE_U8";
+		break;
+	case KVM_REG_SIZE_U16:
+		reg_size = "KVM_REG_SIZE_U16";
+		break;
+	case KVM_REG_SIZE_U32:
+		reg_size = "KVM_REG_SIZE_U32";
+		break;
+	case KVM_REG_SIZE_U64:
+		reg_size = "KVM_REG_SIZE_U64";
+		break;
+	case KVM_REG_SIZE_U128:
+		reg_size = "KVM_REG_SIZE_U128";
+		break;
+	case KVM_REG_SIZE_U256:
+		reg_size = "KVM_REG_SIZE_U256";
+		break;
+	case KVM_REG_SIZE_U512:
+		reg_size = "KVM_REG_SIZE_U512";
+		break;
+	case KVM_REG_SIZE_U1024:
+		reg_size = "KVM_REG_SIZE_U1024";
+		break;
+	case KVM_REG_SIZE_U2048:
+		reg_size = "KVM_REG_SIZE_U2048";
+		break;
+	default:
+		TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx",
+			  (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+	}
+
+	/* The coprocessor field selects how the rest of the id is spelled. */
+	switch (id & KVM_REG_ARM_COPROC_MASK) {
+	case KVM_REG_ARM_CORE:
+		printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id));
+		break;
+	case KVM_REG_ARM_DEMUX:
+		TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
+			    "Unexpected bits set in DEMUX reg id: 0x%llx", id);
+		printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
+		       reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
+		break;
+	case KVM_REG_ARM64_SYSREG:
+		op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
+		op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
+		crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
+		crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
+		op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
+		/* Re-encoding the extracted fields must reproduce the id exactly. */
+		TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
+			    "Unexpected bits set in SYSREG reg id: 0x%llx", id);
+		printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
+		break;
+	case KVM_REG_ARM_FW:
+		TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
+			    "Unexpected bits set in FW reg id: 0x%llx", id);
+		printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
+		break;
+	case KVM_REG_ARM64_SVE:
+		/* SVE registers are only legitimate in the REG_LIST_SVE build. */
+		if (reg_list_sve())
+			printf("\t%s,\n", sve_id_to_str(id));
+		else
+			TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
+		break;
+	default:
+		TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+			  (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+	}
+}
+
+/*
+ * Older kernels listed each 32-bit word of CORE registers separately.
+ * For 64 and 128-bit registers we need to ignore the extra words. We
+ * also need to fixup the sizes, because the older kernels stated all
+ * registers were 64-bit, even when they weren't.
+ *
+ * Builds a filtered copy of the global reg_list, frees the old list and
+ * makes reg_list point at the copy. Fails the test if the copy cannot be
+ * allocated.
+ */
+static void core_reg_fixup(void)
+{
+	struct kvm_reg_list *tmp;
+	__u64 id, core_off;
+	int i;
+
+	/* The copy never holds more entries than the original list. */
+	tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64));
+	TEST_ASSERT(tmp, "Failed to allocate the fixed-up register list");
+
+	for (i = 0; i < reg_list->n; ++i) {
+		id = reg_list->reg[i];
+
+		/* Only CORE registers need fixing; copy everything else. */
+		if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) {
+			tmp->reg[tmp->n++] = id;
+			continue;
+		}
+
+		core_off = id & ~REG_MASK;
+
+		switch (core_off) {
+		case 0x52: case 0xd2: case 0xd6:
+			/*
+			 * These offsets are pointing at padding.
+			 * We need to ignore them too.
+			 */
+			continue;
+		case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+		     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+			/* Keep one entry per four offsets and mark it 128-bit. */
+			if (core_off & 3)
+				continue;
+			id &= ~KVM_REG_SIZE_MASK;
+			id |= KVM_REG_SIZE_U128;
+			tmp->reg[tmp->n++] = id;
+			continue;
+		case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+		case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+			/* Single entries; only the size needs correcting. */
+			id &= ~KVM_REG_SIZE_MASK;
+			id |= KVM_REG_SIZE_U32;
+			tmp->reg[tmp->n++] = id;
+			continue;
+		default:
+			/* Remaining registers: keep even offsets only. */
+			if (core_off & 1)
+				continue;
+			tmp->reg[tmp->n++] = id;
+			break;
+		}
+	}
+
+	free(reg_list);
+	reg_list = tmp;
+}
+
+/* In the SVE build, set the KVM_ARM_VCPU_SVE bit in @init->features[0]. */
+static void prepare_vcpu_init(struct kvm_vcpu_init *init)
+{
+	if (!reg_list_sve())
+		return;
+
+	init->features[0] |= 1 << KVM_ARM_VCPU_SVE;
+}
+
+/*
+ * In the SVE build, issue KVM_ARM_VCPU_FINALIZE for the SVE feature on
+ * vcpu @vcpuid of @vm. Does nothing in the non-SVE build.
+ */
+static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	int sve_feature = KVM_ARM_VCPU_SVE;
+
+	if (!reg_list_sve())
+		return;
+
+	vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &sve_feature);
+}
+
+/*
+ * Exit with KSFT_SKIP when this is the SVE build and KVM does not report
+ * KVM_CAP_ARM_SVE; return normally otherwise.
+ */
+static void check_supported(void)
+{
+	if (!reg_list_sve())
+		return;
+	if (kvm_check_cap(KVM_CAP_ARM_SVE))
+		return;
+
+	fprintf(stderr, "SVE not available, skipping tests\n");
+	exit(KSFT_SKIP);
+}
+
+/*
+ * Test entry point.
+ *
+ * Options:
+ *   --core-reg-fixup  post-process the register list with core_reg_fixup()
+ *   --list            print the register list as initializer lines and exit
+ * Any other argument is reported on stderr and ignored.
+ *
+ * Without --list, every listed register is read and written back (for
+ * rejects_set registers the write must instead fail with EPERM), and the
+ * list is compared with the blessed list. New registers are only reported;
+ * missing registers and get/set/reject failures fail the test.
+ *
+ * Returns 0 on success; failures end the test through TEST_ASSERT().
+ */
+int main(int ac, char **av)
+{
+	struct kvm_vcpu_init init = { .target = -1, };
+	int new_regs = 0, missing_regs = 0, i;
+	int failed_get = 0, failed_set = 0, failed_reject = 0;
+	bool print_list = false, fixup_core_regs = false;
+	struct kvm_vm *vm;
+	__u64 *vec_regs;
+
+	check_supported();
+
+	for (i = 1; i < ac; ++i) {
+		if (strcmp(av[i], "--core-reg-fixup") == 0)
+			fixup_core_regs = true;
+		else if (strcmp(av[i], "--list") == 0)
+			print_list = true;
+		else
+			fprintf(stderr, "Ignoring unknown option: %s\n", av[i]);
+	}
+
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+	prepare_vcpu_init(&init);
+	aarch64_vcpu_add_default(vm, 0, &init, NULL);
+	finalize_vcpu(vm, 0);
+
+	reg_list = vcpu_get_reg_list(vm, 0);
+
+	if (fixup_core_regs)
+		core_reg_fixup();
+
+	if (print_list) {
+		putchar('\n');
+		for_each_reg(i)
+			print_reg(reg_list->reg[i]);
+		putchar('\n');
+		return 0;
+	}
+
+	/*
+	 * We only test that we can get the register and then write back the
+	 * same value. Some registers may allow other values to be written
+	 * back, but others only allow some bits to be changed, and at least
+	 * for ID registers set will fail if the value does not exactly match
+	 * what was returned by get. If registers that allow other values to
+	 * be written need to have the other values tested, then we should
+	 * create a new set of tests for those in a new independent test
+	 * executable.
+	 */
+	for_each_reg(i) {
+		/* Large enough for the biggest register size (2048 bits). */
+		uint8_t addr[2048 / 8];
+		struct kvm_one_reg reg = {
+			.id = reg_list->reg[i],
+			.addr = (__u64)&addr,
+		};
+		int ret;
+
+		ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
+		if (ret) {
+			/*
+			 * printf, not puts: puts appends a newline, which
+			 * would split the message from the register name.
+			 */
+			printf("Failed to get ");
+			print_reg(reg.id);
+			putchar('\n');
+			++failed_get;
+		}
+
+		/* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
+		if (find_reg(rejects_set, rejects_set_n, reg.id)) {
+			ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+			if (ret != -1 || errno != EPERM) {
+				printf("Failed to reject (ret=%d, errno=%d) ", ret, errno);
+				print_reg(reg.id);
+				putchar('\n');
+				++failed_reject;
+			}
+			continue;
+		}
+
+		ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+		if (ret) {
+			printf("Failed to set ");
+			print_reg(reg.id);
+			putchar('\n');
+			++failed_set;
+		}
+	}
+
+	/* The blessed list is base_regs followed by the build's vector regs. */
+	if (reg_list_sve()) {
+		blessed_n = base_regs_n + sve_regs_n;
+		vec_regs = sve_regs;
+	} else {
+		blessed_n = base_regs_n + vregs_n;
+		vec_regs = vregs;
+	}
+
+	blessed_reg = calloc(blessed_n, sizeof(__u64));
+	TEST_ASSERT(blessed_reg, "Failed to allocate the blessed register list");
+	for (i = 0; i < base_regs_n; ++i)
+		blessed_reg[i] = base_regs[i];
+	for (i = 0; i < blessed_n - base_regs_n; ++i)
+		blessed_reg[base_regs_n + i] = vec_regs[i];
+
+	/* First pass over each difference only counts; printing comes later. */
+	for_each_new_reg(i)
+		++new_regs;
+
+	for_each_missing_reg(i)
+		++missing_regs;
+
+	if (new_regs || missing_regs) {
+		printf("Number blessed registers: %5lld\n", blessed_n);
+		printf("Number registers:         %5lld\n", reg_list->n);
+	}
+
+	if (new_regs) {
+		printf("\nThere are %d new registers.\n"
+		       "Consider adding them to the blessed reg "
+		       "list with the following lines:\n\n", new_regs);
+		for_each_new_reg(i)
+			print_reg(reg_list->reg[i]);
+		putchar('\n');
+	}
+
+	if (missing_regs) {
+		printf("\nThere are %d missing registers.\n"
+		       "The following lines are missing registers:\n\n", missing_regs);
+		for_each_missing_reg(i)
+			print_reg(blessed_reg[i]);
+		putchar('\n');
+	}
+
+	/* New registers alone do not fail the test. */
+	TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
+		    "There are %d missing registers; "
+		    "%d registers failed get; %d registers failed set; %d registers failed reject",
+		    missing_regs, failed_get, failed_set, failed_reject);
+
+	return 0;
+}
+
+/*
+ * The current blessed list was primed with the output of kernel version
+ * v4.15 with --core-reg-fixup and then later updated with new registers.
+ */
+static __u64 base_regs[] = {
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
+	KVM_REG_ARM_FW_REG(0),
+	KVM_REG_ARM_FW_REG(1),
+	KVM_REG_ARM_FW_REG(2),
+	ARM64_SYS_REG(3, 3, 14, 3, 1),	/* CNTV_CTL_EL0 */
+	ARM64_SYS_REG(3, 3, 14, 3, 2),	/* CNTV_CVAL_EL0 */
+	ARM64_SYS_REG(3, 3, 14, 0, 2),
+	ARM64_SYS_REG(3, 0, 0, 0, 0),	/* MIDR_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 0, 6),	/* REVIDR_EL1 */
+	ARM64_SYS_REG(3, 1, 0, 0, 1),	/* CLIDR_EL1 */
+	ARM64_SYS_REG(3, 1, 0, 0, 7),	/* AIDR_EL1 */
+	ARM64_SYS_REG(3, 3, 0, 0, 1),	/* CTR_EL0 */
+	ARM64_SYS_REG(2, 0, 0, 0, 4),
+	ARM64_SYS_REG(2, 0, 0, 0, 5),
+	ARM64_SYS_REG(2, 0, 0, 0, 6),
+	ARM64_SYS_REG(2, 0, 0, 0, 7),
+	ARM64_SYS_REG(2, 0, 0, 1, 4),
+	ARM64_SYS_REG(2, 0, 0, 1, 5),
+	ARM64_SYS_REG(2, 0, 0, 1, 6),
+	ARM64_SYS_REG(2, 0, 0, 1, 7),
+	ARM64_SYS_REG(2, 0, 0, 2, 0),	/* MDCCINT_EL1 */
+	ARM64_SYS_REG(2, 0, 0, 2, 2),	/* MDSCR_EL1 */
+	ARM64_SYS_REG(2, 0, 0, 2, 4),
+	ARM64_SYS_REG(2, 0, 0, 2, 5),
+	ARM64_SYS_REG(2, 0, 0, 2, 6),
+	ARM64_SYS_REG(2, 0, 0, 2, 7),
+	ARM64_SYS_REG(2, 0, 0, 3, 4),
+	ARM64_SYS_REG(2, 0, 0, 3, 5),
+	ARM64_SYS_REG(2, 0, 0, 3, 6),
+	ARM64_SYS_REG(2, 0, 0, 3, 7),
+	ARM64_SYS_REG(2, 0, 0, 4, 4),
+	ARM64_SYS_REG(2, 0, 0, 4, 5),
+	ARM64_SYS_REG(2, 0, 0, 4, 6),
+	ARM64_SYS_REG(2, 0, 0, 4, 7),
+	ARM64_SYS_REG(2, 0, 0, 5, 4),
+	ARM64_SYS_REG(2, 0, 0, 5, 5),
+	ARM64_SYS_REG(2, 0, 0, 5, 6),
+	ARM64_SYS_REG(2, 0, 0, 5, 7),
+	ARM64_SYS_REG(2, 0, 0, 6, 4),
+	ARM64_SYS_REG(2, 0, 0, 6, 5),
+	ARM64_SYS_REG(2, 0, 0, 6, 6),
+	ARM64_SYS_REG(2, 0, 0, 6, 7),
+	ARM64_SYS_REG(2, 0, 0, 7, 4),
+	ARM64_SYS_REG(2, 0, 0, 7, 5),
+	ARM64_SYS_REG(2, 0, 0, 7, 6),
+	ARM64_SYS_REG(2, 0, 0, 7, 7),
+	ARM64_SYS_REG(2, 0, 0, 8, 4),
+	ARM64_SYS_REG(2, 0, 0, 8, 5),
+	ARM64_SYS_REG(2, 0, 0, 8, 6),
+	ARM64_SYS_REG(2, 0, 0, 8, 7),
+	ARM64_SYS_REG(2, 0, 0, 9, 4),
+	ARM64_SYS_REG(2, 0, 0, 9, 5),
+	ARM64_SYS_REG(2, 0, 0, 9, 6),
+	ARM64_SYS_REG(2, 0, 0, 9, 7),
+	ARM64_SYS_REG(2, 0, 0, 10, 4),
+	ARM64_SYS_REG(2, 0, 0, 10, 5),
+	ARM64_SYS_REG(2, 0, 0, 10, 6),
+	ARM64_SYS_REG(2, 0, 0, 10, 7),
+	ARM64_SYS_REG(2, 0, 0, 11, 4),
+	ARM64_SYS_REG(2, 0, 0, 11, 5),
+	ARM64_SYS_REG(2, 0, 0, 11, 6),
+	ARM64_SYS_REG(2, 0, 0, 11, 7),
+	ARM64_SYS_REG(2, 0, 0, 12, 4),
+	ARM64_SYS_REG(2, 0, 0, 12, 5),
+	ARM64_SYS_REG(2, 0, 0, 12, 6),
+	ARM64_SYS_REG(2, 0, 0, 12, 7),
+	ARM64_SYS_REG(2, 0, 0, 13, 4),
+	ARM64_SYS_REG(2, 0, 0, 13, 5),
+	ARM64_SYS_REG(2, 0, 0, 13, 6),
+	ARM64_SYS_REG(2, 0, 0, 13, 7),
+	ARM64_SYS_REG(2, 0, 0, 14, 4),
+	ARM64_SYS_REG(2, 0, 0, 14, 5),
+	ARM64_SYS_REG(2, 0, 0, 14, 6),
+	ARM64_SYS_REG(2, 0, 0, 14, 7),
+	ARM64_SYS_REG(2, 0, 0, 15, 4),
+	ARM64_SYS_REG(2, 0, 0, 15, 5),
+	ARM64_SYS_REG(2, 0, 0, 15, 6),
+	ARM64_SYS_REG(2, 0, 0, 15, 7),
+	ARM64_SYS_REG(2, 4, 0, 7, 0),	/* DBGVCR32_EL2 */
+	ARM64_SYS_REG(3, 0, 0, 0, 5),	/* MPIDR_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 0),	/* ID_PFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 1),	/* ID_PFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 2),	/* ID_DFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 3),	/* ID_AFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 4),	/* ID_MMFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 5),	/* ID_MMFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 6),	/* ID_MMFR2_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 7),	/* ID_MMFR3_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 0),	/* ID_ISAR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 1),	/* ID_ISAR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 2),	/* ID_ISAR2_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 3),	/* ID_ISAR3_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 4),	/* ID_ISAR4_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 5),	/* ID_ISAR5_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 6),	/* ID_MMFR4_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 7),	/* ID_ISAR6_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 0),	/* MVFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 1),	/* MVFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 2),	/* MVFR2_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 3),
+	ARM64_SYS_REG(3, 0, 0, 3, 4),	/* ID_PFR2_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 5),	/* ID_DFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 6),	/* ID_MMFR5_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 7),
+	ARM64_SYS_REG(3, 0, 0, 4, 0),	/* ID_AA64PFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 4, 1),	/* ID_AA64PFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 4, 2),
+	ARM64_SYS_REG(3, 0, 0, 4, 3),
+	ARM64_SYS_REG(3, 0, 0, 4, 4),	/* ID_AA64ZFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 4, 5),
+	ARM64_SYS_REG(3, 0, 0, 4, 6),
+	ARM64_SYS_REG(3, 0, 0, 4, 7),
+	ARM64_SYS_REG(3, 0, 0, 5, 0),	/* ID_AA64DFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 5, 1),	/* ID_AA64DFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 5, 2),
+	ARM64_SYS_REG(3, 0, 0, 5, 3),
+	ARM64_SYS_REG(3, 0, 0, 5, 4),	/* ID_AA64AFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 5, 5),	/* ID_AA64AFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 5, 6),
+	ARM64_SYS_REG(3, 0, 0, 5, 7),
+	ARM64_SYS_REG(3, 0, 0, 6, 0),	/* ID_AA64ISAR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 6, 1),	/* ID_AA64ISAR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 6, 2),
+	ARM64_SYS_REG(3, 0, 0, 6, 3),
+	ARM64_SYS_REG(3, 0, 0, 6, 4),
+	ARM64_SYS_REG(3, 0, 0, 6, 5),
+	ARM64_SYS_REG(3, 0, 0, 6, 6),
+	ARM64_SYS_REG(3, 0, 0, 6, 7),
+	ARM64_SYS_REG(3, 0, 0, 7, 0),	/* ID_AA64MMFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 7, 1),	/* ID_AA64MMFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 7, 2),	/* ID_AA64MMFR2_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 7, 3),
+	ARM64_SYS_REG(3, 0, 0, 7, 4),
+	ARM64_SYS_REG(3, 0, 0, 7, 5),
+	ARM64_SYS_REG(3, 0, 0, 7, 6),
+	ARM64_SYS_REG(3, 0, 0, 7, 7),
+	ARM64_SYS_REG(3, 0, 1, 0, 0),	/* SCTLR_EL1 */
+	ARM64_SYS_REG(3, 0, 1, 0, 1),	/* ACTLR_EL1 */
+	ARM64_SYS_REG(3, 0, 1, 0, 2),	/* CPACR_EL1 */
+	ARM64_SYS_REG(3, 0, 2, 0, 0),	/* TTBR0_EL1 */
+	ARM64_SYS_REG(3, 0, 2, 0, 1),	/* TTBR1_EL1 */
+	ARM64_SYS_REG(3, 0, 2, 0, 2),	/* TCR_EL1 */
+	ARM64_SYS_REG(3, 0, 5, 1, 0),	/* AFSR0_EL1 */
+	ARM64_SYS_REG(3, 0, 5, 1, 1),	/* AFSR1_EL1 */
+	ARM64_SYS_REG(3, 0, 5, 2, 0),	/* ESR_EL1 */
+	ARM64_SYS_REG(3, 0, 6, 0, 0),	/* FAR_EL1 */
+	ARM64_SYS_REG(3, 0, 7, 4, 0),	/* PAR_EL1 */
+	ARM64_SYS_REG(3, 0, 9, 14, 1),	/* PMINTENSET_EL1 */
+	ARM64_SYS_REG(3, 0, 9, 14, 2),	/* PMINTENCLR_EL1 */
+	ARM64_SYS_REG(3, 0, 10, 2, 0),	/* MAIR_EL1 */
+	ARM64_SYS_REG(3, 0, 10, 3, 0),	/* AMAIR_EL1 */
+	ARM64_SYS_REG(3, 0, 12, 0, 0),	/* VBAR_EL1 */
+	ARM64_SYS_REG(3, 0, 12, 1, 1),	/* DISR_EL1 */
+	ARM64_SYS_REG(3, 0, 13, 0, 1),	/* CONTEXTIDR_EL1 */
+	ARM64_SYS_REG(3, 0, 13, 0, 4),	/* TPIDR_EL1 */
+	ARM64_SYS_REG(3, 0, 14, 1, 0),	/* CNTKCTL_EL1 */
+	ARM64_SYS_REG(3, 2, 0, 0, 0),	/* CSSELR_EL1 */
+	ARM64_SYS_REG(3, 3, 9, 12, 0),	/* PMCR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 12, 1),	/* PMCNTENSET_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 12, 2),	/* PMCNTENCLR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 12, 3),	/* PMOVSCLR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 12, 4),	/* PMSWINC_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 12, 5),	/* PMSELR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 13, 0),	/* PMCCNTR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 14, 0),	/* PMUSERENR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 14, 3),	/* PMOVSSET_EL0 */
+	ARM64_SYS_REG(3, 3, 13, 0, 2),	/* TPIDR_EL0 */
+	ARM64_SYS_REG(3, 3, 13, 0, 3),	/* TPIDRRO_EL0 */
+	ARM64_SYS_REG(3, 3, 14, 8, 0),
+	ARM64_SYS_REG(3, 3, 14, 8, 1),
+	ARM64_SYS_REG(3, 3, 14, 8, 2),
+	ARM64_SYS_REG(3, 3, 14, 8, 3),
+	ARM64_SYS_REG(3, 3, 14, 8, 4),
+	ARM64_SYS_REG(3, 3, 14, 8, 5),
+	ARM64_SYS_REG(3, 3, 14, 8, 6),
+	ARM64_SYS_REG(3, 3, 14, 8, 7),
+	ARM64_SYS_REG(3, 3, 14, 9, 0),
+	ARM64_SYS_REG(3, 3, 14, 9, 1),
+	ARM64_SYS_REG(3, 3, 14, 9, 2),
+	ARM64_SYS_REG(3, 3, 14, 9, 3),
+	ARM64_SYS_REG(3, 3, 14, 9, 4),
+	ARM64_SYS_REG(3, 3, 14, 9, 5),
+	ARM64_SYS_REG(3, 3, 14, 9, 6),
+	ARM64_SYS_REG(3, 3, 14, 9, 7),
+	ARM64_SYS_REG(3, 3, 14, 10, 0),
+	ARM64_SYS_REG(3, 3, 14, 10, 1),
+	ARM64_SYS_REG(3, 3, 14, 10, 2),
+	ARM64_SYS_REG(3, 3, 14, 10, 3),
+	ARM64_SYS_REG(3, 3, 14, 10, 4),
+	ARM64_SYS_REG(3, 3, 14, 10, 5),
+	ARM64_SYS_REG(3, 3, 14, 10, 6),
+	ARM64_SYS_REG(3, 3, 14, 10, 7),
+	ARM64_SYS_REG(3, 3, 14, 11, 0),
+	ARM64_SYS_REG(3, 3, 14, 11, 1),
+	ARM64_SYS_REG(3, 3, 14, 11, 2),
+	ARM64_SYS_REG(3, 3, 14, 11, 3),
+	ARM64_SYS_REG(3, 3, 14, 11, 4),
+	ARM64_SYS_REG(3, 3, 14, 11, 5),
+	ARM64_SYS_REG(3, 3, 14, 11, 6),
+	ARM64_SYS_REG(3, 3, 14, 12, 0),
+	ARM64_SYS_REG(3, 3, 14, 12, 1),
+	ARM64_SYS_REG(3, 3, 14, 12, 2),
+	ARM64_SYS_REG(3, 3, 14, 12, 3),
+	ARM64_SYS_REG(3, 3, 14, 12, 4),
+	ARM64_SYS_REG(3, 3, 14, 12, 5),
+	ARM64_SYS_REG(3, 3, 14, 12, 6),
+	ARM64_SYS_REG(3, 3, 14, 12, 7),
+	ARM64_SYS_REG(3, 3, 14, 13, 0),
+	ARM64_SYS_REG(3, 3, 14, 13, 1),
+	ARM64_SYS_REG(3, 3, 14, 13, 2),
+	ARM64_SYS_REG(3, 3, 14, 13, 3),
+	ARM64_SYS_REG(3, 3, 14, 13, 4),
+	ARM64_SYS_REG(3, 3, 14, 13, 5),
+	ARM64_SYS_REG(3, 3, 14, 13, 6),
+	ARM64_SYS_REG(3, 3, 14, 13, 7),
+	ARM64_SYS_REG(3, 3, 14, 14, 0),
+	ARM64_SYS_REG(3, 3, 14, 14, 1),
+	ARM64_SYS_REG(3, 3, 14, 14, 2),
+	ARM64_SYS_REG(3, 3, 14, 14, 3),
+	ARM64_SYS_REG(3, 3, 14, 14, 4),
+	ARM64_SYS_REG(3, 3, 14, 14, 5),
+	ARM64_SYS_REG(3, 3, 14, 14, 6),
+	ARM64_SYS_REG(3, 3, 14, 14, 7),
+	ARM64_SYS_REG(3, 3, 14, 15, 0),
+	ARM64_SYS_REG(3, 3, 14, 15, 1),
+	ARM64_SYS_REG(3, 3, 14, 15, 2),
+	ARM64_SYS_REG(3, 3, 14, 15, 3),
+	ARM64_SYS_REG(3, 3, 14, 15, 4),
+	ARM64_SYS_REG(3, 3, 14, 15, 5),
+	ARM64_SYS_REG(3, 3, 14, 15, 6),
+	ARM64_SYS_REG(3, 3, 14, 15, 7),	/* PMCCFILTR_EL0 */
+	ARM64_SYS_REG(3, 4, 3, 0, 0),	/* DACR32_EL2 */
+	ARM64_SYS_REG(3, 4, 5, 0, 1),	/* IFSR32_EL2 */
+	ARM64_SYS_REG(3, 4, 5, 3, 0),	/* FPEXC32_EL2 */
+	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0,
+	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1,
+	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2,
+};
+static __u64 base_regs_n = ARRAY_SIZE(base_regs);
+
+static __u64 vregs[] = {
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
+};
+static __u64 vregs_n = ARRAY_SIZE(vregs);
+
+static __u64 sve_regs[] = {
+	KVM_REG_ARM64_SVE_VLS,
+	KVM_REG_ARM64_SVE_ZREG(0, 0),
+	KVM_REG_ARM64_SVE_ZREG(1, 0),
+	KVM_REG_ARM64_SVE_ZREG(2, 0),
+	KVM_REG_ARM64_SVE_ZREG(3, 0),
+	KVM_REG_ARM64_SVE_ZREG(4, 0),
+	KVM_REG_ARM64_SVE_ZREG(5, 0),
+	KVM_REG_ARM64_SVE_ZREG(6, 0),
+	KVM_REG_ARM64_SVE_ZREG(7, 0),
+	KVM_REG_ARM64_SVE_ZREG(8, 0),
+	KVM_REG_ARM64_SVE_ZREG(9, 0),
+	KVM_REG_ARM64_SVE_ZREG(10, 0),
+	KVM_REG_ARM64_SVE_ZREG(11, 0),
+	KVM_REG_ARM64_SVE_ZREG(12, 0),
+	KVM_REG_ARM64_SVE_ZREG(13, 0),
+	KVM_REG_ARM64_SVE_ZREG(14, 0),
+	KVM_REG_ARM64_SVE_ZREG(15, 0),
+	KVM_REG_ARM64_SVE_ZREG(16, 0),
+	KVM_REG_ARM64_SVE_ZREG(17, 0),
+	KVM_REG_ARM64_SVE_ZREG(18, 0),
+	KVM_REG_ARM64_SVE_ZREG(19, 0),
+	KVM_REG_ARM64_SVE_ZREG(20, 0),
+	KVM_REG_ARM64_SVE_ZREG(21, 0),
+	KVM_REG_ARM64_SVE_ZREG(22, 0),
+	KVM_REG_ARM64_SVE_ZREG(23, 0),
+	KVM_REG_ARM64_SVE_ZREG(24, 0),
+	KVM_REG_ARM64_SVE_ZREG(25, 0),
+	KVM_REG_ARM64_SVE_ZREG(26, 0),
+	KVM_REG_ARM64_SVE_ZREG(27, 0),
+	KVM_REG_ARM64_SVE_ZREG(28, 0),
+	KVM_REG_ARM64_SVE_ZREG(29, 0),
+	KVM_REG_ARM64_SVE_ZREG(30, 0),
+	KVM_REG_ARM64_SVE_ZREG(31, 0),
+	KVM_REG_ARM64_SVE_PREG(0, 0),
+	KVM_REG_ARM64_SVE_PREG(1, 0),
+	KVM_REG_ARM64_SVE_PREG(2, 0),
+	KVM_REG_ARM64_SVE_PREG(3, 0),
+	KVM_REG_ARM64_SVE_PREG(4, 0),
+	KVM_REG_ARM64_SVE_PREG(5, 0),
+	KVM_REG_ARM64_SVE_PREG(6, 0),
+	KVM_REG_ARM64_SVE_PREG(7, 0),
+	KVM_REG_ARM64_SVE_PREG(8, 0),
+	KVM_REG_ARM64_SVE_PREG(9, 0),
+	KVM_REG_ARM64_SVE_PREG(10, 0),
+	KVM_REG_ARM64_SVE_PREG(11, 0),
+	KVM_REG_ARM64_SVE_PREG(12, 0),
+	KVM_REG_ARM64_SVE_PREG(13, 0),
+	KVM_REG_ARM64_SVE_PREG(14, 0),
+	KVM_REG_ARM64_SVE_PREG(15, 0),
+	KVM_REG_ARM64_SVE_FFR(0),
+	ARM64_SYS_REG(3, 0, 1, 2, 0),   /* ZCR_EL1 */
+};
+static __u64 sve_regs_n = ARRAY_SIZE(sve_regs);
+
+static __u64 rejects_set[] = {
+#ifdef REG_LIST_SVE
+	KVM_REG_ARM64_SVE_VLS,
+#endif
+};
+static __u64 rejects_set_n = ARRAY_SIZE(rejects_set);
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
new file mode 100644
index 000000000..63ed533f7
--- /dev/null
+++ b/tools/testing/selftests/kvm/config
@@ -0,0 +1,3 @@
+CONFIG_KVM=y
+CONFIG_KVM_INTEL=y
+CONFIG_KVM_AMD=y
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
new file mode 100644
index 000000000..3d96a7bfa
--- /dev/null
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM demand paging test
+ * Adapted from dirty_log_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2019, Google, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <time.h>
+#include <poll.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/userfaultfd.h>
+
+#include "perf_test_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#ifdef __NR_userfaultfd
+
+#ifdef PRINT_PER_PAGE_UPDATES
+#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
+
+#ifdef PRINT_PER_VCPU_UPDATES
+#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
+
+static char *guest_data_prototype;
+
+static void *vcpu_worker(void *data)
+{
+	int ret;
+	struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+	int vcpu_id = vcpu_args->vcpu_id;
+	struct kvm_vm *vm = perf_test_args.vm;
+	struct kvm_run *run;
+	struct timespec start;
+	struct timespec ts_diff;
+	/* Thread body: enter the guest once on this vCPU and log the elapsed time via PER_VCPU_DEBUG. */
+	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+	run = vcpu_state(vm, vcpu_id);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	/* Let the guest access its memory */
+	ret = _vcpu_run(vm, vcpu_id);
+	TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+	if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) {
+		TEST_ASSERT(false,
+			    "Invalid guest sync status: exit_reason=%s\n",
+			    exit_reason_str(run->exit_reason));
+	}
+
+	ts_diff = timespec_diff_now(start);
+	PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
+		       ts_diff.tv_sec, ts_diff.tv_nsec);
+
+	return NULL;
+}
+
+static int handle_uffd_page_request(int uffd, uint64_t addr)
+{
+	pid_t tid;
+	struct timespec start;
+	struct timespec ts_diff;
+	struct uffdio_copy copy;
+	int r;
+	/* Resolve one fault at addr with UFFDIO_COPY; returns 0 on success, -1 (after logging errno) on failure. */
+	tid = syscall(__NR_gettid);
+
+	copy.src = (uint64_t)guest_data_prototype;
+	copy.dst = addr;
+	copy.len = perf_test_args.host_page_size;
+	copy.mode = 0;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	r = ioctl(uffd, UFFDIO_COPY, &copy);
+	if (r == -1) {
+		pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n",
+			addr, tid, errno);
+		return r;
+	}
+
+	ts_diff = timespec_diff_now(start);
+
+	PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
+		       timespec_to_ns(ts_diff));
+	PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
+		       perf_test_args.host_page_size, addr, tid);
+
+	return 0;
+}
+
+bool quit_uffd_thread;
+
+struct uffd_handler_args {
+	int uffd;		/* userfaultfd the handler thread polls and reads */
+	int pipefd;		/* pipe end polled alongside uffd; readable means quit */
+	useconds_t delay;	/* if non-zero, usleep() this long before serving each fault */
+};
+
+static void *uffd_handler_thread_fn(void *arg)
+{
+	struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
+	int uffd = uffd_args->uffd;
+	int pipefd = uffd_args->pipefd;
+	useconds_t delay = uffd_args->delay;
+	int64_t pages = 0;
+	struct timespec start;
+	struct timespec ts_diff;
+	/* Thread body: serve page faults read from uffd until pipefd becomes readable or an error occurs. */
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	while (!quit_uffd_thread) {
+		struct uffd_msg msg;
+		struct pollfd pollfd[2];
+		char tmp_chr;
+		int r;
+		uint64_t addr;
+
+		pollfd[0].fd = uffd;
+		pollfd[0].events = POLLIN;
+		pollfd[1].fd = pipefd;
+		pollfd[1].events = POLLIN;
+
+		r = poll(pollfd, 2, -1);
+		switch (r) {
+		case -1:
+			pr_info("poll err");
+			continue;
+		case 0:
+			continue;
+		case 1:
+			break;
+		default:
+			pr_info("Polling uffd returned %d", r);
+			return NULL;
+		}
+
+		if (pollfd[0].revents & POLLERR) {
+			pr_info("uffd revents has POLLERR");
+			return NULL;
+		}
+		/* A byte arriving on the pipe is the request to stop this thread. */
+		if (pollfd[1].revents & POLLIN) {
+			r = read(pollfd[1].fd, &tmp_chr, 1);
+			TEST_ASSERT(r == 1,
+				    "Error reading pipefd in UFFD thread\n");
+			return NULL;
+		}
+		/* Woken without readable data on the uffd: go back to polling. */
+		if (!(pollfd[0].revents & POLLIN))
+			continue;
+
+		r = read(uffd, &msg, sizeof(msg));
+		if (r == -1) {
+			if (errno == EAGAIN)
+				continue;
+			pr_info("Read of uffd got errno %d", errno);
+			return NULL;
+		}
+
+		if (r != sizeof(msg)) {
+			pr_info("Read on uffd returned unexpected size: %d bytes", r);
+			return NULL;
+		}
+
+		if (!(msg.event & UFFD_EVENT_PAGEFAULT))
+			continue;
+
+		if (delay)
+			usleep(delay);
+		addr = msg.arg.pagefault.address;
+		r = handle_uffd_page_request(uffd, addr);
+		if (r < 0)
+			return NULL;
+		pages++;
+	}
+	/* Only reached once quit_uffd_thread is seen set; every other exit returns from inside the loop. */
+	ts_diff = timespec_diff_now(start);
+	PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
+		       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
+		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 1000000000.0));
+
+	return NULL;
+}
+
+static int setup_demand_paging(struct kvm_vm *vm,
+			       pthread_t *uffd_handler_thread, int pipefd,
+			       useconds_t uffd_delay,
+			       struct uffd_handler_args *uffd_args,
+			       void *hva, uint64_t len)
+{
+	int uffd;
+	struct uffdio_api uffdio_api;
+	struct uffdio_register uffdio_register;
+	/* Register [hva, hva + len) with a new userfaultfd and start its handler thread; returns 0, or -1 on failure. */
+	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+	if (uffd == -1) {
+		pr_info("uffd creation failed\n");
+		return -1;
+	}
+	/* Handshake on the API version; no optional features are requested. */
+	uffdio_api.api = UFFD_API;
+	uffdio_api.features = 0;
+	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
+		pr_info("ioctl uffdio_api failed\n");
+		return -1;
+	}
+
+	uffdio_register.range.start = (uint64_t)hva;
+	uffdio_register.range.len = len;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
+		pr_info("ioctl uffdio_register failed\n");
+		return -1;
+	}
+
+	if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) !=
+			UFFD_API_RANGE_IOCTLS) {
+		pr_info("unexpected userfaultfd ioctl set\n");
+		return -1;
+	}
+	/* The handler thread reads these fields from the caller-provided uffd_args. */
+	uffd_args->uffd = uffd;
+	uffd_args->pipefd = pipefd;
+	uffd_args->delay = uffd_delay;
+	pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
+		       uffd_args);
+
+	PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
+		       hva, hva + len);
+
+	return 0;
+}
+
+static void run_test(enum vm_guest_mode mode, bool use_uffd,
+		     useconds_t uffd_delay)
+{
+	pthread_t *vcpu_threads;
+	pthread_t *uffd_handler_threads = NULL;
+	struct uffd_handler_args *uffd_args = NULL;
+	struct timespec start;
+	struct timespec ts_diff;
+	int *pipefds = NULL;
+	struct kvm_vm *vm;
+	int vcpu_id;
+	int r;
+	/* Run the guest once on nr_vcpus vCPUs, optionally serving its faults through per-vCPU uffd handlers. */
+	vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+
+	perf_test_args.wr_fract = 1;
+
+	guest_data_prototype = malloc(perf_test_args.host_page_size);
+	TEST_ASSERT(guest_data_prototype,
+		    "Failed to allocate buffer for guest data pattern");
+	memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size);
+
+	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
+	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+
+	add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+
+	if (use_uffd) {
+		uffd_handler_threads =
+			malloc(nr_vcpus * sizeof(*uffd_handler_threads));
+		TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
+
+		uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
+		TEST_ASSERT(uffd_args, "Memory allocation failed");
+
+		pipefds = malloc(sizeof(int) * nr_vcpus * 2);
+		TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
+
+		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+			vm_paddr_t vcpu_gpa;
+			void *vcpu_hva;
+
+			vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size);
+			PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+				       vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size);
+
+			/* Cache the HVA pointer of the region */
+			vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
+
+			/*
+			 * Set up user fault fd to handle demand paging
+			 * requests.
+			 */
+			r = pipe2(&pipefds[vcpu_id * 2],
+				  O_CLOEXEC | O_NONBLOCK);
+			TEST_ASSERT(!r, "Failed to set up pipefd");
+
+			r = setup_demand_paging(vm,
+						&uffd_handler_threads[vcpu_id],
+						pipefds[vcpu_id * 2],
+						uffd_delay, &uffd_args[vcpu_id],
+						vcpu_hva, guest_percpu_mem_size);
+			if (r < 0)
+				exit(-r);
+		}
+	}
+
+	/* Export the shared variables to the guest */
+	sync_global_to_guest(vm, perf_test_args);
+
+	pr_info("Finished creating vCPUs and starting uffd threads\n");
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
+			       &perf_test_args.vcpu_args[vcpu_id]);
+	}
+
+	pr_info("Started all vCPUs\n");
+
+	/* Wait for the vcpu threads to quit */
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+		pthread_join(vcpu_threads[vcpu_id], NULL);
+		PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
+	}
+
+	ts_diff = timespec_diff_now(start);
+
+	pr_info("All vCPU threads joined\n");
+
+	if (use_uffd) {
+		char c = 0;	/* payload is ignored by the reader; only its arrival matters */
+
+		/* Tell the user fault fd handler threads to quit */
+		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+			r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
+			TEST_ASSERT(r == 1, "Unable to write to pipefd");
+
+			pthread_join(uffd_handler_threads[vcpu_id], NULL);
+		}
+	}
+
+	pr_info("Total guest execution time: %ld.%.9lds\n",
+		ts_diff.tv_sec, ts_diff.tv_nsec);
+	pr_info("Overall demand paging rate: %f pgs/sec\n",
+		perf_test_args.vcpu_args[0].pages * nr_vcpus /
+		((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 1000000000.0));
+
+	ucall_uninit(vm);
+	kvm_vm_free(vm);
+
+	free(guest_data_prototype);
+	free(vcpu_threads);
+	if (use_uffd) {
+		free(uffd_handler_threads);
+		free(uffd_args);
+		free(pipefds);
+	}
+}
+
+struct guest_mode {
+	bool supported;	/* main() refuses to run a mode without this; help() tags it "(supported)" */
+	bool enabled;	/* run_test() is called only for enabled modes; -m narrows the set */
+};
+static struct guest_mode guest_modes[NUM_VM_MODES];
+
+#define guest_mode_init(mode, supported, enabled) ({ \
+	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
+})
+
+static void help(char *name)
+{
+	int i;
+
+	puts("");
+	printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
+	       "          [-b memory] [-v vcpus]\n", name);
+	printf(" -m: specify the guest mode ID to test\n"
+	       "     (default: test all supported modes)\n"
+	       "     This option may be used multiple times.\n"
+	       "     Guest mode IDs:\n");
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
+		       guest_modes[i].supported ? " (supported)" : "");
+	}
+	printf(" -u: use User Fault FD to handle vCPU page\n"
+	       "     faults.\n");
+	printf(" -d: add a delay in usec to the User Fault\n"
+	       "     FD handler to simulate demand paging\n"
+	       "     overheads. Ignored without -u.\n");
+	printf(" -b: specify the size of the memory region which should be\n"
+	       "     demand paged by each vCPU. e.g. 10M or 3G.\n"
+	       "     Default: 1G\n");
+	printf(" -v: specify the number of vCPUs to run.\n");
+	puts("");
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+	bool mode_selected = false;
+	unsigned int mode;
+	int opt, i;
+	bool use_uffd = false;
+	useconds_t uffd_delay = 0;
+
+#ifdef __x86_64__
+	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
+#endif
+#ifdef __aarch64__
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
+	guest_mode_init(VM_MODE_P40V48_64K, true, true);
+	{
+		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+
+		if (limit >= 52)
+			guest_mode_init(VM_MODE_P52V48_64K, true, true);
+		if (limit >= 48) {
+			guest_mode_init(VM_MODE_P48V48_4K, true, true);
+			guest_mode_init(VM_MODE_P48V48_64K, true, true);
+		}
+	}
+#endif
+#ifdef __s390x__
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
+#endif
+
+	while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) {
+		switch (opt) {
+		case 'm':
+			if (!mode_selected) {
+				for (i = 0; i < NUM_VM_MODES; ++i)
+					guest_modes[i].enabled = false;
+				mode_selected = true;
+			}
+			mode = strtoul(optarg, NULL, 10);
+			TEST_ASSERT(mode < NUM_VM_MODES,
+				    "Guest mode ID %d too big", mode);
+			guest_modes[mode].enabled = true;
+			break;
+		case 'u':
+			use_uffd = true;
+			break;
+		case 'd':
+			uffd_delay = strtoul(optarg, NULL, 0);
+			TEST_ASSERT(uffd_delay >= 0,
+				    "A negative UFFD delay is not supported.");
+			break;
+		case 'b':
+			guest_percpu_mem_size = parse_size(optarg);
+			break;
+		case 'v':
+			nr_vcpus = atoi(optarg);
+			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		if (!guest_modes[i].enabled)
+			continue;
+		TEST_ASSERT(guest_modes[i].supported,
+			    "Guest mode ID %d (%s) not supported.",
+			    i, vm_guest_mode_string(i));
+		run_test(i, use_uffd, uffd_delay);
+	}
+
+	return 0;
+}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+	print_skip("__NR_userfaultfd must be present for userfaultfd test");
+	return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
new file mode 100644
index 000000000..85c9b8f73
--- /dev/null
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging performance test
+ *
+ * Based on dirty_log_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2020, Google, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "kvm_util.h"
+#include "perf_test_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop) */
+#define TEST_HOST_LOOP_N		2UL
+
+/* Host variables */
+static bool host_quit;
+static uint64_t iteration;
+static uint64_t vcpu_last_completed_iteration[MAX_VCPUS];
+
+static void *vcpu_worker(void *data)
+{
+	int ret;
+	struct kvm_vm *vm = perf_test_args.vm;
+	uint64_t pages_count = 0;
+	struct kvm_run *run;
+	struct timespec start;
+	struct timespec ts_diff;
+	struct timespec total = (struct timespec){0};
+	struct timespec avg;
+	struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+	int vcpu_id = vcpu_args->vcpu_id;
+	/* Thread body: re-enter the guest once per host iteration until host_quit is set, timing each run. */
+	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+	run = vcpu_state(vm, vcpu_id);
+
+	while (!READ_ONCE(host_quit)) {
+		uint64_t current_iteration = READ_ONCE(iteration);
+
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		ret = _vcpu_run(vm, vcpu_id);
+		ts_diff = timespec_diff_now(start);
+
+		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+		TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC,
+			    "Invalid guest sync status: exit_reason=%s\n",
+			    exit_reason_str(run->exit_reason));
+
+		pr_debug("Got sync event from vCPU %d\n", vcpu_id);
+		vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+		pr_debug("vCPU %d updated last completed iteration to %lu\n",
+			 vcpu_id, vcpu_last_completed_iteration[vcpu_id]);
+		/* Iteration 0 only populates memory, so it is left out of the dirty totals. */
+		if (current_iteration) {
+			pages_count += vcpu_args->pages;
+			total = timespec_add(total, ts_diff);
+			pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n",
+				vcpu_id, current_iteration, ts_diff.tv_sec,
+				ts_diff.tv_nsec);
+		} else {
+			pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n",
+				vcpu_id, current_iteration, ts_diff.tv_sec,
+				ts_diff.tv_nsec);
+		}
+		/* Spin until the host bumps the iteration counter or asks us to quit. */
+		while (current_iteration == READ_ONCE(iteration) &&
+		       !READ_ONCE(host_quit)) {}
+	}
+
+	avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]);
+	pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+		vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
+		total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+
+	return NULL;
+}
+
+#ifdef USE_CLEAR_DIRTY_LOG
+static u64 dirty_log_manual_caps;
+#endif
+
+static void run_test(enum vm_guest_mode mode, unsigned long iterations,
+		     uint64_t phys_offset, int wr_fract)
+{
+	pthread_t *vcpu_threads;
+	struct kvm_vm *vm;
+	unsigned long *bmap;
+	uint64_t guest_num_pages;
+	uint64_t host_num_pages;
+	int vcpu_id;
+	struct timespec start;
+	struct timespec ts_diff;
+	struct timespec get_dirty_log_total = (struct timespec){0};
+	struct timespec vcpu_dirty_total = (struct timespec){0};
+	struct timespec avg;
+#ifdef USE_CLEAR_DIRTY_LOG
+	struct kvm_enable_cap cap = {};
+	struct timespec clear_dirty_log_total = (struct timespec){0};
+#endif
+	/* Time the populate, dirty, get (and clear) dirty-log phases; phys_offset is currently unused here. */
+	vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+
+	perf_test_args.wr_fract = wr_fract;
+
+	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
+	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+	bmap = bitmap_alloc(host_num_pages);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
+	cap.args[0] = dirty_log_manual_caps;
+	vm_enable_cap(vm, &cap);
+#endif
+
+	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
+	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+
+	add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+
+	sync_global_to_guest(vm, perf_test_args);
+
+	/* Start the iterations */
+	iteration = 0;
+	host_quit = false;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+		vcpu_last_completed_iteration[vcpu_id] = UINT64_MAX;	/* nothing completed yet */
+		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
+			       &perf_test_args.vcpu_args[vcpu_id]);
+	}
+	/* Allow the vCPUs to populate memory: wait for every vCPU to finish iteration 0 */
+	pr_debug("Starting iteration %lu - Populating\n", iteration);
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
+		while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
+			;
+
+	ts_diff = timespec_diff_now(start);
+	pr_info("Populate memory time: %ld.%.9lds\n",
+		ts_diff.tv_sec, ts_diff.tv_nsec);
+
+	/* Enable dirty logging */
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,
+				KVM_MEM_LOG_DIRTY_PAGES);
+	ts_diff = timespec_diff_now(start);
+	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
+		ts_diff.tv_sec, ts_diff.tv_nsec);
+
+	while (iteration < iterations) {
+		/*
+		 * Incrementing the iteration number will start the vCPUs
+		 * dirtying memory again.
+		 */
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		iteration++;
+
+		pr_debug("Starting iteration %lu\n", iteration);
+		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+			while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
+				pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n",
+					 vcpu_id, iteration);
+		}
+
+		ts_diff = timespec_diff_now(start);
+		vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff);
+		pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n",
+			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+
+		ts_diff = timespec_diff_now(start);
+		get_dirty_log_total = timespec_add(get_dirty_log_total,
+						   ts_diff);
+		pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n",
+			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
+				       host_num_pages);
+
+		ts_diff = timespec_diff_now(start);
+		clear_dirty_log_total = timespec_add(clear_dirty_log_total,
+						     ts_diff);
+		pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
+			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+#endif
+	}
+
+	/* Tell the vcpu thread to quit */
+	host_quit = true;
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
+		pthread_join(vcpu_threads[vcpu_id], NULL);
+
+	/* Disable dirty logging */
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);
+	ts_diff = timespec_diff_now(start);
+	pr_info("Disabling dirty logging time: %ld.%.9lds\n",
+		ts_diff.tv_sec, ts_diff.tv_nsec);
+
+	avg = timespec_div(get_dirty_log_total, iterations);
+	pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+		iterations, get_dirty_log_total.tv_sec,
+		get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+	avg = timespec_div(clear_dirty_log_total, iterations);
+	pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+		iterations, clear_dirty_log_total.tv_sec,
+		clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+#endif
+
+	free(bmap);
+	free(vcpu_threads);
+	ucall_uninit(vm);
+	kvm_vm_free(vm);
+}
+
+struct guest_mode {
+	bool supported;	/* main() refuses to run a mode without this; help() tags it "(supported)" */
+	bool enabled;	/* run_test() is called only for enabled modes; -m narrows the set */
+};
+static struct guest_mode guest_modes[NUM_VM_MODES];
+
+#define guest_mode_init(mode, supported, enabled) ({ \
+	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
+})
+
+static void help(char *name)
+{
+	int i;
+
+	puts("");
+	printf("usage: %s [-h] [-i iterations] [-p offset] "
+	       "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name);
+	puts("");
+	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+	       TEST_HOST_LOOP_N);
+	printf(" -p: specify guest physical test memory offset\n"
+	       "     Warning: a low offset can conflict with the loaded test code.\n");
+	printf(" -m: specify the guest mode ID to test "
+	       "(default: test all supported modes)\n"
+	       "     This option may be used multiple times.\n"
+	       "     Guest mode IDs:\n");
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
+		       guest_modes[i].supported ? " (supported)" : "");
+	}
+	printf(" -b: specify the size of the memory region which should be\n"
+	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
+	       "     (default: 1G)\n");
+	printf(" -f: specify the fraction of pages which should be written to\n"
+	       "     as opposed to simply read, in the form\n"
+	       "     1/<fraction of pages to write>.\n"
+	       "     (default: 1 i.e. all pages are written to.)\n");
+	printf(" -v: specify the number of vCPUs to run.\n");
+	puts("");
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned long iterations = TEST_HOST_LOOP_N;
+	bool mode_selected = false;
+	uint64_t phys_offset = 0;
+	unsigned int mode;
+	int opt, i;
+	int wr_fract = 1;
+
+#ifdef USE_CLEAR_DIRTY_LOG
+	dirty_log_manual_caps =
+		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+	if (!dirty_log_manual_caps) {
+		print_skip("KVM_CLEAR_DIRTY_LOG not available");
+		exit(KSFT_SKIP);
+	}
+	dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+				  KVM_DIRTY_LOG_INITIALLY_SET);
+#endif
+
+#ifdef __x86_64__
+	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
+#endif
+#ifdef __aarch64__
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
+	guest_mode_init(VM_MODE_P40V48_64K, true, true);
+
+	{
+		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+
+		if (limit >= 52)
+			guest_mode_init(VM_MODE_P52V48_64K, true, true);
+		if (limit >= 48) {
+			guest_mode_init(VM_MODE_P48V48_4K, true, true);
+			guest_mode_init(VM_MODE_P48V48_64K, true, true);
+		}
+	}
+#endif
+#ifdef __s390x__
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
+#endif
+
+	while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) {
+		switch (opt) {
+		case 'i':
+			iterations = strtol(optarg, NULL, 10);
+			break;
+		case 'p':
+			phys_offset = strtoull(optarg, NULL, 0);
+			break;
+		case 'm':
+			if (!mode_selected) {
+				for (i = 0; i < NUM_VM_MODES; ++i)
+					guest_modes[i].enabled = false;
+				mode_selected = true;
+			}
+			mode = strtoul(optarg, NULL, 10);
+			TEST_ASSERT(mode < NUM_VM_MODES,
+				    "Guest mode ID %d too big", mode);
+			guest_modes[mode].enabled = true;
+			break;
+		case 'b':
+			guest_percpu_mem_size = parse_size(optarg);
+			break;
+		case 'f':
+			wr_fract = atoi(optarg);
+			TEST_ASSERT(wr_fract >= 1,
+				    "Write fraction cannot be less than one");
+			break;
+		case 'v':
+			nr_vcpus = atoi(optarg);
+			TEST_ASSERT(nr_vcpus > 0,
+				    "Must have a positive number of vCPUs");
+			TEST_ASSERT(nr_vcpus <= MAX_VCPUS,
+				    "This test does not currently support\n"
+				    "more than %d vCPUs.", MAX_VCPUS);
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	TEST_ASSERT(iterations >= 2, "The test should have at least two iterations");
+
+	pr_info("Test iterations: %"PRIu64"\n",	iterations);
+
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		if (!guest_modes[i].enabled)
+			continue;
+		TEST_ASSERT(guest_modes[i].supported,
+			    "Guest mode ID %d (%s) not supported.",
+			    i, vm_guest_mode_string(i));
+		run_test(i, iterations, phys_offset, wr_fract);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
new file mode 100644
index 000000000..54da9cc20
--- /dev/null
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -0,0 +1,639 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID				1
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX		1
+
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM		0xc0000000
+
+/* How many pages to dirty for each guest loop */
+#define TEST_PAGES_PER_LOOP		1024
+
+/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
+#define TEST_HOST_LOOP_N		32UL
+
+/* Interval for each host loop (ms) */
+#define TEST_HOST_LOOP_INTERVAL		10UL
+
+/* Dirty bitmaps are always little endian, so we need to swap on big endian */
+#if defined(__s390x__)
+# define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
+# define test_bit_le(nr, addr) \
+	test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define set_bit_le(nr, addr) \
+	set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define clear_bit_le(nr, addr) \
+	clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_set_bit_le(nr, addr) \
+	test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_clear_bit_le(nr, addr) \
+	test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+#else
+# define test_bit_le		test_bit
+# define set_bit_le		set_bit
+# define clear_bit_le		clear_bit
+# define test_and_set_bit_le	test_and_set_bit
+# define test_and_clear_bit_le	test_and_clear_bit
+#endif
+
+/*
+ * Guest/Host shared variables. Ensure addr_gva2hva() and/or
+ * sync_global_to/from_guest() are used when accessing from
+ * the host. READ/WRITE_ONCE() should also be used with anything
+ * that may change.
+ */
+static uint64_t host_page_size;
+static uint64_t guest_page_size;
+static uint64_t guest_num_pages;
+static uint64_t random_array[TEST_PAGES_PER_LOOP];
+static uint64_t iteration;
+
+/*
+ * Guest physical memory offset of the testing memory slot.
+ * This will be set to the topmost valid physical address minus
+ * the test memory size.
+ */
+static uint64_t guest_test_phys_mem;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+
+/*
+ * Guest entry point: touch every test page once, then keep storing the
+ * current iteration number into the first 8 bytes of randomly chosen pages.
+ */
+static void guest_code(void)
+{
+	uint64_t addr;
+	int i;
+
+	/*
+	 * On s390x, all pages of a 1M segment are initially marked as dirty
+	 * when a page of the segment is written to for the very first time.
+	 * To compensate for this peculiarity in this test, we need to touch
+	 * all pages during the first iteration.
+	 */
+	for (i = 0; i < guest_num_pages; i++) {
+		addr = guest_test_virt_mem + i * guest_page_size;
+		*(uint64_t *)addr = READ_ONCE(iteration);
+	}
+
+	while (true) {
+		for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+			addr = guest_test_virt_mem;
+			addr += (READ_ONCE(random_array[i]) % guest_num_pages)
+				* guest_page_size;
+			addr &= ~(host_page_size - 1);	/* round down to a host page */
+			*(uint64_t *)addr = READ_ONCE(iteration);
+		}
+
+		/* Tell the host that we need more random numbers */
+		GUEST_SYNC(1);
+	}
+}
+
+/* Host variables */
+static bool host_quit;
+
+/* Points to the test VM memory region on which we track dirty logs */
+static void *host_test_mem;
+static uint64_t host_num_pages;
+
+/* For statistics only */
+static uint64_t host_dirty_count;
+static uint64_t host_clear_count;
+static uint64_t host_track_next_count;
+
+enum log_mode_t {
+	/* Only use KVM_GET_DIRTY_LOG for logging */
+	LOG_MODE_DIRTY_LOG = 0,
+
+	/* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
+	LOG_MODE_CLEAR_LOG = 1,
+
+	LOG_MODE_NUM,
+
+	/* Run all supported modes */
+	LOG_MODE_ALL = LOG_MODE_NUM,
+};
+
+/* Mode of logging to test.  Default is to run all supported modes */
+static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
+/* Logging mode for current run */
+static enum log_mode_t host_log_mode;
+
+static bool clear_log_supported(void)
+{
+	return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2) != 0;
+}
+
+static void clear_log_create_vm_done(struct kvm_vm *vm)
+{
+	/* Only these two manual-protect flags are requested, if reported. */
+	const u64 wanted = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+			   KVM_DIRTY_LOG_INITIALLY_SET;
+	u64 manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+	struct kvm_enable_cap cap = { .cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 };
+
+	TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!");
+
+	cap.args[0] = manual_caps & wanted;
+	vm_enable_cap(vm, &cap);
+}
+
+static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
+					  void *bitmap, uint32_t num_pages)
+{
+	kvm_vm_get_dirty_log(vm, slot, bitmap);	/* num_pages is unused here */
+}
+
+static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
+					  void *bitmap, uint32_t num_pages)
+{
+	kvm_vm_get_dirty_log(vm, slot, bitmap);	/* fetch the log first ... */
+	kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);	/* ... then clear it */
+}
+
+struct log_mode {
+	const char *name;	/* compared against the -M option argument */
+	/* Optional: return true if this mode is supported; NULL means yes */
+	bool (*supported)(void);
+	/* Optional hook when the vm creation is done (before vcpu creation) */
+	void (*create_vm_done)(struct kvm_vm *vm);
+	/* Mandatory hook to collect the dirty pages into the bitmap provided */
+	void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
+				     void *bitmap, uint32_t num_pages);
+} log_modes[LOG_MODE_NUM] = {
+	{	/* LOG_MODE_DIRTY_LOG */
+		.name = "dirty-log",
+		.collect_dirty_pages = dirty_log_collect_dirty_pages,
+	},
+	{	/* LOG_MODE_CLEAR_LOG */
+		.name = "clear-log",
+		.supported = clear_log_supported,
+		.create_vm_done = clear_log_create_vm_done,
+		.collect_dirty_pages = clear_log_collect_dirty_pages,
+	},
+};
+
+/*
+ * We use this bitmap to track pages that should have their dirty
+ * bit set in the _next_ iteration.  For example, if we detect that
+ * a page value changed to the current iteration while the page bit
+ * is clear in the latest bitmap, then the system must report that
+ * write in the next get dirty log call.
+ */
+static unsigned long *host_bmap_track;
+
+static void log_modes_dump(void)
+{
+	const struct log_mode *m;
+
+	printf("all");	/* listed first, ahead of the individual mode names */
+	for (m = log_modes; m < log_modes + LOG_MODE_NUM; m++)
+		printf(", %s", m->name);
+	printf("\n");
+}
+
+static bool log_mode_supported(void)
+{
+	bool (*probe)(void) = log_modes[host_log_mode].supported;
+
+	/* A mode that provides no probe is treated as always supported. */
+	if (!probe)
+		return true;
+	return probe();
+}
+
+static void log_mode_create_vm_done(struct kvm_vm *vm)
+{
+	void (*hook)(struct kvm_vm *vm) = log_modes[host_log_mode].create_vm_done;
+
+	if (hook)	/* the hook is optional; nothing to do without one */
+		hook(vm);
+}
+
+static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
+					 void *bitmap, uint32_t num_pages)
+{
+	const struct log_mode *cur = log_modes + host_log_mode;
+
+	TEST_ASSERT(cur->collect_dirty_pages != NULL,
+		    "collect_dirty_pages() is required for any log mode!");
+	cur->collect_dirty_pages(vm, slot, bitmap, num_pages);
+}
+
+static void generate_random_array(uint64_t *guest_array, uint64_t size)
+{
+	uint64_t *const end = guest_array + size;
+
+	while (guest_array < end)
+		*guest_array++ = random();
+}
+
+static void *vcpu_worker(void *data)
+{
+	struct kvm_vm *vm = data;
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	uint64_t *guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
+	uint64_t pages_count = 0;
+	int ret;
+
+	/* Fill the guest-visible array before the first guest entry. */
+	generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+
+	while (!READ_ONCE(host_quit)) {
+		/* Run the guest until it has written one batch of pages. */
+		ret = _vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+		if (get_ucall(vm, VCPU_ID, NULL) != UCALL_SYNC) {
+			TEST_FAIL("Invalid guest sync status: "
+				  "exit_reason=%s\n",
+				  exit_reason_str(run->exit_reason));
+			continue;
+		}
+
+		/* The guest synced: account the batch and hand out new values. */
+		pages_count += TEST_PAGES_PER_LOOP;
+		generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+	}
+
+	pr_info("Dirtied %"PRIu64" pages\n", pages_count);
+
+	return NULL;
+}
+
+/* Check the collected dirty bitmap against the values found in test memory */
+static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
+{
+	uint64_t step = vm_num_host_pages(mode, 1);	/* presumably host pages per guest page */
+	uint64_t page;
+	uint64_t *value_ptr;
+
+	for (page = 0; page < host_num_pages; page += step) {
+		value_ptr = host_test_mem + page * host_page_size;
+
+		/* A page flagged by the previous pass must be dirty now */
+		if (test_and_clear_bit_le(page, host_bmap_track)) {
+			host_track_next_count++;
+			TEST_ASSERT(test_bit_le(page, bmap),
+				    "Page %"PRIu64" should have its dirty bit "
+				    "set in this iteration but it is missing",
+				    page);
+		}
+
+		if (test_and_clear_bit_le(page, bmap)) {
+			host_dirty_count++;
+			/*
+			 * If the bit is set, the value written onto
+			 * the corresponding page should be either the
+			 * previous iteration number or the current one.
+			 */
+			TEST_ASSERT(*value_ptr == iteration ||
+				    *value_ptr == iteration - 1,
+				    "Set page %"PRIu64" value %"PRIu64
+				    " incorrect (iteration=%"PRIu64")",
+				    page, *value_ptr, iteration);
+		} else {
+			host_clear_count++;
+			/*
+			 * If cleared, the value written can be any value
+			 * less than or equal to the iteration number.  Note
+			 * that the value can be exactly (iteration-1) if
+			 * that write can happen like this:
+			 *
+			 * (1) increase loop count to "iteration-1"
+			 * (2) write to page P happens (with value
+			 *     "iteration-1")
+			 * (3) get dirty log for "iteration-1"; we'll
+			 *     see that page P bit is set (dirtied),
+			 *     and not set the bit in host_bmap_track
+			 * (4) increase loop count to "iteration"
+			 *     (which is current iteration)
+			 * (5) get dirty log for current iteration,
+			 *     we'll see that page P is cleared, with
+			 *     value "iteration-1".
+			 */
+			TEST_ASSERT(*value_ptr <= iteration,
+				    "Clear page %"PRIu64" value %"PRIu64
+				    " incorrect (iteration=%"PRIu64")",
+				    page, *value_ptr, iteration);
+			if (*value_ptr == iteration) {
+				/*
+				 * This page was _just_ modified; it
+				 * should report its dirtiness in the
+				 * next run
+				 */
+				set_bit_le(page, host_bmap_track);
+			}
+		}
+	}
+}
+
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
+				uint64_t extra_mem_pages, void *guest_code)
+{
+	struct kvm_vm *vm;
+	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;	/* extra pages on top of the default */
+
+	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+	vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);	/* load this test binary */
+#ifdef __x86_64__
+	vm_create_irqchip(vm);
+#endif
+	log_mode_create_vm_done(vm);	/* log-mode hook runs before the vCPU is added */
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+	return vm;
+}
+
+#define DIRTY_MEM_BITS 30 /* 1G */
+#define PAGE_SHIFT_4K  12
+
+/*
+ * Test guest @mode in the current host_log_mode: a vCPU thread dirties pages
+ * while the dirty log is collected and verified once per @interval ms.
+ */
+static void run_test(enum vm_guest_mode mode, unsigned long iterations,
+		     unsigned long interval, uint64_t phys_offset)
+{
+	pthread_t vcpu_thread;
+	struct kvm_vm *vm;
+	unsigned long *bmap;
+
+	if (!log_mode_supported()) {
+		print_skip("Log mode '%s' not supported",
+			   log_modes[host_log_mode].name);
+		return;
+	}
+
+	/*
+	 * We reserve page table for 2 times of extra dirty mem which
+	 * will definitely cover the original (1G+) test range.  Here
+	 * we do the calculation with 4K page size which is the
+	 * smallest so the page number will be enough for all archs
+	 * (e.g., 64K page size guest will need even less memory for
+	 * page tables).
+	 */
+	vm = create_vm(mode, VCPU_ID, 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K),
+		       guest_code);
+
+	guest_page_size = vm_get_page_size(vm);
+	/*
+	 * A little more than 1G of guest page sized pages.  Cover the
+	 * case where the size is not aligned to 64 pages.
+	 */
+	guest_num_pages = (1ul << (DIRTY_MEM_BITS - vm_get_page_shift(vm))) + 3;
+	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+
+	host_page_size = getpagesize();
+	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+
+	if (!phys_offset) {
+		guest_test_phys_mem = (vm_get_max_gfn(vm) -
+				       guest_num_pages) * guest_page_size;
+		guest_test_phys_mem &= ~(host_page_size - 1);
+	} else {
+		guest_test_phys_mem = phys_offset;
+	}
+
+#ifdef __s390x__
+	/* Align to 1M (segment size) */
+	guest_test_phys_mem &= ~((1 << 20) - 1);
+#endif
+
+	pr_info("guest physical test memory offset: 0x%"PRIx64"\n", guest_test_phys_mem);
+
+	bmap = bitmap_alloc(host_num_pages);
+	host_bmap_track = bitmap_alloc(host_num_pages);
+
+	/* Add an extra memory slot for testing dirty logging */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    guest_test_phys_mem, TEST_MEM_SLOT_INDEX,
+				    guest_num_pages, KVM_MEM_LOG_DIRTY_PAGES);
+
+	/* Do mapping for the dirty track memory slot */
+	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+
+	/* Cache the HVA pointer of the region */
+	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
+
+#ifdef __x86_64__
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+#endif
+	ucall_init(vm, NULL);
+
+	/* Export the shared variables to the guest */
+	sync_global_to_guest(vm, host_page_size);
+	sync_global_to_guest(vm, guest_page_size);
+	sync_global_to_guest(vm, guest_test_virt_mem);
+	sync_global_to_guest(vm, guest_num_pages);
+
+	/* Start the iterations */
+	iteration = 1;
+	sync_global_to_guest(vm, iteration);
+	host_quit = false;
+	host_dirty_count = 0;
+	host_clear_count = 0;
+	host_track_next_count = 0;
+
+	pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
+
+	while (iteration < iterations) {
+		/* Give the vcpu thread some time to dirty some pages */
+		usleep(interval * 1000);
+		log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
+					     bmap, host_num_pages);
+		vm_dirty_log_verify(mode, bmap);
+		iteration++;
+		sync_global_to_guest(vm, iteration);
+	}
+
+	/* Tell the vcpu thread to quit */
+	host_quit = true;
+	pthread_join(vcpu_thread, NULL);
+
+	pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
+		"track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
+		host_track_next_count);
+
+	free(bmap);
+	free(host_bmap_track);
+	ucall_uninit(vm);
+	kvm_vm_free(vm);
+}
+
+struct guest_mode {
+	bool supported;	/* main() asserts this for every enabled mode */
+	bool enabled;	/* main() only runs modes that have this set */
+};
+static struct guest_mode guest_modes[NUM_VM_MODES];	/* indexed by guest mode ID */
+
+#define guest_mode_init(mode, supported, enabled) ({ \
+	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
+})
+
+/* Print the usage text (options, log modes, guest mode IDs) and exit(0) */
+static void help(char *name)
+{
+	int i;
+
+	puts("");
+	printf("usage: %s [-h] [-i iterations] [-I interval] "
+	       "[-p offset] [-m mode] [-M log_mode]\n", name);
+	puts("");
+	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+	       TEST_HOST_LOOP_N);
+	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
+	       TEST_HOST_LOOP_INTERVAL);
+	printf(" -p: specify guest physical test memory offset\n"
+	       "     Warning: a low offset can conflict with the loaded test code.\n");
+	printf(" -M: specify the host logging mode "
+	       "(default: run all log modes).  Supported modes: \n\t");
+	log_modes_dump();
+	printf(" -m: specify the guest mode ID to test "
+	       "(default: test all supported modes)\n"
+	       "     This option may be used multiple times.\n"
+	       "     Guest mode IDs:\n");
+	for (i = 0; i < NUM_VM_MODES; ++i)
+		printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
+		       guest_modes[i].supported ? " (supported)" : "");
+	puts("");
+	exit(0);
+}
+
+/* Parse options, then run each enabled guest mode in the chosen log mode(s) */
+int main(int argc, char *argv[])
+{
+	/* Signed, so that negative -i/-I values fail the range checks below */
+	long iterations = TEST_HOST_LOOP_N;
+	long interval = TEST_HOST_LOOP_INTERVAL;
+	bool mode_selected = false;
+	uint64_t phys_offset = 0;
+	unsigned int mode;
+	int opt, i, j;
+
+#ifdef __x86_64__
+	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
+#endif
+#ifdef __aarch64__
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
+	guest_mode_init(VM_MODE_P40V48_64K, true, true);
+	{
+		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+
+		if (limit >= 52)
+			guest_mode_init(VM_MODE_P52V48_64K, true, true);
+		if (limit >= 48) {
+			guest_mode_init(VM_MODE_P48V48_4K, true, true);
+			guest_mode_init(VM_MODE_P48V48_64K, true, true);
+		}
+	}
+#endif
+#ifdef __s390x__
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
+#endif
+
+	while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) {
+		switch (opt) {
+		case 'i':
+			iterations = strtol(optarg, NULL, 10);
+			break;
+		case 'I':
+			interval = strtol(optarg, NULL, 10);
+			break;
+		case 'p':
+			phys_offset = strtoull(optarg, NULL, 0);
+			break;
+		case 'm':
+			if (!mode_selected) {
+				for (i = 0; i < NUM_VM_MODES; ++i)
+					guest_modes[i].enabled = false;
+				mode_selected = true;
+			}
+			mode = strtoul(optarg, NULL, 10);
+			TEST_ASSERT(mode < NUM_VM_MODES,
+				    "Guest mode ID %u too big", mode);
+			guest_modes[mode].enabled = true;
+			break;
+		case 'M':
+			if (!strcmp(optarg, "all")) {
+				host_log_mode_option = LOG_MODE_ALL;
+				break;
+			}
+			for (i = 0; i < LOG_MODE_NUM; i++) {
+				if (!strcmp(optarg, log_modes[i].name)) {
+					pr_info("Setting log mode to: '%s'\n",
+						optarg);
+					host_log_mode_option = i;
+					break;
+				}
+			}
+			if (i == LOG_MODE_NUM) {
+				printf("Log mode '%s' invalid. Please choose "
+				       "from: ", optarg);
+				log_modes_dump();
+				exit(1);
+			}
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
+	TEST_ASSERT(interval > 0, "Interval must be greater than zero");
+
+	pr_info("Test iterations: %ld, interval: %ld (ms)\n", iterations, interval);
+
+	srandom(time(0));
+
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		if (!guest_modes[i].enabled)
+			continue;
+		TEST_ASSERT(guest_modes[i].supported,
+			    "Guest mode ID %d (%s) not supported.",
+			    i, vm_guest_mode_string(i));
+		if (host_log_mode_option == LOG_MODE_ALL) {
+			/* Run each log mode */
+			for (j = 0; j < LOG_MODE_NUM; j++) {
+				pr_info("Testing Log Mode '%s'\n",
+					log_modes[j].name);
+				host_log_mode = j;
+				run_test(i, iterations, interval, phys_offset);
+			}
+		} else {
+			host_log_mode = host_log_mode_option;
+			run_test(i, iterations, interval, phys_offset);
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
new file mode 100644
index 000000000..b7fa0c855
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AArch64 processor specific defines
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+
+
+#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+			   KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+#define CPACR_EL1	3, 0,  1, 0, 2
+#define TCR_EL1		3, 0,  2, 0, 2
+#define MAIR_EL1	3, 0, 10, 2, 0
+#define TTBR0_EL1	3, 0,  2, 0, 0
+#define SCTLR_EL1	3, 0,  1, 0, 0
+
+/*
+ * Default MAIR
+ *                  index   attribute
+ * DEVICE_nGnRnE      0     0000:0000
+ * DEVICE_nGnRE       1     0000:0100
+ * DEVICE_GRE         2     0000:1100
+ * NORMAL_NC          3     0100:0100
+ * NORMAL             4     1111:1111
+ * NORMAL_WT          5     1011:1011
+ */
+#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \
+			  (0x04ul << (1 * 8)) | \
+			  (0x0cul << (2 * 8)) | \
+			  (0x44ul << (3 * 8)) | \
+			  (0xfful << (4 * 8)) | \
+			  (0xbbul << (5 * 8)))
+
+static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr)
+{
+	/* reg.addr carries the caller's pointer, passed along as an integer */
+	struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+
+	vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, &reg);
+}
+
+static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t val)
+{
+	/* reg.addr carries a pointer to the local copy of @val, not the value */
+	struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+	vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
+}
+
+void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init);
+void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
+			      struct kvm_vcpu_init *init, void *guest_code);
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h
new file mode 100644
index 000000000..a034438b6
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/evmcs.h
@@ -0,0 +1,1102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/evmcs.h
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ */
+
+#ifndef SELFTEST_KVM_EVMCS_H
+#define SELFTEST_KVM_EVMCS_H
+
+#include <stdint.h>
+#include "vmx.h"
+
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#define EVMCS_VERSION 1
+
+extern bool enable_evmcs;
+
+struct hv_vp_assist_page {
+	__u32 apic_assist;
+	__u32 reserved;
+	__u64 vtl_control[2];
+	__u64 nested_enlightenments_control[2];
+	__u32 enlighten_vmentry;
+	__u64 current_nested_vmcs;
+};
+
+struct hv_enlightened_vmcs {
+	u32 revision_id;
+	u32 abort;
+
+	u16 host_es_selector;
+	u16 host_cs_selector;
+	u16 host_ss_selector;
+	u16 host_ds_selector;
+	u16 host_fs_selector;
+	u16 host_gs_selector;
+	u16 host_tr_selector;
+
+	u64 host_ia32_pat;
+	u64 host_ia32_efer;
+
+	u64 host_cr0;
+	u64 host_cr3;
+	u64 host_cr4;
+
+	u64 host_ia32_sysenter_esp;
+	u64 host_ia32_sysenter_eip;
+	u64 host_rip;
+	u32 host_ia32_sysenter_cs;
+
+	u32 pin_based_vm_exec_control;
+	u32 vm_exit_controls;
+	u32 secondary_vm_exec_control;
+
+	u64 io_bitmap_a;
+	u64 io_bitmap_b;
+	u64 msr_bitmap;
+
+	u16 guest_es_selector;
+	u16 guest_cs_selector;
+	u16 guest_ss_selector;
+	u16 guest_ds_selector;
+	u16 guest_fs_selector;
+	u16 guest_gs_selector;
+	u16 guest_ldtr_selector;
+	u16 guest_tr_selector;
+
+	u32 guest_es_limit;
+	u32 guest_cs_limit;
+	u32 guest_ss_limit;
+	u32 guest_ds_limit;
+	u32 guest_fs_limit;
+	u32 guest_gs_limit;
+	u32 guest_ldtr_limit;
+	u32 guest_tr_limit;
+	u32 guest_gdtr_limit;
+	u32 guest_idtr_limit;
+
+	u32 guest_es_ar_bytes;
+	u32 guest_cs_ar_bytes;
+	u32 guest_ss_ar_bytes;
+	u32 guest_ds_ar_bytes;
+	u32 guest_fs_ar_bytes;
+	u32 guest_gs_ar_bytes;
+	u32 guest_ldtr_ar_bytes;
+	u32 guest_tr_ar_bytes;
+
+	u64 guest_es_base;
+	u64 guest_cs_base;
+	u64 guest_ss_base;
+	u64 guest_ds_base;
+	u64 guest_fs_base;
+	u64 guest_gs_base;
+	u64 guest_ldtr_base;
+	u64 guest_tr_base;
+	u64 guest_gdtr_base;
+	u64 guest_idtr_base;
+
+	u64 padding64_1[3];
+
+	u64 vm_exit_msr_store_addr;
+	u64 vm_exit_msr_load_addr;
+	u64 vm_entry_msr_load_addr;
+
+	u64 cr3_target_value0;
+	u64 cr3_target_value1;
+	u64 cr3_target_value2;
+	u64 cr3_target_value3;
+
+	u32 page_fault_error_code_mask;
+	u32 page_fault_error_code_match;
+
+	u32 cr3_target_count;
+	u32 vm_exit_msr_store_count;
+	u32 vm_exit_msr_load_count;
+	u32 vm_entry_msr_load_count;
+
+	u64 tsc_offset;
+	u64 virtual_apic_page_addr;
+	u64 vmcs_link_pointer;
+
+	u64 guest_ia32_debugctl;
+	u64 guest_ia32_pat;
+	u64 guest_ia32_efer;
+
+	u64 guest_pdptr0;
+	u64 guest_pdptr1;
+	u64 guest_pdptr2;
+	u64 guest_pdptr3;
+
+	u64 guest_pending_dbg_exceptions;
+	u64 guest_sysenter_esp;
+	u64 guest_sysenter_eip;
+
+	u32 guest_activity_state;
+	u32 guest_sysenter_cs;
+
+	u64 cr0_guest_host_mask;
+	u64 cr4_guest_host_mask;
+	u64 cr0_read_shadow;
+	u64 cr4_read_shadow;
+	u64 guest_cr0;
+	u64 guest_cr3;
+	u64 guest_cr4;
+	u64 guest_dr7;
+
+	u64 host_fs_base;
+	u64 host_gs_base;
+	u64 host_tr_base;
+	u64 host_gdtr_base;
+	u64 host_idtr_base;
+	u64 host_rsp;
+
+	u64 ept_pointer;
+
+	u16 virtual_processor_id;
+	u16 padding16[3];
+
+	u64 padding64_2[5];
+	u64 guest_physical_address;
+
+	u32 vm_instruction_error;
+	u32 vm_exit_reason;
+	u32 vm_exit_intr_info;
+	u32 vm_exit_intr_error_code;
+	u32 idt_vectoring_info_field;
+	u32 idt_vectoring_error_code;
+	u32 vm_exit_instruction_len;
+	u32 vmx_instruction_info;
+
+	u64 exit_qualification;
+	u64 exit_io_instruction_ecx;
+	u64 exit_io_instruction_esi;
+	u64 exit_io_instruction_edi;
+	u64 exit_io_instruction_eip;
+
+	u64 guest_linear_address;
+	u64 guest_rsp;
+	u64 guest_rflags;
+
+	u32 guest_interruptibility_info;
+	u32 cpu_based_vm_exec_control;
+	u32 exception_bitmap;
+	u32 vm_entry_controls;
+	u32 vm_entry_intr_info_field;
+	u32 vm_entry_exception_error_code;
+	u32 vm_entry_instruction_len;
+	u32 tpr_threshold;
+
+	u64 guest_rip;
+
+	u32 hv_clean_fields;
+	u32 hv_padding_32;
+	u32 hv_synthetic_controls;
+	struct {
+		u32 nested_flush_hypercall:1;
+		u32 msr_bitmap:1;
+		u32 reserved:30;
+	} hv_enlightenments_control;
+	u32 hv_vp_id;
+
+	u64 hv_vm_id;
+	u64 partition_assist_page;
+	u64 padding64_4[4];
+	u64 guest_bndcfgs;
+	u64 padding64_5[7];
+	u64 xss_exit_bitmap;
+	u64 padding64_6[7];
+};
+
+#define HV_X64_MSR_VP_ASSIST_PAGE		0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE	0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK	\
+		(~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+extern struct hv_enlightened_vmcs *current_evmcs;
+extern struct hv_vp_assist_page *current_vp_assist;
+
+int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id);
+
+static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+	/* Keep the address bits selected by the mask, then set the enable bit */
+	u64 val = vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK;
+
+	val |= HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+	wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+	/* Remember the caller's mapping and flag eVMCS as enabled */
+	current_vp_assist = vp_assist;
+	enable_evmcs = true;
+	return 0;
+}
+
+static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
+{
+	struct hv_vp_assist_page *page = current_vp_assist;
+
+	page->current_nested_vmcs = vmcs_pa;
+	page->enlighten_vmentry = 1;
+	current_evmcs = vmcs;
+	return 0;
+}
+
+static inline int evmcs_vmptrst(uint64_t *value)
+{
+	uint64_t vmcs_pa = current_vp_assist->current_nested_vmcs;
+
+	*value = vmcs_pa & ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+	return 0;
+}
+
+static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
+{
+	switch (encoding) {
+	case GUEST_RIP:
+		*value = current_evmcs->guest_rip;
+		break;
+	case GUEST_RSP:
+		*value = current_evmcs->guest_rsp;
+		break;
+	case GUEST_RFLAGS:
+		*value = current_evmcs->guest_rflags;
+		break;
+	case HOST_IA32_PAT:
+		*value = current_evmcs->host_ia32_pat;
+		break;
+	case HOST_IA32_EFER:
+		*value = current_evmcs->host_ia32_efer;
+		break;
+	case HOST_CR0:
+		*value = current_evmcs->host_cr0;
+		break;
+	case HOST_CR3:
+		*value = current_evmcs->host_cr3;
+		break;
+	case HOST_CR4:
+		*value = current_evmcs->host_cr4;
+		break;
+	case HOST_IA32_SYSENTER_ESP:
+		*value = current_evmcs->host_ia32_sysenter_esp;
+		break;
+	case HOST_IA32_SYSENTER_EIP:
+		*value = current_evmcs->host_ia32_sysenter_eip;
+		break;
+	case HOST_RIP:
+		*value = current_evmcs->host_rip;
+		break;
+	case IO_BITMAP_A:
+		*value = current_evmcs->io_bitmap_a;
+		break;
+	case IO_BITMAP_B:
+		*value = current_evmcs->io_bitmap_b;
+		break;
+	case MSR_BITMAP:
+		*value = current_evmcs->msr_bitmap;
+		break;
+	case GUEST_ES_BASE:
+		*value = current_evmcs->guest_es_base;
+		break;
+	case GUEST_CS_BASE:
+		*value = current_evmcs->guest_cs_base;
+		break;
+	case GUEST_SS_BASE:
+		*value = current_evmcs->guest_ss_base;
+		break;
+	case GUEST_DS_BASE:
+		*value = current_evmcs->guest_ds_base;
+		break;
+	case GUEST_FS_BASE:
+		*value = current_evmcs->guest_fs_base;
+		break;
+	case GUEST_GS_BASE:
+		*value = current_evmcs->guest_gs_base;
+		break;
+	case GUEST_LDTR_BASE:
+		*value = current_evmcs->guest_ldtr_base;
+		break;
+	case GUEST_TR_BASE:
+		*value = current_evmcs->guest_tr_base;
+		break;
+	case GUEST_GDTR_BASE:
+		*value = current_evmcs->guest_gdtr_base;
+		break;
+	case GUEST_IDTR_BASE:
+		*value = current_evmcs->guest_idtr_base;
+		break;
+	case TSC_OFFSET:
+		*value = current_evmcs->tsc_offset;
+		break;
+	case VIRTUAL_APIC_PAGE_ADDR:
+		*value = current_evmcs->virtual_apic_page_addr;
+		break;
+	case VMCS_LINK_POINTER:
+		*value = current_evmcs->vmcs_link_pointer;
+		break;
+	case GUEST_IA32_DEBUGCTL:
+		*value = current_evmcs->guest_ia32_debugctl;
+		break;
+	case GUEST_IA32_PAT:
+		*value = current_evmcs->guest_ia32_pat;
+		break;
+	case GUEST_IA32_EFER:
+		*value = current_evmcs->guest_ia32_efer;
+		break;
+	case GUEST_PDPTR0:
+		*value = current_evmcs->guest_pdptr0;
+		break;
+	case GUEST_PDPTR1:
+		*value = current_evmcs->guest_pdptr1;
+		break;
+	case GUEST_PDPTR2:
+		*value = current_evmcs->guest_pdptr2;
+		break;
+	case GUEST_PDPTR3:
+		*value = current_evmcs->guest_pdptr3;
+		break;
+	case GUEST_PENDING_DBG_EXCEPTIONS:
+		*value = current_evmcs->guest_pending_dbg_exceptions;
+		break;
+	case GUEST_SYSENTER_ESP:
+		*value = current_evmcs->guest_sysenter_esp;
+		break;
+	case GUEST_SYSENTER_EIP:
+		*value = current_evmcs->guest_sysenter_eip;
+		break;
+	case CR0_GUEST_HOST_MASK:
+		*value = current_evmcs->cr0_guest_host_mask;
+		break;
+	case CR4_GUEST_HOST_MASK:
+		*value = current_evmcs->cr4_guest_host_mask;
+		break;
+	case CR0_READ_SHADOW:
+		*value = current_evmcs->cr0_read_shadow;
+		break;
+	case CR4_READ_SHADOW:
+		*value = current_evmcs->cr4_read_shadow;
+		break;
+	case GUEST_CR0:
+		*value = current_evmcs->guest_cr0;
+		break;
+	case GUEST_CR3:
+		*value = current_evmcs->guest_cr3;
+		break;
+	case GUEST_CR4:
+		*value = current_evmcs->guest_cr4;
+		break;
+	case GUEST_DR7:
+		*value = current_evmcs->guest_dr7;
+		break;
+	case HOST_FS_BASE:
+		*value = current_evmcs->host_fs_base;
+		break;
+	case HOST_GS_BASE:
+		*value = current_evmcs->host_gs_base;
+		break;
+	case HOST_TR_BASE:
+		*value = current_evmcs->host_tr_base;
+		break;
+	case HOST_GDTR_BASE:
+		*value = current_evmcs->host_gdtr_base;
+		break;
+	case HOST_IDTR_BASE:
+		*value = current_evmcs->host_idtr_base;
+		break;
+	case HOST_RSP:
+		*value = current_evmcs->host_rsp;
+		break;
+	case EPT_POINTER:
+		*value = current_evmcs->ept_pointer;
+		break;
+	case GUEST_BNDCFGS:
+		*value = current_evmcs->guest_bndcfgs;
+		break;
+	case XSS_EXIT_BITMAP:
+		*value = current_evmcs->xss_exit_bitmap;
+		break;
+	case GUEST_PHYSICAL_ADDRESS:
+		*value = current_evmcs->guest_physical_address;
+		break;
+	case EXIT_QUALIFICATION:
+		*value = current_evmcs->exit_qualification;
+		break;
+	case GUEST_LINEAR_ADDRESS:
+		*value = current_evmcs->guest_linear_address;
+		break;
+	case VM_EXIT_MSR_STORE_ADDR:
+		*value = current_evmcs->vm_exit_msr_store_addr;
+		break;
+	case VM_EXIT_MSR_LOAD_ADDR:
+		*value = current_evmcs->vm_exit_msr_load_addr;
+		break;
+	case VM_ENTRY_MSR_LOAD_ADDR:
+		*value = current_evmcs->vm_entry_msr_load_addr;
+		break;
+	case CR3_TARGET_VALUE0:
+		*value = current_evmcs->cr3_target_value0;
+		break;
+	case CR3_TARGET_VALUE1:
+		*value = current_evmcs->cr3_target_value1;
+		break;
+	case CR3_TARGET_VALUE2:
+		*value = current_evmcs->cr3_target_value2;
+		break;
+	case CR3_TARGET_VALUE3:
+		*value = current_evmcs->cr3_target_value3;
+		break;
+	case TPR_THRESHOLD:
+		*value = current_evmcs->tpr_threshold;
+		break;
+	case GUEST_INTERRUPTIBILITY_INFO:
+		*value = current_evmcs->guest_interruptibility_info;
+		break;
+	case CPU_BASED_VM_EXEC_CONTROL:
+		*value = current_evmcs->cpu_based_vm_exec_control;
+		break;
+	case EXCEPTION_BITMAP:
+		*value = current_evmcs->exception_bitmap;
+		break;
+	case VM_ENTRY_CONTROLS:
+		*value = current_evmcs->vm_entry_controls;
+		break;
+	case VM_ENTRY_INTR_INFO_FIELD:
+		*value = current_evmcs->vm_entry_intr_info_field;
+		break;
+	case VM_ENTRY_EXCEPTION_ERROR_CODE:
+		*value = current_evmcs->vm_entry_exception_error_code;
+		break;
+	case VM_ENTRY_INSTRUCTION_LEN:
+		*value = current_evmcs->vm_entry_instruction_len;
+		break;
+	case HOST_IA32_SYSENTER_CS:
+		*value = current_evmcs->host_ia32_sysenter_cs;
+		break;
+	case PIN_BASED_VM_EXEC_CONTROL:
+		*value = current_evmcs->pin_based_vm_exec_control;
+		break;
+	case VM_EXIT_CONTROLS:
+		*value = current_evmcs->vm_exit_controls;
+		break;
+	case SECONDARY_VM_EXEC_CONTROL:
+		*value = current_evmcs->secondary_vm_exec_control;
+		break;
+	case GUEST_ES_LIMIT:
+		*value = current_evmcs->guest_es_limit;
+		break;
+	case GUEST_CS_LIMIT:
+		*value = current_evmcs->guest_cs_limit;
+		break;
+	case GUEST_SS_LIMIT:
+		*value = current_evmcs->guest_ss_limit;
+		break;
+	case GUEST_DS_LIMIT:
+		*value = current_evmcs->guest_ds_limit;
+		break;
+	case GUEST_FS_LIMIT:
+		*value = current_evmcs->guest_fs_limit;
+		break;
+	case GUEST_GS_LIMIT:
+		*value = current_evmcs->guest_gs_limit;
+		break;
+	case GUEST_LDTR_LIMIT:
+		*value = current_evmcs->guest_ldtr_limit;
+		break;
+	case GUEST_TR_LIMIT:
+		*value = current_evmcs->guest_tr_limit;
+		break;
+	case GUEST_GDTR_LIMIT:
+		*value = current_evmcs->guest_gdtr_limit;
+		break;
+	case GUEST_IDTR_LIMIT:
+		*value = current_evmcs->guest_idtr_limit;
+		break;
+	case GUEST_ES_AR_BYTES:
+		*value = current_evmcs->guest_es_ar_bytes;
+		break;
+	case GUEST_CS_AR_BYTES:
+		*value = current_evmcs->guest_cs_ar_bytes;
+		break;
+	case GUEST_SS_AR_BYTES:
+		*value = current_evmcs->guest_ss_ar_bytes;
+		break;
+	case GUEST_DS_AR_BYTES:
+		*value = current_evmcs->guest_ds_ar_bytes;
+		break;
+	case GUEST_FS_AR_BYTES:
+		*value = current_evmcs->guest_fs_ar_bytes;
+		break;
+	case GUEST_GS_AR_BYTES:
+		*value = current_evmcs->guest_gs_ar_bytes;
+		break;
+	case GUEST_LDTR_AR_BYTES:
+		*value = current_evmcs->guest_ldtr_ar_bytes;
+		break;
+	case GUEST_TR_AR_BYTES:
+		*value = current_evmcs->guest_tr_ar_bytes;
+		break;
+	case GUEST_ACTIVITY_STATE:
+		*value = current_evmcs->guest_activity_state;
+		break;
+	case GUEST_SYSENTER_CS:
+		*value = current_evmcs->guest_sysenter_cs;
+		break;
+	case VM_INSTRUCTION_ERROR:
+		*value = current_evmcs->vm_instruction_error;
+		break;
+	case VM_EXIT_REASON:
+		*value = current_evmcs->vm_exit_reason;
+		break;
+	case VM_EXIT_INTR_INFO:
+		*value = current_evmcs->vm_exit_intr_info;
+		break;
+	case VM_EXIT_INTR_ERROR_CODE:
+		*value = current_evmcs->vm_exit_intr_error_code;
+		break;
+	case IDT_VECTORING_INFO_FIELD:
+		*value = current_evmcs->idt_vectoring_info_field;
+		break;
+	case IDT_VECTORING_ERROR_CODE:
+		*value = current_evmcs->idt_vectoring_error_code;
+		break;
+	case VM_EXIT_INSTRUCTION_LEN:
+		*value = current_evmcs->vm_exit_instruction_len;
+		break;
+	case VMX_INSTRUCTION_INFO:
+		*value = current_evmcs->vmx_instruction_info;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MASK:
+		*value = current_evmcs->page_fault_error_code_mask;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MATCH:
+		*value = current_evmcs->page_fault_error_code_match;
+		break;
+	case CR3_TARGET_COUNT:
+		*value = current_evmcs->cr3_target_count;
+		break;
+	case VM_EXIT_MSR_STORE_COUNT:
+		*value = current_evmcs->vm_exit_msr_store_count;
+		break;
+	case VM_EXIT_MSR_LOAD_COUNT:
+		*value = current_evmcs->vm_exit_msr_load_count;
+		break;
+	case VM_ENTRY_MSR_LOAD_COUNT:
+		*value = current_evmcs->vm_entry_msr_load_count;
+		break;
+	case HOST_ES_SELECTOR:
+		*value = current_evmcs->host_es_selector;
+		break;
+	case HOST_CS_SELECTOR:
+		*value = current_evmcs->host_cs_selector;
+		break;
+	case HOST_SS_SELECTOR:
+		*value = current_evmcs->host_ss_selector;
+		break;
+	case HOST_DS_SELECTOR:
+		*value = current_evmcs->host_ds_selector;
+		break;
+	case HOST_FS_SELECTOR:
+		*value = current_evmcs->host_fs_selector;
+		break;
+	case HOST_GS_SELECTOR:
+		*value = current_evmcs->host_gs_selector;
+		break;
+	case HOST_TR_SELECTOR:
+		*value = current_evmcs->host_tr_selector;
+		break;
+	case GUEST_ES_SELECTOR:
+		*value = current_evmcs->guest_es_selector;
+		break;
+	case GUEST_CS_SELECTOR:
+		*value = current_evmcs->guest_cs_selector;
+		break;
+	case GUEST_SS_SELECTOR:
+		*value = current_evmcs->guest_ss_selector;
+		break;
+	case GUEST_DS_SELECTOR:
+		*value = current_evmcs->guest_ds_selector;
+		break;
+	case GUEST_FS_SELECTOR:
+		*value = current_evmcs->guest_fs_selector;
+		break;
+	case GUEST_GS_SELECTOR:
+		*value = current_evmcs->guest_gs_selector;
+		break;
+	case GUEST_LDTR_SELECTOR:
+		*value = current_evmcs->guest_ldtr_selector;
+		break;
+	case GUEST_TR_SELECTOR:
+		*value = current_evmcs->guest_tr_selector;
+		break;
+	case VIRTUAL_PROCESSOR_ID:
+		*value = current_evmcs->virtual_processor_id;
+		break;
+	default: return 1;
+	}
+
+	return 0;
+}
+
+static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
+{
+	switch (encoding) {	/* one case per supported field: @value is assigned to the like-named current_evmcs member */
+	case GUEST_RIP:
+		current_evmcs->guest_rip = value;
+		break;
+	case GUEST_RSP:
+		current_evmcs->guest_rsp = value;
+		break;
+	case GUEST_RFLAGS:
+		current_evmcs->guest_rflags = value;
+		break;
+	case HOST_IA32_PAT:
+		current_evmcs->host_ia32_pat = value;
+		break;
+	case HOST_IA32_EFER:
+		current_evmcs->host_ia32_efer = value;
+		break;
+	case HOST_CR0:
+		current_evmcs->host_cr0 = value;
+		break;
+	case HOST_CR3:
+		current_evmcs->host_cr3 = value;
+		break;
+	case HOST_CR4:
+		current_evmcs->host_cr4 = value;
+		break;
+	case HOST_IA32_SYSENTER_ESP:
+		current_evmcs->host_ia32_sysenter_esp = value;
+		break;
+	case HOST_IA32_SYSENTER_EIP:
+		current_evmcs->host_ia32_sysenter_eip = value;
+		break;
+	case HOST_RIP:
+		current_evmcs->host_rip = value;
+		break;
+	case IO_BITMAP_A:
+		current_evmcs->io_bitmap_a = value;
+		break;
+	case IO_BITMAP_B:
+		current_evmcs->io_bitmap_b = value;
+		break;
+	case MSR_BITMAP:
+		current_evmcs->msr_bitmap = value;
+		break;
+	case GUEST_ES_BASE:
+		current_evmcs->guest_es_base = value;
+		break;
+	case GUEST_CS_BASE:
+		current_evmcs->guest_cs_base = value;
+		break;
+	case GUEST_SS_BASE:
+		current_evmcs->guest_ss_base = value;
+		break;
+	case GUEST_DS_BASE:
+		current_evmcs->guest_ds_base = value;
+		break;
+	case GUEST_FS_BASE:
+		current_evmcs->guest_fs_base = value;
+		break;
+	case GUEST_GS_BASE:
+		current_evmcs->guest_gs_base = value;
+		break;
+	case GUEST_LDTR_BASE:
+		current_evmcs->guest_ldtr_base = value;
+		break;
+	case GUEST_TR_BASE:
+		current_evmcs->guest_tr_base = value;
+		break;
+	case GUEST_GDTR_BASE:
+		current_evmcs->guest_gdtr_base = value;
+		break;
+	case GUEST_IDTR_BASE:
+		current_evmcs->guest_idtr_base = value;
+		break;
+	case TSC_OFFSET:
+		current_evmcs->tsc_offset = value;
+		break;
+	case VIRTUAL_APIC_PAGE_ADDR:
+		current_evmcs->virtual_apic_page_addr = value;
+		break;
+	case VMCS_LINK_POINTER:
+		current_evmcs->vmcs_link_pointer = value;
+		break;
+	case GUEST_IA32_DEBUGCTL:
+		current_evmcs->guest_ia32_debugctl = value;
+		break;
+	case GUEST_IA32_PAT:
+		current_evmcs->guest_ia32_pat = value;
+		break;
+	case GUEST_IA32_EFER:
+		current_evmcs->guest_ia32_efer = value;
+		break;
+	case GUEST_PDPTR0:
+		current_evmcs->guest_pdptr0 = value;
+		break;
+	case GUEST_PDPTR1:
+		current_evmcs->guest_pdptr1 = value;
+		break;
+	case GUEST_PDPTR2:
+		current_evmcs->guest_pdptr2 = value;
+		break;
+	case GUEST_PDPTR3:
+		current_evmcs->guest_pdptr3 = value;
+		break;
+	case GUEST_PENDING_DBG_EXCEPTIONS:
+		current_evmcs->guest_pending_dbg_exceptions = value;
+		break;
+	case GUEST_SYSENTER_ESP:
+		current_evmcs->guest_sysenter_esp = value;
+		break;
+	case GUEST_SYSENTER_EIP:
+		current_evmcs->guest_sysenter_eip = value;
+		break;
+	case CR0_GUEST_HOST_MASK:
+		current_evmcs->cr0_guest_host_mask = value;
+		break;
+	case CR4_GUEST_HOST_MASK:
+		current_evmcs->cr4_guest_host_mask = value;
+		break;
+	case CR0_READ_SHADOW:
+		current_evmcs->cr0_read_shadow = value;
+		break;
+	case CR4_READ_SHADOW:
+		current_evmcs->cr4_read_shadow = value;
+		break;
+	case GUEST_CR0:
+		current_evmcs->guest_cr0 = value;
+		break;
+	case GUEST_CR3:
+		current_evmcs->guest_cr3 = value;
+		break;
+	case GUEST_CR4:
+		current_evmcs->guest_cr4 = value;
+		break;
+	case GUEST_DR7:
+		current_evmcs->guest_dr7 = value;
+		break;
+	case HOST_FS_BASE:
+		current_evmcs->host_fs_base = value;
+		break;
+	case HOST_GS_BASE:
+		current_evmcs->host_gs_base = value;
+		break;
+	case HOST_TR_BASE:
+		current_evmcs->host_tr_base = value;
+		break;
+	case HOST_GDTR_BASE:
+		current_evmcs->host_gdtr_base = value;
+		break;
+	case HOST_IDTR_BASE:
+		current_evmcs->host_idtr_base = value;
+		break;
+	case HOST_RSP:
+		current_evmcs->host_rsp = value;
+		break;
+	case EPT_POINTER:
+		current_evmcs->ept_pointer = value;
+		break;
+	case GUEST_BNDCFGS:
+		current_evmcs->guest_bndcfgs = value;
+		break;
+	case XSS_EXIT_BITMAP:
+		current_evmcs->xss_exit_bitmap = value;
+		break;
+	case GUEST_PHYSICAL_ADDRESS:
+		current_evmcs->guest_physical_address = value;
+		break;
+	case EXIT_QUALIFICATION:
+		current_evmcs->exit_qualification = value;
+		break;
+	case GUEST_LINEAR_ADDRESS:
+		current_evmcs->guest_linear_address = value;
+		break;
+	case VM_EXIT_MSR_STORE_ADDR:
+		current_evmcs->vm_exit_msr_store_addr = value;
+		break;
+	case VM_EXIT_MSR_LOAD_ADDR:
+		current_evmcs->vm_exit_msr_load_addr = value;
+		break;
+	case VM_ENTRY_MSR_LOAD_ADDR:
+		current_evmcs->vm_entry_msr_load_addr = value;
+		break;
+	case CR3_TARGET_VALUE0:
+		current_evmcs->cr3_target_value0 = value;
+		break;
+	case CR3_TARGET_VALUE1:
+		current_evmcs->cr3_target_value1 = value;
+		break;
+	case CR3_TARGET_VALUE2:
+		current_evmcs->cr3_target_value2 = value;
+		break;
+	case CR3_TARGET_VALUE3:
+		current_evmcs->cr3_target_value3 = value;
+		break;
+	case TPR_THRESHOLD:
+		current_evmcs->tpr_threshold = value;
+		break;
+	case GUEST_INTERRUPTIBILITY_INFO:
+		current_evmcs->guest_interruptibility_info = value;
+		break;
+	case CPU_BASED_VM_EXEC_CONTROL:
+		current_evmcs->cpu_based_vm_exec_control = value;
+		break;
+	case EXCEPTION_BITMAP:
+		current_evmcs->exception_bitmap = value;
+		break;
+	case VM_ENTRY_CONTROLS:
+		current_evmcs->vm_entry_controls = value;
+		break;
+	case VM_ENTRY_INTR_INFO_FIELD:
+		current_evmcs->vm_entry_intr_info_field = value;
+		break;
+	case VM_ENTRY_EXCEPTION_ERROR_CODE:
+		current_evmcs->vm_entry_exception_error_code = value;
+		break;
+	case VM_ENTRY_INSTRUCTION_LEN:
+		current_evmcs->vm_entry_instruction_len = value;
+		break;
+	case HOST_IA32_SYSENTER_CS:
+		current_evmcs->host_ia32_sysenter_cs = value;
+		break;
+	case PIN_BASED_VM_EXEC_CONTROL:
+		current_evmcs->pin_based_vm_exec_control = value;
+		break;
+	case VM_EXIT_CONTROLS:
+		current_evmcs->vm_exit_controls = value;
+		break;
+	case SECONDARY_VM_EXEC_CONTROL:
+		current_evmcs->secondary_vm_exec_control = value;
+		break;
+	case GUEST_ES_LIMIT:
+		current_evmcs->guest_es_limit = value;
+		break;
+	case GUEST_CS_LIMIT:
+		current_evmcs->guest_cs_limit = value;
+		break;
+	case GUEST_SS_LIMIT:
+		current_evmcs->guest_ss_limit = value;
+		break;
+	case GUEST_DS_LIMIT:
+		current_evmcs->guest_ds_limit = value;
+		break;
+	case GUEST_FS_LIMIT:
+		current_evmcs->guest_fs_limit = value;
+		break;
+	case GUEST_GS_LIMIT:
+		current_evmcs->guest_gs_limit = value;
+		break;
+	case GUEST_LDTR_LIMIT:
+		current_evmcs->guest_ldtr_limit = value;
+		break;
+	case GUEST_TR_LIMIT:
+		current_evmcs->guest_tr_limit = value;
+		break;
+	case GUEST_GDTR_LIMIT:
+		current_evmcs->guest_gdtr_limit = value;
+		break;
+	case GUEST_IDTR_LIMIT:
+		current_evmcs->guest_idtr_limit = value;
+		break;
+	case GUEST_ES_AR_BYTES:
+		current_evmcs->guest_es_ar_bytes = value;
+		break;
+	case GUEST_CS_AR_BYTES:
+		current_evmcs->guest_cs_ar_bytes = value;
+		break;
+	case GUEST_SS_AR_BYTES:
+		current_evmcs->guest_ss_ar_bytes = value;
+		break;
+	case GUEST_DS_AR_BYTES:
+		current_evmcs->guest_ds_ar_bytes = value;
+		break;
+	case GUEST_FS_AR_BYTES:
+		current_evmcs->guest_fs_ar_bytes = value;
+		break;
+	case GUEST_GS_AR_BYTES:
+		current_evmcs->guest_gs_ar_bytes = value;
+		break;
+	case GUEST_LDTR_AR_BYTES:
+		current_evmcs->guest_ldtr_ar_bytes = value;
+		break;
+	case GUEST_TR_AR_BYTES:
+		current_evmcs->guest_tr_ar_bytes = value;
+		break;
+	case GUEST_ACTIVITY_STATE:
+		current_evmcs->guest_activity_state = value;
+		break;
+	case GUEST_SYSENTER_CS:
+		current_evmcs->guest_sysenter_cs = value;
+		break;
+	case VM_INSTRUCTION_ERROR:
+		current_evmcs->vm_instruction_error = value;
+		break;
+	case VM_EXIT_REASON:
+		current_evmcs->vm_exit_reason = value;
+		break;
+	case VM_EXIT_INTR_INFO:
+		current_evmcs->vm_exit_intr_info = value;
+		break;
+	case VM_EXIT_INTR_ERROR_CODE:
+		current_evmcs->vm_exit_intr_error_code = value;
+		break;
+	case IDT_VECTORING_INFO_FIELD:
+		current_evmcs->idt_vectoring_info_field = value;
+		break;
+	case IDT_VECTORING_ERROR_CODE:
+		current_evmcs->idt_vectoring_error_code = value;
+		break;
+	case VM_EXIT_INSTRUCTION_LEN:
+		current_evmcs->vm_exit_instruction_len = value;
+		break;
+	case VMX_INSTRUCTION_INFO:
+		current_evmcs->vmx_instruction_info = value;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MASK:
+		current_evmcs->page_fault_error_code_mask = value;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MATCH:
+		current_evmcs->page_fault_error_code_match = value;
+		break;
+	case CR3_TARGET_COUNT:
+		current_evmcs->cr3_target_count = value;
+		break;
+	case VM_EXIT_MSR_STORE_COUNT:
+		current_evmcs->vm_exit_msr_store_count = value;
+		break;
+	case VM_EXIT_MSR_LOAD_COUNT:
+		current_evmcs->vm_exit_msr_load_count = value;
+		break;
+	case VM_ENTRY_MSR_LOAD_COUNT:
+		current_evmcs->vm_entry_msr_load_count = value;
+		break;
+	case HOST_ES_SELECTOR:
+		current_evmcs->host_es_selector = value;
+		break;
+	case HOST_CS_SELECTOR:
+		current_evmcs->host_cs_selector = value;
+		break;
+	case HOST_SS_SELECTOR:
+		current_evmcs->host_ss_selector = value;
+		break;
+	case HOST_DS_SELECTOR:
+		current_evmcs->host_ds_selector = value;
+		break;
+	case HOST_FS_SELECTOR:
+		current_evmcs->host_fs_selector = value;
+		break;
+	case HOST_GS_SELECTOR:
+		current_evmcs->host_gs_selector = value;
+		break;
+	case HOST_TR_SELECTOR:
+		current_evmcs->host_tr_selector = value;
+		break;
+	case GUEST_ES_SELECTOR:
+		current_evmcs->guest_es_selector = value;
+		break;
+	case GUEST_CS_SELECTOR:
+		current_evmcs->guest_cs_selector = value;
+		break;
+	case GUEST_SS_SELECTOR:
+		current_evmcs->guest_ss_selector = value;
+		break;
+	case GUEST_DS_SELECTOR:
+		current_evmcs->guest_ds_selector = value;
+		break;
+	case GUEST_FS_SELECTOR:
+		current_evmcs->guest_fs_selector = value;
+		break;
+	case GUEST_GS_SELECTOR:
+		current_evmcs->guest_gs_selector = value;
+		break;
+	case GUEST_LDTR_SELECTOR:
+		current_evmcs->guest_ldtr_selector = value;
+		break;
+	case GUEST_TR_SELECTOR:
+		current_evmcs->guest_tr_selector = value;
+		break;
+	case VIRTUAL_PROCESSOR_ID:
+		current_evmcs->virtual_processor_id = value;
+		break;
+	default: return 1;	/* @encoding has no case above: nothing is written */
+	}
+
+	return 0;	/* @encoding matched a case and @value was stored */
+}
+
+static inline int evmcs_vmlaunch(void)
+{
+	int ret;	/* receives the result slot that the asm below pushes as 0 */
+
+	current_evmcs->hv_clean_fields = 0;	/* NOTE(review): presumably marks every eVMCS field as modified -- confirm against the Hyper-V TLFS */
+
+	__asm__ __volatile__("push %%rbp;"	/* rbp, rcx, rdx, rsi, rdi are saved here and restored after label 1 */
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"		/* result slot, initially 0 */
+			     "mov %%rsp, (%[host_rsp]);"	/* store rsp into current_evmcs->host_rsp */
+			     "lea 1f(%%rip), %%rax;"
+			     "mov %%rax, (%[host_rip]);"	/* store the address of label 1 into current_evmcs->host_rip */
+			     "vmlaunch;"
+			     "incq (%%rsp);"	/* executed only when control falls through past vmlaunch: slot becomes 1 */
+			     "1: pop %%rax;"	/* slot -> rax, the [ret] output; presumably also where a VM exit lands via host_rip */
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)
+			     : [host_rsp]"r"
+			       ((uint64_t)&current_evmcs->host_rsp),
+			       [host_rip]"r"
+			       ((uint64_t)&current_evmcs->host_rip)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",	/* GPRs that are not pushed above are declared clobbered instead */
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;	/* 1 if execution fell through past vmlaunch, 0 if label 1 was reached without running incq */
+}
+
+/* No guest state (e.g. GPRs) is established by this vmresume. Returns 1 if
+ * execution falls through past vmresume (incq runs), 0 if label 1 is reached
+ * without it. */
+static inline int evmcs_vmresume(void)
+{
+	int ret;	/* receives the result slot that the asm below pushes as 0 */
+
+	current_evmcs->hv_clean_fields = 0;	/* NOTE(review): presumably marks every eVMCS field as modified -- confirm against the Hyper-V TLFS */
+
+	__asm__ __volatile__("push %%rbp;"	/* rbp, rcx, rdx, rsi, rdi are saved here and restored after label 1 */
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"		/* result slot, initially 0 */
+			     "mov %%rsp, (%[host_rsp]);"	/* store rsp into current_evmcs->host_rsp */
+			     "lea 1f(%%rip), %%rax;"
+			     "mov %%rax, (%[host_rip]);"	/* store the address of label 1 into current_evmcs->host_rip */
+			     "vmresume;"
+			     "incq (%%rsp);"	/* executed only when control falls through past vmresume: slot becomes 1 */
+			     "1: pop %%rax;"	/* slot -> rax, the [ret] output; presumably also where a VM exit lands via host_rip */
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)
+			     : [host_rsp]"r"
+			       ((uint64_t)&current_evmcs->host_rsp),
+			       [host_rip]"r"
+			       ((uint64_t)&current_evmcs->host_rip)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",	/* GPRs that are not pushed above are declared clobbered instead */
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;
+}
+
+#endif /* !SELFTEST_KVM_EVMCS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
new file mode 100644
index 000000000..7d29aa786
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -0,0 +1,348 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/kvm_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+#ifndef SELFTEST_KVM_UTIL_H
+#define SELFTEST_KVM_UTIL_H
+
+#include "test_util.h"
+
+#include "asm/kvm.h"
+#include "linux/list.h"
+#include "linux/kvm.h"
+#include <sys/ioctl.h>
+
+#include "sparsebit.h"
+
+
+/*
+ * Callers of kvm_util only have an incomplete/opaque description of the
+ * structure kvm_util is using to maintain the state of a VM.
+ */
+struct kvm_vm;
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR		0x2000
+
+#define DEFAULT_GUEST_PHY_PAGES		512
+#define DEFAULT_GUEST_STACK_VADDR_MIN	0xab6000
+#define DEFAULT_STACK_PGS		5
+
+enum vm_guest_mode {	/* values are used as indices into vm_guest_mode_string[] */
+	VM_MODE_P52V48_4K,	/* presumably P<PA bits>V<VA bits>_<page size>: 52-bit PA, 48-bit VA, 4K pages -- confirm */
+	VM_MODE_P52V48_64K,
+	VM_MODE_P48V48_4K,
+	VM_MODE_P48V48_64K,
+	VM_MODE_P40V48_4K,
+	VM_MODE_P40V48_64K,
+	VM_MODE_PXXV48_4K,	/* For 48bits VA but ANY bits PA */
+	NUM_VM_MODES,		/* count of the modes above, not a mode itself; must stay last */
+};
+
+#if defined(__aarch64__)
+#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#elif defined(__x86_64__)
+#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#else
+#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
+#endif
+
+#define vm_guest_mode_string(m) vm_guest_mode_string[m]
+extern const char * const vm_guest_mode_string[];
+
+enum vm_mem_backing_src_type {	/* type of the src_type argument of vm_userspace_mem_region_add() */
+	VM_MEM_SRC_ANONYMOUS,
+	VM_MEM_SRC_ANONYMOUS_THP,	/* presumably anonymous memory backed by transparent huge pages -- confirm */
+	VM_MEM_SRC_ANONYMOUS_HUGETLB,	/* presumably anonymous hugetlb-backed memory -- confirm */
+};
+
+int kvm_check_cap(long cap);
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
+int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
+		    struct kvm_enable_cap *cap);
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
+
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
+void kvm_vm_free(struct kvm_vm *vmp);
+void kvm_vm_restart(struct kvm_vm *vmp, int perm);
+void kvm_vm_release(struct kvm_vm *vmp);
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
+void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+			    uint64_t first_page, uint32_t num_pages);
+
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
+		       size_t len);
+
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+		     uint32_t data_memslot, uint32_t pgd_memslot);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+/*
+ * VM VCPU Dump
+ *
+ * Input Args:
+ *   stream - Output FILE stream
+ *   vm     - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by @vcpuid, within the VM
+ * given by @vm, to the FILE stream given by @stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
+	       uint8_t indent);
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+	enum vm_mem_backing_src_type src_type,
+	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+	uint32_t flags);
+
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+		void *arg);
+int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+		void *arg);
+void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+			  uint32_t data_memslot, uint32_t pgd_memslot);
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+	      unsigned int npages, uint32_t pgd_memslot);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent VM physical address
+ *
+ * Returns the VM physical address of the translated VM virtual
+ * address given by @gva.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_guest_debug *debug);
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+		       struct kvm_mp_state *mp_state);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+
+/*
+ * VM VCPU Args Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   num - number of arguments
+ *   ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first @num function input registers of the VCPU with @vcpuid,
+ * per the C calling convention of the architecture, to the values given
+ * as variable args. Each of the variable args is expected to be of type
+ * uint64_t. The maximum @num can be is specific to the architecture.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
+void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
+		  struct kvm_fpu *fpu);
+void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
+		  struct kvm_fpu *fpu);
+void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
+void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
+#ifdef __KVM_HAVE_VCPU_EVENTS
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+		     struct kvm_vcpu_events *events);
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+		     struct kvm_vcpu_events *events);
+#endif
+#ifdef __x86_64__
+void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
+			   struct kvm_nested_state *state);
+int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_nested_state *state, bool ignore_error);
+#endif
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vaddr - VM Virtual Address
+ *   paddr - VM Physical Address
+ *   memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within @vm, creates a virtual translation for the page starting
+ * at @vaddr to the page starting at @paddr.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+		 uint32_t memslot);
+
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+			     uint32_t memslot);
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+			      vm_paddr_t paddr_min, uint32_t memslot);
+
+/*
+ * Create a VM with reasonable defaults
+ *
+ * Input Args:
+ *   vcpuid - The id of the single VCPU to add to the VM.
+ *   extra_mem_pages - The number of extra pages to add (this will
+ *                     decide how much extra space we will need to
+ *                     setup the page tables using memslot 0)
+ *   guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code);
+
+/*
+ * Adds a vCPU with reasonable defaults (e.g. a stack)
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - The id of the VCPU to add to the VM.
+ *   guest_code - The vCPU's entry point
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+
+bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+
+unsigned int vm_get_page_size(struct kvm_vm *vm);
+unsigned int vm_get_page_shift(struct kvm_vm *vm);
+unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+int vm_get_fd(struct kvm_vm *vm);
+
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
+unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
+unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
+static inline unsigned int
+vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+	unsigned int host_pages = vm_num_host_pages(mode, num_guest_pages);
+	unsigned int adjusted = vm_num_guest_pages(mode, host_pages);
+#ifdef __s390x__
+	/* Round up to a multiple of 256 pages: s390 requires 1M aligned guest sizes */
+	adjusted = (adjusted + 255) & ~255;
+#endif
+	return adjusted;	/* guest -> host -> guest page-count round trip of @num_guest_pages */
+}
+
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+				 uint64_t end);
+
+struct kvm_dirty_log *
+allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
+
+int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
+
+#define sync_global_to_guest(vm, g) ({				\
+	typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g));	/* &g is passed as the gva; _p is its host address in @vm */ \
+	memcpy(_p, &(g), sizeof(g));				/* host's g -> guest's copy */ \
+})
+
+#define sync_global_from_guest(vm, g) ({			\
+	typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g));	/* &g is passed as the gva; _p is its host address in @vm */ \
+	memcpy(&(g), _p, sizeof(g));				/* guest's copy -> host's g */ \
+})
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
+
+/* Common ucalls */
+enum {
+	UCALL_NONE,
+	UCALL_SYNC,	/* issued by GUEST_SYNC() and GUEST_SYNC_ARGS() */
+	UCALL_ABORT,	/* issued by __GUEST_ASSERT() when its condition is false */
+	UCALL_DONE,	/* issued by GUEST_DONE() */
+};
+
+#define UCALL_MAX_ARGS 6
+
+struct ucall {
+	uint64_t cmd;	/* presumably one of the UCALL_* values above -- confirm against ucall()/get_ucall() */
+	uint64_t args[UCALL_MAX_ARGS];	/* room for UCALL_MAX_ARGS (6) arguments */
+};
+
+void ucall_init(struct kvm_vm *vm, void *arg);
+void ucall_uninit(struct kvm_vm *vm);
+void ucall(uint64_t cmd, int nargs, ...);
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
+
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
+				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
+#define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_DONE()		ucall(UCALL_DONE, 0)
+#define __GUEST_ASSERT(_condition, _nargs, _args...) do {	\
+	if (!(_condition))	/* nothing is reported while the condition holds */ \
+		ucall(UCALL_ABORT, 2 + (_nargs),	/* 2 = message string + __LINE__; _nargs parenthesized so any expression is safe */ \
+			"Failed guest assert: "			\
+			#_condition, __LINE__, _args);		\
+} while (0)
+
+#define GUEST_ASSERT(_condition) \
+	__GUEST_ASSERT((_condition), 0, 0)
+
+#define GUEST_ASSERT_1(_condition, arg1) \
+	__GUEST_ASSERT((_condition), 1, (arg1))
+
+#define GUEST_ASSERT_2(_condition, arg1, arg2) \
+	__GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+
+#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
+	__GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+
+#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
+	__GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+
+#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
new file mode 100644
index 000000000..261805205
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tools/testing/selftests/kvm/include/perf_test_util.h
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H
+#define SELFTEST_KVM_PERF_TEST_UTIL_H
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define MAX_VCPUS 512
+
+#define PAGE_SHIFT_4K  12
+#define PTES_PER_4K_PT 512
+
+#define TEST_MEM_SLOT_INDEX		1
+
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM		0xc0000000
+
+#define DEFAULT_PER_VCPU_MEM_SIZE	(1 << 30) /* 1G */
+
+/*
+ * Guest physical memory offset of the testing memory slot.
+ * This will be set to the topmost valid physical address minus
+ * the test memory size.
+ */
+static uint64_t guest_test_phys_mem;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+/* Number of VCPUs for the test */
+static int nr_vcpus = 1;
+
+struct vcpu_args {
+	uint64_t gva;	/* add_vcpus(): guest_test_virt_mem + vcpu_id * vcpu_memory_bytes */
+	uint64_t pages;	/* add_vcpus(): vcpu_memory_bytes / guest page size */
+
+	/* Set by add_vcpus(); guest_code() asserts it matches the id it was passed */
+	int vcpu_id;
+};
+
+struct perf_test_args {
+	struct kvm_vm *vm;	/* set by create_vm() */
+	uint64_t host_page_size;	/* getpagesize(), set by create_vm() */
+	uint64_t guest_page_size;	/* vm_get_page_size(vm), set by create_vm() */
+	int wr_fract;	/* guest_code() writes pages whose index is a multiple of this and reads the rest; not set in this header */
+
+	struct vcpu_args vcpu_args[MAX_VCPUS];	/* indexed by vCPU id */
+};
+
+static struct perf_test_args perf_test_args;
+
+/*
+ * Loop forever over this vCPU's region, touching the first 8 bytes of each
+ * page (write every wr_fract-th page, read the others); GUEST_SYNC(1) per pass.
+ */
+static void guest_code(uint32_t vcpu_id)
+{
+	struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+	uint64_t gva;
+	uint64_t pages;
+	uint64_t i;	/* same width as pages, so the index cannot overflow before reaching it */
+
+	/* Make sure vCPU args data structure is not corrupt. */
+	GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
+
+	gva = vcpu_args->gva;
+	pages = vcpu_args->pages;
+
+	while (true) {
+		for (i = 0; i < pages; i++) {
+			uint64_t addr = gva + (i * perf_test_args.guest_page_size);
+
+			if (i % perf_test_args.wr_fract == 0)	/* wr_fract is expected to be >= 1 */
+				*(uint64_t *)addr = 0x0123456789ABCDEF;
+			else
+				READ_ONCE(*(uint64_t *)addr);
+		}
+
+		GUEST_SYNC(1);	/* one full pass over the region is complete */
+	}
+}
+
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
+				uint64_t vcpu_memory_bytes)
+{
+	struct kvm_vm *vm;
+	uint64_t pages = DEFAULT_GUEST_PHY_PAGES;	/* page count handed to vm_create() */
+	uint64_t guest_num_pages;	/* size of the extra test memory slot, in guest pages */
+
+	/* Account for a few pages per-vCPU for stacks */
+	pages += DEFAULT_STACK_PGS * vcpus;
+
+	/*
+	 * Reserve twice the amount of memory needed to map the test region and
+	 * the page table / stacks region, at 4k, for page tables. Do the
+	 * calculation with 4K page size: the smallest of all archs. (e.g., 64K
+	 * page size guest will need even less memory for page tables).
+	 */
+	pages += (2 * pages) / PTES_PER_4K_PT;
+	pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
+		 PTES_PER_4K_PT;
+	pages = vm_adjust_num_guest_pages(mode, pages);
+
+	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+	vm = vm_create(mode, pages, O_RDWR);
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+#ifdef __x86_64__
+	vm_create_irqchip(vm);
+#endif
+
+	perf_test_args.vm = vm;
+	perf_test_args.guest_page_size = vm_get_page_size(vm);
+	perf_test_args.host_page_size = getpagesize();
+
+	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
+		    "Guest memory size is not guest page size aligned.");
+
+	guest_num_pages = (vcpus * vcpu_memory_bytes) /
+			  perf_test_args.guest_page_size;
+	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+
+	/*
+	 * If there should be more memory in the guest test region than there
+	 * can be pages in the guest, it will definitely cause problems.
+	 */
+	TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+		    "Requested more guest memory than address space allows.\n"
+		    "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
+		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
+		    vcpu_memory_bytes);
+
+	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
+		    "Guest memory size is not host page size aligned.");
+
+	guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *	/* place the region guest_num_pages below the max gfn */
+			      perf_test_args.guest_page_size;
+	guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);	/* round down to a host page boundary (assumes a power-of-two page size) */
+
+#ifdef __s390x__
+	/* Align to 1M (segment size) */
+	guest_test_phys_mem &= ~((1 << 20) - 1);
+#endif
+
+	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+
+	/* Add an extra memory slot for testing */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    guest_test_phys_mem,
+				    TEST_MEM_SLOT_INDEX,
+				    guest_num_pages, 0);
+
+	/* Map the test memory slot at guest_test_virt_mem */
+	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+
+	ucall_init(vm, NULL);
+
+	return vm;	/* ELF loaded, test slot added and mapped, ucall initialised; no vCPUs added here */
+}
+
+static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
+{
+	int id;
+
+	/* Create each vCPU and record which slice of the test region it owns. */
+	for (id = 0; id < vcpus; id++) {
+		struct vcpu_args *args = &perf_test_args.vcpu_args[id];
+		uint64_t offset = id * vcpu_memory_bytes;
+		vm_paddr_t gpa = guest_test_phys_mem + offset;
+
+		vm_vcpu_add_default(vm, id, guest_code);
+
+#ifdef __x86_64__
+		vcpu_set_cpuid(vm, id, kvm_get_supported_cpuid());
+#endif
+
+		/* Slices are laid out back to back, one per vCPU id. */
+		args->vcpu_id = id;
+		args->pages = vcpu_memory_bytes /
+			      perf_test_args.guest_page_size;
+		args->gva = guest_test_virt_mem + offset;
+
+		pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+			 id, gpa, gpa + vcpu_memory_bytes);
+	}
+}
+
+#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h
new file mode 100644
index 000000000..e0e96a5f6
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390x/processor.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * s390x processor specific defines
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+/* Bits in the region/segment table entry */
+#define REGION_ENTRY_ORIGIN	(~0xfffUL) /* region/segment table origin   */
+#define REGION_ENTRY_PROTECT	0x200	 /* region protection bit	   */
+#define REGION_ENTRY_NOEXEC	0x100	 /* region no-execute bit	   */
+#define REGION_ENTRY_OFFSET	0xc0	 /* region table offset		   */
+#define REGION_ENTRY_INVALID	0x20	 /* invalid region table entry	   */
+#define REGION_ENTRY_TYPE	0x0c	 /* region/segment table type mask */
+#define REGION_ENTRY_LENGTH	0x03	 /* region third length		   */
+
+/* Bits in the page table entry */
+#define PAGE_INVALID	0x400		/* HW invalid bit    */
+#define PAGE_PROTECT	0x200		/* HW read-only bit  */
+#define PAGE_NOEXEC	0x100		/* HW no-execute bit */
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
new file mode 100644
index 000000000..12a9a4b9c
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/sparsebit.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Header file that describes API to the sparsebit library.
+ * This library provides a memory efficient means of storing
+ * the settings of bits indexed via a uint64_t.  Memory usage
+ * is reasonable, significantly less than (2^64 / 8) bytes, as
+ * long as bits that are mostly set or mostly cleared are close
+ * to each other.  This library is efficient in memory usage
+ * even in the case where most bits are set.
+ */
+
+#ifndef SELFTEST_KVM_SPARSEBIT_H
+#define SELFTEST_KVM_SPARSEBIT_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct sparsebit;			/* only forward-declared in this header */
+typedef uint64_t sparsebit_idx_t;	/* type of idx/prev/start arguments */
+typedef uint64_t sparsebit_num_t;	/* type of num arguments and sparsebit_num_set() */
+
+struct sparsebit *sparsebit_alloc(void);
+void sparsebit_free(struct sparsebit **sbitp);
+void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
+
+bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_set_num(struct sparsebit *sbit,
+			  sparsebit_idx_t idx, sparsebit_num_t num);
+bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_clear_num(struct sparsebit *sbit,
+			    sparsebit_idx_t idx, sparsebit_num_t num);
+sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
+bool sparsebit_any_set(struct sparsebit *sbit);
+bool sparsebit_any_clear(struct sparsebit *sbit);
+bool sparsebit_all_set(struct sparsebit *sbit);
+bool sparsebit_all_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
+				       sparsebit_idx_t start, sparsebit_num_t num);
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
+					 sparsebit_idx_t start, sparsebit_num_t num);
+
+void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start,
+		       sparsebit_num_t num);
+void sparsebit_set_all(struct sparsebit *sbitp);
+
+void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_clear_num(struct sparsebit *sbitp,
+			 sparsebit_idx_t start, sparsebit_num_t num);
+void sparsebit_clear_all(struct sparsebit *sbitp);
+
+void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
+		    unsigned int indent);
+void sparsebit_validate_internal(struct sparsebit *sbit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SELFTEST_KVM_SPARSEBIT_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
new file mode 100644
index 000000000..ffffa5604
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/test_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_TEST_UTIL_H
+#define SELFTEST_KVM_TEST_UTIL_H
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include "kselftest.h"
+
+static inline int _no_printf(const char *format, ...) { return 0; }
+/* pr_debug needs DEBUG, pr_info needs !QUIET; otherwise _no_printf() is called. */
+#ifdef DEBUG
+#define pr_debug(...) printf(__VA_ARGS__)
+#else
+#define pr_debug(...) _no_printf(__VA_ARGS__)
+#endif
+#ifndef QUIET
+#define pr_info(...) printf(__VA_ARGS__)
+#else
+#define pr_info(...) _no_printf(__VA_ARGS__)
+#endif
+
+void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+
+ssize_t test_write(int fd, const void *buf, size_t count);
+ssize_t test_read(int fd, void *buf, size_t count);
+int test_seq_read(const char *path, char **bufp, size_t *sizep);
+
+void test_assert(bool exp, const char *exp_str,
+		 const char *file, unsigned int line, const char *fmt, ...)
+		__attribute__((format(printf, 5, 6)));
+
+#define TEST_ASSERT(e, fmt, ...) \
+	test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+/* ASSERT_EQ evaluates a and b once each and reports both as unsigned long. */
+#define ASSERT_EQ(a, b) do { \
+	typeof(a) __a = (a); \
+	typeof(b) __b = (b); \
+	TEST_ASSERT(__a == __b, \
+		    "ASSERT_EQ(%s, %s) failed.\n" \
+		    "\t%s is %#lx\n" \
+		    "\t%s is %#lx", \
+		    #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+} while (0)
+/* TEST_FAIL is TEST_ASSERT with a constant-false condition. */
+#define TEST_FAIL(fmt, ...) \
+	TEST_ASSERT(false, fmt, ##__VA_ARGS__)
+
+size_t parse_size(const char *size);
+
+int64_t timespec_to_ns(struct timespec ts);
+struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
+struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
+struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
+struct timespec timespec_diff_now(struct timespec start);
+struct timespec timespec_div(struct timespec ts, int divisor);
+
+#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
new file mode 100644
index 000000000..8e61340b3
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -0,0 +1,422 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/processor.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <asm/msr-index.h>
+
+#define X86_EFLAGS_FIXED	(1U << 1)
+/* Bit masks for control register CR4 */
+#define X86_CR4_VME		(1UL << 0)
+#define X86_CR4_PVI		(1UL << 1)
+#define X86_CR4_TSD		(1UL << 2)
+#define X86_CR4_DE		(1UL << 3)
+#define X86_CR4_PSE		(1UL << 4)
+#define X86_CR4_PAE		(1UL << 5)
+#define X86_CR4_MCE		(1UL << 6)
+#define X86_CR4_PGE		(1UL << 7)
+#define X86_CR4_PCE		(1UL << 8)
+#define X86_CR4_OSFXSR		(1UL << 9)
+#define X86_CR4_OSXMMEXCPT	(1UL << 10)
+#define X86_CR4_UMIP		(1UL << 11)
+#define X86_CR4_VMXE		(1UL << 13)
+#define X86_CR4_SMXE		(1UL << 14)
+#define X86_CR4_FSGSBASE	(1UL << 16)
+#define X86_CR4_PCIDE		(1UL << 17)
+#define X86_CR4_OSXSAVE		(1UL << 18)
+#define X86_CR4_SMEP		(1UL << 20)
+#define X86_CR4_SMAP		(1UL << 21)
+#define X86_CR4_PKE		(1UL << 22)
+
+#define UNEXPECTED_VECTOR_PORT	0xfff0U
+
+/* General Registers in 64-Bit Mode (sixteen 64-bit slots, rax first) */
+struct gpr64_regs {
+	uint64_t rax;
+	uint64_t rcx;
+	uint64_t rdx;
+	uint64_t rbx;
+	uint64_t rsp;
+	uint64_t rbp;
+	uint64_t rsi;
+	uint64_t rdi;
+	uint64_t r8;
+	uint64_t r9;
+	uint64_t r10;
+	uint64_t r11;
+	uint64_t r12;
+	uint64_t r13;
+	uint64_t r14;
+	uint64_t r15;
+};
+
+struct desc64 {
+	uint16_t limit0;
+	uint16_t base0;				/* base bits 15:0 */
+	unsigned base1:8, type:4, s:1, dpl:2, p:1;	/* base1: base bits 23:16 */
+	unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; /* base2: bits 31:24 */
+	uint32_t base3;				/* base bits 63:32 */
+	uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {	/* packed; used as the sgdt/sidt output in get_gdt()/get_idt() */
+	uint16_t size;
+	uint64_t address;
+} __attribute__((packed));
+/* Return the 64-bit base; widen first, the bit-fields promote to signed int. */
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+	return ((uint64_t)desc->base3 << 32) | ((uint64_t)desc->base2 << 24) |
+	       ((uint64_t)desc->base1 << 16) | (uint64_t)desc->base0;
+}
+
+static inline uint64_t rdtsc(void)
+{
+	uint32_t eax, edx;
+	uint64_t tsc_val;
+	/*
+	 * The leading lfence waits (on Intel CPUs) until all previous
+	 * instructions have been executed.  The trailing lfence is for
+	 * software that requires RDTSC to be executed prior to execution
+	 * of any subsequent instruction.
+	 */
+	__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+	tsc_val = ((uint64_t)edx) << 32 | eax;
+	return tsc_val;
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+	uint32_t eax, edx;	/* TSC low/high halves; ECX is stored to *aux */
+
+	__asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
+	return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+	uint32_t a, d;	/* EAX (low half) and EDX (high half) of the result */
+
+	__asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+	return a | ((uint64_t) d << 32);
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+	uint32_t a = value;		/* low 32 bits, passed in EAX */
+	uint32_t d = value >> 32;	/* high 32 bits, passed in EDX */
+
+	__asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+
+static inline uint16_t inw(uint16_t port)
+{
+	uint16_t tmp;	/* 16-bit value read from @port, returned in AX */
+
+	__asm__ __volatile__("in %%dx, %%ax"
+		: /* output */ "=a" (tmp)
+		: /* input */ "d" (port));
+
+	return tmp;
+}
+
+static inline uint16_t get_es(void)
+{
+	uint16_t es;	/* receives the current ES selector */
+
+	__asm__ __volatile__("mov %%es, %[es]"
+			     : /* output */ [es]"=rm"(es));
+	return es;
+}
+
+static inline uint16_t get_cs(void)
+{
+	uint16_t cs;	/* receives the current CS selector */
+
+	__asm__ __volatile__("mov %%cs, %[cs]"
+			     : /* output */ [cs]"=rm"(cs));
+	return cs;
+}
+
+static inline uint16_t get_ss(void)
+{
+	uint16_t ss;	/* receives the current SS selector */
+
+	__asm__ __volatile__("mov %%ss, %[ss]"
+			     : /* output */ [ss]"=rm"(ss));
+	return ss;
+}
+
+static inline uint16_t get_ds(void)
+{
+	uint16_t ds;	/* receives the current DS selector */
+
+	__asm__ __volatile__("mov %%ds, %[ds]"
+			     : /* output */ [ds]"=rm"(ds));
+	return ds;
+}
+
+static inline uint16_t get_fs(void)
+{
+	uint16_t fs;	/* receives the current FS selector */
+
+	__asm__ __volatile__("mov %%fs, %[fs]"
+			     : /* output */ [fs]"=rm"(fs));
+	return fs;
+}
+
+static inline uint16_t get_gs(void)
+{
+	uint16_t gs;	/* receives the current GS selector */
+
+	__asm__ __volatile__("mov %%gs, %[gs]"
+			     : /* output */ [gs]"=rm"(gs));
+	return gs;
+}
+
+static inline uint16_t get_tr(void)
+{
+	uint16_t tr;	/* receives the task register selector via STR */
+
+	__asm__ __volatile__("str %[tr]"
+			     : /* output */ [tr]"=rm"(tr));
+	return tr;
+}
+
+static inline uint64_t get_cr0(void)
+{
+	uint64_t cr0;	/* receives the current CR0 value */
+
+	__asm__ __volatile__("mov %%cr0, %[cr0]"
+			     : /* output */ [cr0]"=r"(cr0));
+	return cr0;
+}
+
+static inline uint64_t get_cr3(void)
+{
+	uint64_t cr3;	/* receives the current CR3 value */
+
+	__asm__ __volatile__("mov %%cr3, %[cr3]"
+			     : /* output */ [cr3]"=r"(cr3));
+	return cr3;
+}
+
+static inline uint64_t get_cr4(void)
+{
+	uint64_t cr4;	/* receives the current CR4 value */
+
+	__asm__ __volatile__("mov %%cr4, %[cr4]"
+			     : /* output */ [cr4]"=r"(cr4));
+	return cr4;
+}
+/* Load @val into CR4; the asm carries a "memory" clobber. */
+static inline void set_cr4(uint64_t val)
+{
+	__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
+}
+
+static inline struct desc_ptr get_gdt(void)
+{
+	struct desc_ptr gdt;	/* written in place by SGDT, returned by value */
+	__asm__ __volatile__("sgdt %[gdt]"
+			     : /* output */ [gdt]"=m"(gdt));
+	return gdt;
+}
+
+static inline struct desc_ptr get_idt(void)
+{
+	struct desc_ptr idt;	/* written in place by SIDT, returned by value */
+	__asm__ __volatile__("sidt %[idt]"
+			     : /* output */ [idt]"=m"(idt));
+	return idt;
+}
+/* Write the 32-bit @value (EAX) to the I/O port @port (DX). */
+static inline void outl(uint16_t port, uint32_t value)
+{
+	__asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
+}
+
+#define SET_XMM(__var, __xmm) \
+	asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
+/* movq @val into xmm<n>; any n outside 0..7 falls through and does nothing. */
+static inline void set_xmm(int n, unsigned long val)
+{
+	switch (n) {
+	case 0:
+		SET_XMM(val, xmm0);
+		break;
+	case 1:
+		SET_XMM(val, xmm1);
+		break;
+	case 2:
+		SET_XMM(val, xmm2);
+		break;
+	case 3:
+		SET_XMM(val, xmm3);
+		break;
+	case 4:
+		SET_XMM(val, xmm4);
+		break;
+	case 5:
+		SET_XMM(val, xmm5);
+		break;
+	case 6:
+		SET_XMM(val, xmm6);
+		break;
+	case 7:
+		SET_XMM(val, xmm7);
+		break;
+	}
+}
+
+typedef unsigned long v1di __attribute__ ((vector_size (8)));
+static inline unsigned long get_xmm(int n)
+{
+	assert(n >= 0 && n <= 7);
+	/* Locals pinned to xmm0-xmm7 via register asm; the selected one is returned. */
+	register v1di xmm0 __asm__("%xmm0");
+	register v1di xmm1 __asm__("%xmm1");
+	register v1di xmm2 __asm__("%xmm2");
+	register v1di xmm3 __asm__("%xmm3");
+	register v1di xmm4 __asm__("%xmm4");
+	register v1di xmm5 __asm__("%xmm5");
+	register v1di xmm6 __asm__("%xmm6");
+	register v1di xmm7 __asm__("%xmm7");
+	switch (n) {
+	case 0:
+		return (unsigned long)xmm0;
+	case 1:
+		return (unsigned long)xmm1;
+	case 2:
+		return (unsigned long)xmm2;
+	case 3:
+		return (unsigned long)xmm3;
+	case 4:
+		return (unsigned long)xmm4;
+	case 5:
+		return (unsigned long)xmm5;
+	case 6:
+		return (unsigned long)xmm6;
+	case 7:
+		return (unsigned long)xmm7;
+	}
+	return 0;
+}
+
+bool is_intel_cpu(void);
+
+struct kvm_x86_state;
+struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
+		     struct kvm_x86_state *state);
+
+struct kvm_msr_list *kvm_get_msr_index_list(void);
+
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+/* Shorthand for kvm_get_supported_cpuid_index() with index 0. */
+static inline struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_entry(uint32_t function)
+{
+	return kvm_get_supported_cpuid_index(function, 0);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+		  uint64_t msr_value);
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+	  	  uint64_t msr_value);
+
+uint32_t kvm_get_cpuid_max_basic(void);
+uint32_t kvm_get_cpuid_max_extended(void);
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+
+struct ex_regs {	/* argument type of vm_handle_exception() handlers */
+	uint64_t rax, rcx, rdx, rbx;
+	uint64_t rbp, rsi, rdi;		/* note: no rsp slot here */
+	uint64_t r8, r9, r10, r11;
+	uint64_t r12, r13, r14, r15;
+	uint64_t vector;
+	uint64_t error_code;
+	uint64_t rip;
+	uint64_t cs;
+	uint64_t rflags;
+};
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+void vm_handle_exception(struct kvm_vm *vm, int vector,
+			void (*handler)(struct ex_regs *));
+
+/*
+ * set_cpuid() - overwrites a matching cpuid entry with the provided value.
+ *		 matches based on ent->function && ent->index. returns true
+ *		 if a match was found and successfully overwritten.
+ * @cpuid: the kvm cpuid list to modify.
+ * @ent: cpuid entry to insert
+ */
+bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent);
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+		       uint64_t a3);
+
+/*
+ * CR0 control bits (basic CPU control)
+ */
+#define X86_CR0_PE		(1UL << 0)	/* Protection Enable */
+#define X86_CR0_MP		(1UL << 1)	/* Monitor Coprocessor */
+#define X86_CR0_EM		(1UL << 2)	/* Emulation */
+#define X86_CR0_TS		(1UL << 3)	/* Task Switched */
+#define X86_CR0_ET		(1UL << 4)	/* Extension Type */
+#define X86_CR0_NE		(1UL << 5)	/* Numeric Error */
+#define X86_CR0_WP		(1UL << 16)	/* Write Protect */
+#define X86_CR0_AM		(1UL << 18)	/* Alignment Mask */
+#define X86_CR0_NW		(1UL << 29)	/* Not Write-through */
+#define X86_CR0_CD		(1UL << 30)	/* Cache Disable */
+#define X86_CR0_PG		(1UL << 31)	/* Paging */
+
+#define APIC_BASE_MSR	0x800
+#define X2APIC_ENABLE	(1UL << 10)
+#define	APIC_ICR	0x300
+#define		APIC_DEST_SELF		0x40000
+#define		APIC_DEST_ALLINC	0x80000
+#define		APIC_DEST_ALLBUT	0xC0000
+#define		APIC_ICR_RR_MASK	0x30000
+#define		APIC_ICR_RR_INVALID	0x00000
+#define		APIC_ICR_RR_INPROG	0x10000
+#define		APIC_ICR_RR_VALID	0x20000
+#define		APIC_INT_LEVELTRIG	0x08000
+#define		APIC_INT_ASSERT		0x04000
+#define		APIC_ICR_BUSY		0x01000
+#define		APIC_DEST_LOGICAL	0x00800
+#define		APIC_DEST_PHYSICAL	0x00000
+#define		APIC_DM_FIXED		0x00000
+#define		APIC_DM_FIXED_MASK	0x00700
+#define		APIC_DM_LOWEST		0x00100
+#define		APIC_DM_SMI		0x00200
+#define		APIC_DM_REMRD		0x00300
+#define		APIC_DM_NMI		0x00400
+#define		APIC_DM_INIT		0x00500
+#define		APIC_DM_STARTUP		0x00600
+#define		APIC_DM_EXTINT		0x00700
+#define		APIC_VECTOR_MASK	0x000FF
+#define	APIC_ICR2	0x310
+
+/* VMX_EPT_VPID_CAP bits */
+#define VMX_EPT_VPID_CAP_AD_BITS       (1ULL << 21)
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
new file mode 100644
index 000000000..f4ea2355d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/svm.h
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/svm.h
+ * This is a copy of arch/x86/include/asm/svm.h
+ *
+ */
+
+#ifndef SELFTEST_KVM_SVM_H
+#define SELFTEST_KVM_SVM_H
+
+enum {
+	INTERCEPT_INTR,
+	INTERCEPT_NMI,
+	INTERCEPT_SMI,
+	INTERCEPT_INIT,
+	INTERCEPT_VINTR,
+	INTERCEPT_SELECTIVE_CR0,
+	INTERCEPT_STORE_IDTR,
+	INTERCEPT_STORE_GDTR,
+	INTERCEPT_STORE_LDTR,
+	INTERCEPT_STORE_TR,
+	INTERCEPT_LOAD_IDTR,
+	INTERCEPT_LOAD_GDTR,
+	INTERCEPT_LOAD_LDTR,
+	INTERCEPT_LOAD_TR,
+	INTERCEPT_RDTSC,
+	INTERCEPT_RDPMC,
+	INTERCEPT_PUSHF,
+	INTERCEPT_POPF,
+	INTERCEPT_CPUID,
+	INTERCEPT_RSM,
+	INTERCEPT_IRET,
+	INTERCEPT_INTn,
+	INTERCEPT_INVD,
+	INTERCEPT_PAUSE,
+	INTERCEPT_HLT,
+	INTERCEPT_INVLPG,
+	INTERCEPT_INVLPGA,
+	INTERCEPT_IOIO_PROT,
+	INTERCEPT_MSR_PROT,
+	INTERCEPT_TASK_SWITCH,
+	INTERCEPT_FERR_FREEZE,
+	INTERCEPT_SHUTDOWN,
+	INTERCEPT_VMRUN,
+	INTERCEPT_VMMCALL,
+	INTERCEPT_VMLOAD,
+	INTERCEPT_VMSAVE,
+	INTERCEPT_STGI,
+	INTERCEPT_CLGI,
+	INTERCEPT_SKINIT,
+	INTERCEPT_RDTSCP,
+	INTERCEPT_ICEBP,
+	INTERCEPT_WBINVD,
+	INTERCEPT_MONITOR,
+	INTERCEPT_MWAIT,
+	INTERCEPT_MWAIT_COND,
+	INTERCEPT_XSETBV,
+	INTERCEPT_RDPRU,
+};
+
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+	u32 intercept_cr;
+	u32 intercept_dr;
+	u32 intercept_exceptions;
+	u64 intercept;
+	u8 reserved_1[40];
+	u16 pause_filter_thresh;
+	u16 pause_filter_count;
+	u64 iopm_base_pa;
+	u64 msrpm_base_pa;
+	u64 tsc_offset;
+	u32 asid;
+	u8 tlb_ctl;
+	u8 reserved_2[3];
+	u32 int_ctl;
+	u32 int_vector;
+	u32 int_state;
+	u8 reserved_3[4];
+	u32 exit_code;
+	u32 exit_code_hi;
+	u64 exit_info_1;
+	u64 exit_info_2;
+	u32 exit_int_info;
+	u32 exit_int_info_err;
+	u64 nested_ctl;
+	u64 avic_vapic_bar;
+	u8 reserved_4[8];
+	u32 event_inj;
+	u32 event_inj_err;
+	u64 nested_cr3;
+	u64 virt_ext;
+	u32 clean;
+	u32 reserved_5;
+	u64 next_rip;
+	u8 insn_len;
+	u8 insn_bytes[15];
+	u64 avic_backing_page;	/* Offset 0xe0 */
+	u8 reserved_6[8];	/* Offset 0xe8 */
+	u64 avic_logical_id;	/* Offset 0xf0 */
+	u64 avic_physical_id;	/* Offset 0xf8 */
+	u8 reserved_7[768];	/* Offset 0x100, ends at 0x400 */
+};
+
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+#define TLB_CONTROL_FLUSH_ASID 3
+#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_GIF_SHIFT 9
+#define V_GIF_MASK (1 << V_GIF_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define V_GIF_ENABLE_SHIFT 25
+#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
+
+#define AVIC_ENABLE_SHIFT 31
+#define AVIC_ENABLE_MASK (1U << AVIC_ENABLE_SHIFT) /* 1 << 31 overflows int */
+
+#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+#define SVM_VM_CR_VALID_MASK	0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK  0x0010ULL
+
+#define SVM_NESTED_CTL_NP_ENABLE	BIT(0)
+#define SVM_NESTED_CTL_SEV_ENABLE	BIT(1)
+
+struct __attribute__ ((__packed__)) vmcb_seg {	/* 16 bytes if uN is N bits */
+	u16 selector;
+	u16 attrib;
+	u32 limit;
+	u64 base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+	struct vmcb_seg es;
+	struct vmcb_seg cs;
+	struct vmcb_seg ss;
+	struct vmcb_seg ds;
+	struct vmcb_seg fs;
+	struct vmcb_seg gs;
+	struct vmcb_seg gdtr;
+	struct vmcb_seg ldtr;
+	struct vmcb_seg idtr;
+	struct vmcb_seg tr;
+	u8 reserved_1[43];
+	u8 cpl;
+	u8 reserved_2[4];
+	u64 efer;
+	u8 reserved_3[112];
+	u64 cr4;
+	u64 cr3;
+	u64 cr0;
+	u64 dr7;
+	u64 dr6;
+	u64 rflags;
+	u64 rip;
+	u8 reserved_4[88];
+	u64 rsp;
+	u8 reserved_5[24];
+	u64 rax;
+	u64 star;
+	u64 lstar;
+	u64 cstar;
+	u64 sfmask;
+	u64 kernel_gs_base;
+	u64 sysenter_cs;
+	u64 sysenter_esp;
+	u64 sysenter_eip;
+	u64 cr2;
+	u8 reserved_6[32];
+	u64 g_pat;
+	u64 dbgctl;
+	u64 br_from;
+	u64 br_to;
+	u64 last_excp_from;
+	u64 last_excp_to;
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+	struct vmcb_control_area control;	/* packed, so save follows directly */
+	struct vmcb_save_area save;
+};
+
+#define SVM_CPUID_FUNC 0x8000000a
+
+#define SVM_VM_CR_SVM_DISABLE 4
+
+#define SVM_SELECTOR_S_SHIFT 4
+#define SVM_SELECTOR_DPL_SHIFT 5
+#define SVM_SELECTOR_P_SHIFT 7
+#define SVM_SELECTOR_AVL_SHIFT 8
+#define SVM_SELECTOR_L_SHIFT 9
+#define SVM_SELECTOR_DB_SHIFT 10
+#define SVM_SELECTOR_G_SHIFT 11
+
+#define SVM_SELECTOR_TYPE_MASK (0xf)
+#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
+#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
+#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
+#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
+#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
+#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
+#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
+
+#define SVM_SELECTOR_WRITE_MASK (1 << 1)
+#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
+#define SVM_SELECTOR_CODE_MASK (1 << 3)
+
+#define INTERCEPT_CR0_READ	0
+#define INTERCEPT_CR3_READ	3
+#define INTERCEPT_CR4_READ	4
+#define INTERCEPT_CR8_READ	8
+#define INTERCEPT_CR0_WRITE	(16 + 0)
+#define INTERCEPT_CR3_WRITE	(16 + 3)
+#define INTERCEPT_CR4_WRITE	(16 + 4)
+#define INTERCEPT_CR8_WRITE	(16 + 8)
+
+#define INTERCEPT_DR0_READ	0
+#define INTERCEPT_DR1_READ	1
+#define INTERCEPT_DR2_READ	2
+#define INTERCEPT_DR3_READ	3
+#define INTERCEPT_DR4_READ	4
+#define INTERCEPT_DR5_READ	5
+#define INTERCEPT_DR6_READ	6
+#define INTERCEPT_DR7_READ	7
+#define INTERCEPT_DR0_WRITE	(16 + 0)
+#define INTERCEPT_DR1_WRITE	(16 + 1)
+#define INTERCEPT_DR2_WRITE	(16 + 2)
+#define INTERCEPT_DR3_WRITE	(16 + 3)
+#define INTERCEPT_DR4_WRITE	(16 + 4)
+#define INTERCEPT_DR5_WRITE	(16 + 5)
+#define INTERCEPT_DR6_WRITE	(16 + 6)
+#define INTERCEPT_DR7_WRITE	(16 + 7)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1U << 31)	/* unsigned: 1 << 31 overflows int */
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
+
+#define	SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define	SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define	SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define	SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
+
+#define SVM_EXITINFO_REG_MASK 0x0F
+
+#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+
+#endif /* SELFTEST_KVM_SVM_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
new file mode 100644
index 000000000..b7531c83b
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/svm_util.h
+ * Header for nested SVM testing
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_SVM_UTILS_H
+#define SELFTEST_KVM_SVM_UTILS_H
+
+#include <stdint.h>
+#include "svm.h"
+#include "processor.h"
+
+#define CPUID_SVM_BIT		2
+#define CPUID_SVM		BIT_ULL(CPUID_SVM_BIT)
+
+#define SVM_EXIT_VMMCALL	0x081
+
+struct svm_test_data {
+	/* VMCB */
+	struct vmcb *vmcb; /* gva */
+	void *vmcb_hva;
+	uint64_t vmcb_gpa;
+
+	/* host state-save area */
+	struct vmcb_save_area *save_area; /* gva */
+	void *save_area_hva;
+	uint64_t save_area_gpa;
+};
+
+struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+bool nested_svm_supported(void);
+void nested_svm_check_supported(void);
+
+static inline bool cpu_has_svm(void)
+{
+	u32 eax = 0x80000001, ecx;	/* CPUID leaf in EAX; result read from ECX */
+
+	asm("cpuid" :
+	    "=a" (eax), "=c" (ecx) : "0" (eax) : "ebx", "edx");
+
+	return ecx & CPUID_SVM;		/* tests ECX bit CPUID_SVM_BIT */
+}
+
+#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
new file mode 100644
index 000000000..e78d7e26b
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -0,0 +1,625 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/vmx.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_VMX_H
+#define SELFTEST_KVM_VMX_H
+
+#include <stdint.h>
+#include "processor.h"
+
+#define CPUID_VMX_BIT				5
+
+#define CPUID_VMX				(1 << 5)
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_INTR_WINDOW_EXITING		0x00000004
+#define CPU_BASED_USE_TSC_OFFSETTING		0x00000008
+#define CPU_BASED_HLT_EXITING			0x00000080
+#define CPU_BASED_INVLPG_EXITING		0x00000200
+#define CPU_BASED_MWAIT_EXITING			0x00000400
+#define CPU_BASED_RDPMC_EXITING			0x00000800
+#define CPU_BASED_RDTSC_EXITING			0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING		0x00008000
+#define CPU_BASED_CR3_STORE_EXITING		0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING		0x00080000
+#define CPU_BASED_CR8_STORE_EXITING		0x00100000
+#define CPU_BASED_TPR_SHADOW			0x00200000
+#define CPU_BASED_NMI_WINDOW_EXITING		0x00400000
+#define CPU_BASED_MOV_DR_EXITING		0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING		0x01000000
+#define CPU_BASED_USE_IO_BITMAPS		0x02000000
+#define CPU_BASED_MONITOR_TRAP			0x08000000
+#define CPU_BASED_USE_MSR_BITMAPS		0x10000000
+#define CPU_BASED_MONITOR_EXITING		0x20000000
+#define CPU_BASED_PAUSE_EXITING			0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS	0x80000000
+
+#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR	0x0401e172
+
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT		0x00000002
+#define SECONDARY_EXEC_DESC			0x00000004
+#define SECONDARY_EXEC_ENABLE_RDTSCP		0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE	0x00000010
+#define SECONDARY_EXEC_ENABLE_VPID		0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT	0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY	0x00000200
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
+#define SECONDARY_EXEC_RDRAND_EXITING		0x00000800
+#define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC		0x00002000
+#define SECONDARY_EXEC_SHADOW_VMCS		0x00004000
+#define SECONDARY_EXEC_RDSEED_EXITING		0x00010000
+#define SECONDARY_EXEC_ENABLE_PML		0x00020000
+#define SECONDARY_EPT_VE			0x00040000
+#define SECONDARY_ENABLE_XSAV_RESTORE		0x00100000
+#define SECONDARY_EXEC_TSC_SCALING		0x02000000
+
+#define PIN_BASED_EXT_INTR_MASK			0x00000001
+#define PIN_BASED_NMI_EXITING			0x00000008
+#define PIN_BASED_VIRTUAL_NMIS			0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER		0x00000040
+#define PIN_BASED_POSTED_INTR			0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR	0x00000016
+
+#define VM_EXIT_SAVE_DEBUG_CONTROLS		0x00000004
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE		0x00000200
+#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL	0x00001000
+#define VM_EXIT_ACK_INTR_ON_EXIT		0x00008000
+#define VM_EXIT_SAVE_IA32_PAT			0x00040000
+#define VM_EXIT_LOAD_IA32_PAT			0x00080000
+#define VM_EXIT_SAVE_IA32_EFER			0x00100000
+#define VM_EXIT_LOAD_IA32_EFER			0x00200000
+#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER	0x00400000
+
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR	0x00036dff
+
+#define VM_ENTRY_LOAD_DEBUG_CONTROLS		0x00000004
+#define VM_ENTRY_IA32E_MODE			0x00000200
+#define VM_ENTRY_SMM				0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR		0x00000800
+#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL	0x00002000
+#define VM_ENTRY_LOAD_IA32_PAT			0x00004000
+#define VM_ENTRY_LOAD_IA32_EFER			0x00008000
+
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR	0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA			0x00000020
+
+#define EXIT_REASON_FAILED_VMENTRY	0x80000000
+#define EXIT_REASON_EXCEPTION_NMI	0
+#define EXIT_REASON_EXTERNAL_INTERRUPT	1
+#define EXIT_REASON_TRIPLE_FAULT	2
+#define EXIT_REASON_INTERRUPT_WINDOW	7
+#define EXIT_REASON_NMI_WINDOW		8
+#define EXIT_REASON_TASK_SWITCH		9
+#define EXIT_REASON_CPUID		10
+#define EXIT_REASON_HLT			12
+#define EXIT_REASON_INVD		13
+#define EXIT_REASON_INVLPG		14
+#define EXIT_REASON_RDPMC		15
+#define EXIT_REASON_RDTSC		16
+#define EXIT_REASON_VMCALL		18
+#define EXIT_REASON_VMCLEAR		19
+#define EXIT_REASON_VMLAUNCH		20
+#define EXIT_REASON_VMPTRLD		21
+#define EXIT_REASON_VMPTRST		22
+#define EXIT_REASON_VMREAD		23
+#define EXIT_REASON_VMRESUME		24
+#define EXIT_REASON_VMWRITE		25
+#define EXIT_REASON_VMOFF		26
+#define EXIT_REASON_VMON		27
+#define EXIT_REASON_CR_ACCESS		28
+#define EXIT_REASON_DR_ACCESS		29
+#define EXIT_REASON_IO_INSTRUCTION	30
+#define EXIT_REASON_MSR_READ		31
+#define EXIT_REASON_MSR_WRITE		32
+#define EXIT_REASON_INVALID_STATE	33
+#define EXIT_REASON_MWAIT_INSTRUCTION	36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION	40
+#define EXIT_REASON_MCE_DURING_VMENTRY	41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS		44
+#define EXIT_REASON_EOI_INDUCED		45
+#define EXIT_REASON_EPT_VIOLATION	48
+#define EXIT_REASON_EPT_MISCONFIG	49
+#define EXIT_REASON_INVEPT		50
+#define EXIT_REASON_RDTSCP		51
+#define EXIT_REASON_PREEMPTION_TIMER	52
+#define EXIT_REASON_INVVPID		53
+#define EXIT_REASON_WBINVD		54
+#define EXIT_REASON_XSETBV		55
+#define EXIT_REASON_APIC_WRITE		56
+#define EXIT_REASON_INVPCID		58
+#define EXIT_REASON_PML_FULL		62
+#define EXIT_REASON_XSAVES		63
+#define EXIT_REASON_XRSTORS		64
+#define LAST_EXIT_REASON		64
+
+enum vmcs_field {
+	VIRTUAL_PROCESSOR_ID		= 0x00000000,
+	POSTED_INTR_NV			= 0x00000002,
+	GUEST_ES_SELECTOR		= 0x00000800,
+	GUEST_CS_SELECTOR		= 0x00000802,
+	GUEST_SS_SELECTOR		= 0x00000804,
+	GUEST_DS_SELECTOR		= 0x00000806,
+	GUEST_FS_SELECTOR		= 0x00000808,
+	GUEST_GS_SELECTOR		= 0x0000080a,
+	GUEST_LDTR_SELECTOR		= 0x0000080c,
+	GUEST_TR_SELECTOR		= 0x0000080e,
+	GUEST_INTR_STATUS		= 0x00000810,
+	GUEST_PML_INDEX			= 0x00000812,
+	HOST_ES_SELECTOR		= 0x00000c00,
+	HOST_CS_SELECTOR		= 0x00000c02,
+	HOST_SS_SELECTOR		= 0x00000c04,
+	HOST_DS_SELECTOR		= 0x00000c06,
+	HOST_FS_SELECTOR		= 0x00000c08,
+	HOST_GS_SELECTOR		= 0x00000c0a,
+	HOST_TR_SELECTOR		= 0x00000c0c,
+	IO_BITMAP_A			= 0x00002000,
+	IO_BITMAP_A_HIGH		= 0x00002001,
+	IO_BITMAP_B			= 0x00002002,
+	IO_BITMAP_B_HIGH		= 0x00002003,
+	MSR_BITMAP			= 0x00002004,
+	MSR_BITMAP_HIGH			= 0x00002005,
+	VM_EXIT_MSR_STORE_ADDR		= 0x00002006,
+	VM_EXIT_MSR_STORE_ADDR_HIGH	= 0x00002007,
+	VM_EXIT_MSR_LOAD_ADDR		= 0x00002008,
+	VM_EXIT_MSR_LOAD_ADDR_HIGH	= 0x00002009,
+	VM_ENTRY_MSR_LOAD_ADDR		= 0x0000200a,
+	VM_ENTRY_MSR_LOAD_ADDR_HIGH	= 0x0000200b,
+	PML_ADDRESS			= 0x0000200e,
+	PML_ADDRESS_HIGH		= 0x0000200f,
+	TSC_OFFSET			= 0x00002010,
+	TSC_OFFSET_HIGH			= 0x00002011,
+	VIRTUAL_APIC_PAGE_ADDR		= 0x00002012,
+	VIRTUAL_APIC_PAGE_ADDR_HIGH	= 0x00002013,
+	APIC_ACCESS_ADDR		= 0x00002014,
+	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
+	POSTED_INTR_DESC_ADDR		= 0x00002016,
+	POSTED_INTR_DESC_ADDR_HIGH	= 0x00002017,
+	EPT_POINTER			= 0x0000201a,
+	EPT_POINTER_HIGH		= 0x0000201b,
+	EOI_EXIT_BITMAP0		= 0x0000201c,
+	EOI_EXIT_BITMAP0_HIGH		= 0x0000201d,
+	EOI_EXIT_BITMAP1		= 0x0000201e,
+	EOI_EXIT_BITMAP1_HIGH		= 0x0000201f,
+	EOI_EXIT_BITMAP2		= 0x00002020,
+	EOI_EXIT_BITMAP2_HIGH		= 0x00002021,
+	EOI_EXIT_BITMAP3		= 0x00002022,
+	EOI_EXIT_BITMAP3_HIGH		= 0x00002023,
+	VMREAD_BITMAP			= 0x00002026,
+	VMREAD_BITMAP_HIGH		= 0x00002027,
+	VMWRITE_BITMAP			= 0x00002028,
+	VMWRITE_BITMAP_HIGH		= 0x00002029,
+	XSS_EXIT_BITMAP			= 0x0000202C,
+	XSS_EXIT_BITMAP_HIGH		= 0x0000202D,
+	TSC_MULTIPLIER			= 0x00002032,
+	TSC_MULTIPLIER_HIGH		= 0x00002033,
+	GUEST_PHYSICAL_ADDRESS		= 0x00002400,
+	GUEST_PHYSICAL_ADDRESS_HIGH	= 0x00002401,
+	VMCS_LINK_POINTER		= 0x00002800,
+	VMCS_LINK_POINTER_HIGH		= 0x00002801,
+	GUEST_IA32_DEBUGCTL		= 0x00002802,
+	GUEST_IA32_DEBUGCTL_HIGH	= 0x00002803,
+	GUEST_IA32_PAT			= 0x00002804,
+	GUEST_IA32_PAT_HIGH		= 0x00002805,
+	GUEST_IA32_EFER			= 0x00002806,
+	GUEST_IA32_EFER_HIGH		= 0x00002807,
+	GUEST_IA32_PERF_GLOBAL_CTRL	= 0x00002808,
+	GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+	GUEST_PDPTR0			= 0x0000280a,
+	GUEST_PDPTR0_HIGH		= 0x0000280b,
+	GUEST_PDPTR1			= 0x0000280c,
+	GUEST_PDPTR1_HIGH		= 0x0000280d,
+	GUEST_PDPTR2			= 0x0000280e,
+	GUEST_PDPTR2_HIGH		= 0x0000280f,
+	GUEST_PDPTR3			= 0x00002810,
+	GUEST_PDPTR3_HIGH		= 0x00002811,
+	GUEST_BNDCFGS			= 0x00002812,
+	GUEST_BNDCFGS_HIGH		= 0x00002813,
+	HOST_IA32_PAT			= 0x00002c00,
+	HOST_IA32_PAT_HIGH		= 0x00002c01,
+	HOST_IA32_EFER			= 0x00002c02,
+	HOST_IA32_EFER_HIGH		= 0x00002c03,
+	HOST_IA32_PERF_GLOBAL_CTRL	= 0x00002c04,
+	HOST_IA32_PERF_GLOBAL_CTRL_HIGH	= 0x00002c05,
+	PIN_BASED_VM_EXEC_CONTROL	= 0x00004000,
+	CPU_BASED_VM_EXEC_CONTROL	= 0x00004002,
+	EXCEPTION_BITMAP		= 0x00004004,
+	PAGE_FAULT_ERROR_CODE_MASK	= 0x00004006,
+	PAGE_FAULT_ERROR_CODE_MATCH	= 0x00004008,
+	CR3_TARGET_COUNT		= 0x0000400a,
+	VM_EXIT_CONTROLS		= 0x0000400c,
+	VM_EXIT_MSR_STORE_COUNT		= 0x0000400e,
+	VM_EXIT_MSR_LOAD_COUNT		= 0x00004010,
+	VM_ENTRY_CONTROLS		= 0x00004012,
+	VM_ENTRY_MSR_LOAD_COUNT		= 0x00004014,
+	VM_ENTRY_INTR_INFO_FIELD	= 0x00004016,
+	VM_ENTRY_EXCEPTION_ERROR_CODE	= 0x00004018,
+	VM_ENTRY_INSTRUCTION_LEN	= 0x0000401a,
+	TPR_THRESHOLD			= 0x0000401c,
+	SECONDARY_VM_EXEC_CONTROL	= 0x0000401e,
+	PLE_GAP				= 0x00004020,
+	PLE_WINDOW			= 0x00004022,
+	VM_INSTRUCTION_ERROR		= 0x00004400,
+	VM_EXIT_REASON			= 0x00004402,
+	VM_EXIT_INTR_INFO		= 0x00004404,
+	VM_EXIT_INTR_ERROR_CODE		= 0x00004406,
+	IDT_VECTORING_INFO_FIELD	= 0x00004408,
+	IDT_VECTORING_ERROR_CODE	= 0x0000440a,
+	VM_EXIT_INSTRUCTION_LEN		= 0x0000440c,
+	VMX_INSTRUCTION_INFO		= 0x0000440e,
+	GUEST_ES_LIMIT			= 0x00004800,
+	GUEST_CS_LIMIT			= 0x00004802,
+	GUEST_SS_LIMIT			= 0x00004804,
+	GUEST_DS_LIMIT			= 0x00004806,
+	GUEST_FS_LIMIT			= 0x00004808,
+	GUEST_GS_LIMIT			= 0x0000480a,
+	GUEST_LDTR_LIMIT		= 0x0000480c,
+	GUEST_TR_LIMIT			= 0x0000480e,
+	GUEST_GDTR_LIMIT		= 0x00004810,
+	GUEST_IDTR_LIMIT		= 0x00004812,
+	GUEST_ES_AR_BYTES		= 0x00004814,
+	GUEST_CS_AR_BYTES		= 0x00004816,
+	GUEST_SS_AR_BYTES		= 0x00004818,
+	GUEST_DS_AR_BYTES		= 0x0000481a,
+	GUEST_FS_AR_BYTES		= 0x0000481c,
+	GUEST_GS_AR_BYTES		= 0x0000481e,
+	GUEST_LDTR_AR_BYTES		= 0x00004820,
+	GUEST_TR_AR_BYTES		= 0x00004822,
+	GUEST_INTERRUPTIBILITY_INFO	= 0x00004824,
+	GUEST_ACTIVITY_STATE		= 0X00004826,
+	GUEST_SYSENTER_CS		= 0x0000482A,
+	VMX_PREEMPTION_TIMER_VALUE	= 0x0000482E,
+	HOST_IA32_SYSENTER_CS		= 0x00004c00,
+	CR0_GUEST_HOST_MASK		= 0x00006000,
+	CR4_GUEST_HOST_MASK		= 0x00006002,
+	CR0_READ_SHADOW			= 0x00006004,
+	CR4_READ_SHADOW			= 0x00006006,
+	CR3_TARGET_VALUE0		= 0x00006008,
+	CR3_TARGET_VALUE1		= 0x0000600a,
+	CR3_TARGET_VALUE2		= 0x0000600c,
+	CR3_TARGET_VALUE3		= 0x0000600e,
+	EXIT_QUALIFICATION		= 0x00006400,
+	GUEST_LINEAR_ADDRESS		= 0x0000640a,
+	GUEST_CR0			= 0x00006800,
+	GUEST_CR3			= 0x00006802,
+	GUEST_CR4			= 0x00006804,
+	GUEST_ES_BASE			= 0x00006806,
+	GUEST_CS_BASE			= 0x00006808,
+	GUEST_SS_BASE			= 0x0000680a,
+	GUEST_DS_BASE			= 0x0000680c,
+	GUEST_FS_BASE			= 0x0000680e,
+	GUEST_GS_BASE			= 0x00006810,
+	GUEST_LDTR_BASE			= 0x00006812,
+	GUEST_TR_BASE			= 0x00006814,
+	GUEST_GDTR_BASE			= 0x00006816,
+	GUEST_IDTR_BASE			= 0x00006818,
+	GUEST_DR7			= 0x0000681a,
+	GUEST_RSP			= 0x0000681c,
+	GUEST_RIP			= 0x0000681e,
+	GUEST_RFLAGS			= 0x00006820,
+	GUEST_PENDING_DBG_EXCEPTIONS	= 0x00006822,
+	GUEST_SYSENTER_ESP		= 0x00006824,
+	GUEST_SYSENTER_EIP		= 0x00006826,
+	HOST_CR0			= 0x00006c00,
+	HOST_CR3			= 0x00006c02,
+	HOST_CR4			= 0x00006c04,
+	HOST_FS_BASE			= 0x00006c06,
+	HOST_GS_BASE			= 0x00006c08,
+	HOST_TR_BASE			= 0x00006c0a,
+	HOST_GDTR_BASE			= 0x00006c0c,
+	HOST_IDTR_BASE			= 0x00006c0e,
+	HOST_IA32_SYSENTER_ESP		= 0x00006c10,
+	HOST_IA32_SYSENTER_EIP		= 0x00006c12,
+	HOST_RSP			= 0x00006c14,
+	HOST_RIP			= 0x00006c16,
+};
+
+struct vmx_msr_entry {
+	uint32_t index;		/* presumably the MSR number -- TODO confirm */
+	uint32_t reserved;
+	uint64_t value;		/* presumably the MSR contents -- TODO confirm */
+} __attribute__ ((aligned(16)));	/* 4 + 4 + 8 = 16 bytes, 16-byte aligned */
+
+#include "evmcs.h"
+
+static inline int vmxon(uint64_t phys)
+{
+	uint8_t ret;	/* written by SETNA: 1 if CF or ZF is set after vmxon */
+
+	__asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
+		: [ret]"=rm"(ret)
+		: [pa]"m"(phys)	/* the operand is the memory holding @phys */
+		: "cc", "memory");
+
+	return ret;	/* 0 if vmxon left both CF and ZF clear, 1 otherwise */
+}
+
+static inline void vmxoff(void)
+{
+	__asm__ __volatile__("vmxoff");	/* the resulting flags are not inspected */
+}
+
+static inline int vmclear(uint64_t vmcs_pa)
+{
+	uint8_t ret;	/* written by SETNA: 1 if CF or ZF is set after vmclear */
+
+	__asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
+		: [ret]"=rm"(ret)
+		: [pa]"m"(vmcs_pa)	/* the operand is the memory holding @vmcs_pa */
+		: "cc", "memory");
+
+	return ret;	/* 0 if vmclear left both CF and ZF clear, 1 otherwise */
+}
+
+static inline int vmptrld(uint64_t vmcs_pa)
+{
+	uint8_t ret;	/* written by SETNA: 1 if CF or ZF is set after vmptrld */
+
+	if (enable_evmcs)	/* the instruction is never issued when enable_evmcs is set */
+		return -1;
+
+	__asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
+		: [ret]"=rm"(ret)
+		: [pa]"m"(vmcs_pa)	/* the operand is the memory holding @vmcs_pa */
+		: "cc", "memory");
+
+	return ret;	/* 0 if vmptrld left both CF and ZF clear, 1 otherwise */
+}
+
+static inline int vmptrst(uint64_t *value)
+{
+	uint64_t tmp;	/* memory destination of the vmptrst instruction */
+	uint8_t ret;	/* written by SETNA: 1 if CF or ZF is set after vmptrst */
+
+	if (enable_evmcs)	/* delegate to the enlightened-VMCS helper instead */
+		return evmcs_vmptrst(value);
+
+	__asm__ __volatile__("vmptrst %[value]; setna %[ret]"
+		: [value]"=m"(tmp), [ret]"=rm"(ret)
+		: : "cc", "memory");
+
+	*value = tmp;	/* stored unconditionally, whatever @ret says */
+	return ret;
+}
+
+/*
+ * Convenience form of vmptrst(): the status is discarded and the pointer is
+ * handed back by value, starting from a zero-initialised local.
+ */
+static inline uint64_t vmptrstz(void)
+{
+	uint64_t vmcs_ptr = 0;
+	(void)vmptrst(&vmcs_ptr);
+	return vmcs_ptr;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmlaunch.
+ */
+static inline int vmlaunch(void)
+{
+	int ret;	/* value popped from the stack slot pushed as $0 below */
+
+	if (enable_evmcs)	/* delegate to the enlightened-VMCS helper instead */
+		return evmcs_vmlaunch();
+
+	__asm__ __volatile__("push %%rbp;"	/* save the GPRs not in the clobber list */
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"		/* result slot: stays 0 unless incq runs */
+			     "vmwrite %%rsp, %[host_rsp];"	/* HOST_RSP = current stack */
+			     "lea 1f(%%rip), %%rax;"
+			     "vmwrite %%rax, %[host_rip];"	/* HOST_RIP = label 1 below */
+			     "vmlaunch;"
+			     "incq (%%rsp);"	/* only runs if vmlaunch falls through: slot = 1 */
+			     "1: pop %%rax;"	/* result slot becomes @ret */
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)	/* early-clobber: rax is written before inputs are done */
+			     : [host_rsp]"r"((uint64_t)HOST_RSP),
+			       [host_rip]"r"((uint64_t)HOST_RIP)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;	/* 1 if vmlaunch fell through, 0 if label 1 was entered directly */
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int vmresume(void)
+{
+	int ret;	/* value popped from the stack slot pushed as $0 below */
+
+	if (enable_evmcs)	/* delegate to the enlightened-VMCS helper instead */
+		return evmcs_vmresume();
+
+	__asm__ __volatile__("push %%rbp;"	/* save the GPRs not in the clobber list */
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"		/* result slot: stays 0 unless incq runs */
+			     "vmwrite %%rsp, %[host_rsp];"	/* HOST_RSP = current stack */
+			     "lea 1f(%%rip), %%rax;"
+			     "vmwrite %%rax, %[host_rip];"	/* HOST_RIP = label 1 below */
+			     "vmresume;"
+			     "incq (%%rsp);"	/* only runs if vmresume falls through: slot = 1 */
+			     "1: pop %%rax;"	/* result slot becomes @ret */
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)	/* early-clobber: rax is written before inputs are done */
+			     : [host_rsp]"r"((uint64_t)HOST_RSP),
+			       [host_rip]"r"((uint64_t)HOST_RIP)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;	/* 1 if vmresume fell through, 0 if label 1 was entered directly */
+}
+
+static inline void vmcall(void)
+{
+	/* Currently, L1 destroys our GPRs during vmexits.  */
+	__asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : :	/* rbp is saved by hand */
+			     "rax", "rbx", "rcx", "rdx",
+			     "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
+			     "r13", "r14", "r15");	/* all other GPRs except rsp are clobbers */
+}
+
+static inline int vmread(uint64_t encoding, uint64_t *value)
+{
+	uint64_t tmp;	/* destination of the vmread instruction */
+	uint8_t ret;	/* written by SETNA: 1 if CF or ZF is set after vmread */
+
+	if (enable_evmcs)	/* delegate to the enlightened-VMCS helper instead */
+		return evmcs_vmread(encoding, value);
+
+	__asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
+		: [value]"=rm"(tmp), [ret]"=rm"(ret)
+		: [encoding]"r"(encoding)
+		: "cc", "memory");
+	if (!ret)	/* a failed vmread may not have written tmp: leave *value alone */
+		*value = tmp;
+	return ret;	/* 0 on success (CF and ZF clear), 1 otherwise */
+}
+
+/*
+ * Convenience form of vmread(): the status is discarded and the field is
+ * handed back by value, starting from a zero-initialised local.
+ */
+static inline uint64_t vmreadz(uint64_t encoding)
+{
+	uint64_t field = 0;
+	(void)vmread(encoding, &field);
+	return field;
+}
+
+static inline int vmwrite(uint64_t encoding, uint64_t value)
+{
+	uint8_t ret;	/* written by SETNA: 1 if CF or ZF is set after vmwrite */
+
+	if (enable_evmcs)	/* delegate to the enlightened-VMCS helper instead */
+		return evmcs_vmwrite(encoding, value);
+
+	__asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
+		: [ret]"=rm"(ret)
+		: [value]"rm"(value), [encoding]"r"(encoding)
+		: "cc", "memory");
+
+	return ret;	/* 0 if vmwrite left both CF and ZF clear, 1 otherwise */
+}
+
+static inline uint32_t vmcs_revision(void)
+{
+	return rdmsr(MSR_IA32_VMX_BASIC);	/* result is converted to the uint32_t return type */
+}
+
+struct vmx_pages {
+	void *vmxon_hva;	/* each region is kept as <name>_hva, <name>_gpa and <name> */
+	uint64_t vmxon_gpa;
+	void *vmxon;		/* presumably the guest virtual address -- TODO confirm */
+
+	void *vmcs_hva;
+	uint64_t vmcs_gpa;
+	void *vmcs;
+
+	void *msr_hva;
+	uint64_t msr_gpa;
+	void *msr;
+
+	void *shadow_vmcs_hva;
+	uint64_t shadow_vmcs_gpa;
+	void *shadow_vmcs;
+
+	void *vmread_hva;
+	uint64_t vmread_gpa;
+	void *vmread;
+
+	void *vmwrite_hva;
+	uint64_t vmwrite_gpa;
+	void *vmwrite;
+
+	void *vp_assist_hva;
+	uint64_t vp_assist_gpa;
+	void *vp_assist;
+
+	void *enlightened_vmcs_hva;
+	uint64_t enlightened_vmcs_gpa;
+	void *enlightened_vmcs;
+
+	void *eptp_hva;
+	uint64_t eptp_gpa;
+	void *eptp;
+
+	void *apic_access_hva;
+	uint64_t apic_access_gpa;
+	void *apic_access;
+};
+
+union vmx_basic {
+	u64 val;	/* the whole 64-bit value */
+	struct {	/* the same 64 bits, split into fields */
+		u32 revision;
+		u32	size:13,
+			reserved1:3,
+			width:1,
+			dual:1,
+			type:4,
+			insouts:1,
+			ctrl:1,
+			vm_entry_exception_ctrl:1,
+			reserved2:7;	/* the bit-fields above add up to 32 bits */
+	};
+};
+
+union vmx_ctrl_msr {
+	u64 val;	/* the whole 64-bit value */
+	struct {	/* the same 64 bits as two 32-bit halves */
+		u32 set, clr;
+	};
+};
+
+struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
+bool prepare_for_vmx_operation(struct vmx_pages *vmx);
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
+bool load_vmcs(struct vmx_pages *vmx);
+
+bool nested_vmx_supported(void);
+void nested_vmx_check_supported(void);
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+		   uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+		 uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+		 uint32_t eptp_memslot);
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+			uint32_t memslot, uint32_t eptp_memslot);
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+		  uint32_t eptp_memslot);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
+				      uint32_t eptp_memslot);
+
+#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
new file mode 100644
index 000000000..aa3795cd7
--- /dev/null
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kvm_create_max_vcpus
+ *
+ * Copyright (C) 2019, Google LLC.
+ *
+ * Test for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_VCPU_ID.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+
+/* Create a VM, add @num_vcpus vCPUs with IDs from @first_vcpu_id, then free it. */
+void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
+{
+	const int last_vcpu_id = first_vcpu_id + num_vcpus - 1;
+	struct kvm_vm *vm;
+	int id;
+
+	pr_info("Testing creating %d vCPUs, with IDs %d...%d.\n",
+		num_vcpus, first_vcpu_id, last_vcpu_id);
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+	/* vm_vcpu_add() asserts that the vCPU was created. */
+	for (id = first_vcpu_id; id <= last_vcpu_id; id++)
+		vm_vcpu_add(vm, id);
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
+	int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+	/*
+	 * Number of file descriptors required, KVM_CAP_MAX_VCPUS for vCPU fds +
+	 * an arbitrary number for everything else.
+	 */
+	int nr_fds_wanted = kvm_max_vcpus + 100;
+	struct rlimit rl;
+
+	pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
+	pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
+
+	/*
+	 * Check that we're allowed to open nr_fds_wanted file descriptors and
+	 * try raising the limits if needed.
+	 */
+	TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+	if (rl.rlim_cur < nr_fds_wanted) {
+		rl.rlim_cur = nr_fds_wanted;
+		if (rl.rlim_max < nr_fds_wanted) {	/* the hard limit has to grow too */
+			int old_rlim_max = rl.rlim_max;	/* kept only for the message below */
+			rl.rlim_max = nr_fds_wanted;
+
+			int r = setrlimit(RLIMIT_NOFILE, &rl);
+			if (r < 0) {
+				printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n",
+				       old_rlim_max, nr_fds_wanted);
+				exit(KSFT_SKIP);	/* skip rather than fail the test */
+			}
+		} else {
+			TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+		}
+	}
+
+	/*
+	 * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
+	 * Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID
+	 * in this case.
+	 */
+	if (!kvm_max_vcpu_id)
+		kvm_max_vcpu_id = kvm_max_vcpus;
+
+	TEST_ASSERT(kvm_max_vcpu_id >= kvm_max_vcpus,
+		    "KVM_MAX_VCPU_ID (%d) must be at least as large as KVM_MAX_VCPUS (%d).",
+		    kvm_max_vcpu_id, kvm_max_vcpus);
+
+	test_vcpu_creation(0, kvm_max_vcpus);	/* IDs 0 .. kvm_max_vcpus - 1 */
+
+	if (kvm_max_vcpu_id > kvm_max_vcpus)	/* also try the last kvm_max_vcpus IDs */
+		test_vcpu_creation(
+			kvm_max_vcpu_id - kvm_max_vcpus, kvm_max_vcpus);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
new file mode 100644
index 000000000..d6c32c328
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AArch64 code
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <linux/compiler.h>
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR		0x180000
+#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN	0xac0000
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{	/* Round @v up to a page multiple; an already aligned @v is unchanged. */
+	return (v + vm->page_size - 1) & ~((uint64_t)vm->page_size - 1);
+}
+
+/* Index of @gva's entry in the top-level (PGD) table. */
+static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int pgd_shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+	uint64_t nr_entries = 1UL << (vm->va_bits - pgd_shift);
+	return (gva >> pgd_shift) & (nr_entries - 1);
+}
+
+/* Index of @gva's entry in a PUD table; only valid with 4 page table levels. */
+static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int bits_per_level = vm->page_shift - 3;
+	unsigned int pud_shift = 2 * bits_per_level + vm->page_shift;
+
+	TEST_ASSERT(vm->pgtable_levels == 4,
+		"Mode %d does not have 4 page table levels", vm->mode);
+	return (gva >> pud_shift) & ((1UL << bits_per_level) - 1);
+}
+
+/* Index of @gva's entry in a PMD table; needs at least 3 page table levels. */
+static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int bits_per_level = vm->page_shift - 3;
+	unsigned int pmd_shift = bits_per_level + vm->page_shift;
+
+	TEST_ASSERT(vm->pgtable_levels >= 3,
+		"Mode %d does not have >= 3 page table levels", vm->mode);
+	return (gva >> pmd_shift) & ((1UL << bits_per_level) - 1);
+}
+
+/* Index of @gva's entry in a last-level (PTE) table. */
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	return (gva >> vm->page_shift) & ((1UL << (vm->page_shift - 3)) - 1);
+}
+
+/* Keep only bits [va_bits - 1 : page_shift] of @entry, i.e. its address part. */
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+	return entry & (((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift);
+}
+
+static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
+{
+	unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+	return 1 << (vm->va_bits - shift);	/* same shift as pgd_index(): entries in the top table */
+}
+
+static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
+{
+	return 1 << (vm->page_shift - 3);	/* 2^page_shift bytes / 8 bytes per entry */
+}
+
+/* Allocate the top-level page table once; later calls are no-ops. */
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+	if (vm->pgd_created)
+		return;
+	vm->pgd = vm_phy_pages_alloc(vm,
+		page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
+		KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+	vm->pgd_created = true;
+}
+
+void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+		  uint32_t pgd_memslot, uint64_t flags)
+{
+	uint8_t attr_idx = flags & 7;	/* only the low 3 bits of @flags are used */
+	uint64_t *ptep;			/* host pointer to the entry being filled in */
+
+	TEST_ASSERT((vaddr % vm->page_size) == 0,
+		"Virtual address not on page boundary,\n"
+		"  vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+		(vaddr >> vm->page_shift)),
+		"Invalid virtual address, vaddr: 0x%lx", vaddr);
+	TEST_ASSERT((paddr % vm->page_size) == 0,
+		"Physical address not on page boundary,\n"
+		"  paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+		"Physical address beyond beyond maximum supported,\n"
+		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->max_gfn, vm->page_size);
+
+	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+	if (!*ptep) {	/* empty entry: allocate the next-level table on demand */
+		*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+		*ptep |= 3;	/* set the two low bits of the new entry */
+	}
+
+	switch (vm->pgtable_levels) {	/* walk down, entering at the level count in use */
+	case 4:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
+		if (!*ptep) {
+			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			*ptep |= 3;
+		}
+		/* fall through */
+	case 3:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
+		if (!*ptep) {
+			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			*ptep |= 3;
+		}
+		/* fall through */
+	case 2:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
+		break;
+	default:
+		TEST_FAIL("Page table levels must be 2, 3, or 4");
+	}
+
+	*ptep = paddr | 3;	/* final entry: overwrites whatever was mapped before */
+	*ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
+}
+
+/* Map @vaddr to @paddr using memory attribute index 4. */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+		 uint32_t pgd_memslot)
+{
+	/* Attribute index 4 is NORMAL (See DEFAULT_MAIR_EL1) */
+	_virt_pg_map(vm, vaddr, paddr, pgd_memslot, (uint64_t)4);
+}
+
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint64_t *ptep;	/* host pointer to the entry reached so far in the walk */
+
+	if (!vm->pgd_created)	/* no page tables yet: nothing can be mapped */
+		goto unmapped_gva;
+
+	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+	if (!*ptep)	/* test the entry, not the pointer: zero means unpopulated */
+		goto unmapped_gva;
+
+	switch (vm->pgtable_levels) {	/* same descent as _virt_pg_map() */
+	case 4:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
+		if (!*ptep)
+			goto unmapped_gva;
+		/* fall through */
+	case 3:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
+		if (!*ptep)
+			goto unmapped_gva;
+		/* fall through */
+	case 2:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
+		if (!*ptep)
+			goto unmapped_gva;
+		break;
+	default:
+		TEST_FAIL("Page table levels must be 2, 3, or 4");
+	}
+
+	return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));	/* page address + offset */
+
+unmapped_gva:
+	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+	exit(1);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+#ifdef DEBUG	/* without DEBUG this function has an empty body */
+	static const char * const type[] = { "", "pud", "pmd", "pte" };	/* indexed by @level */
+	uint64_t pte, *ptep;
+
+	if (level == 4)	/* one past the "pte" level: stop recursing */
+		return;
+
+	for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {	/* each 8-byte entry */
+		ptep = addr_gpa2hva(vm, pte);
+		if (!*ptep)	/* skip empty entries */
+			continue;
+		fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
+		pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
+	}
+#endif
+}
+
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	int level = 4 - (vm->pgtable_levels - 1);	/* pte_dump() level for the tables under the pgd */
+	uint64_t pgd, *ptep;
+
+	if (!vm->pgd_created)	/* nothing to dump before the pgd exists */
+		return;
+
+	for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {	/* each 8-byte entry */
+		ptep = addr_gpa2hva(vm, pgd);
+		if (!*ptep)	/* skip empty entries */
+			continue;
+		fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
+		pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
+	}
+}
+
+/* Create a default-mode VM, load this program's ELF into it and add one vCPU. */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code)
+{
+	/* Two extra page-table pages per 512 (PTEs in a 4K table) extra pages. */
+	const uint64_t pt_pages = (extra_mem_pages / 512) * 2;
+	const uint64_t nr_pages = DEFAULT_GUEST_PHY_PAGES + pt_pages;
+	struct kvm_vm *vm = vm_create(VM_MODE_DEFAULT, nr_pages, O_RDWR);
+
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+	return vm;
+}
+
+void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init)
+{
+	struct kvm_vcpu_init default_init = { .target = -1, };	/* used when @init is NULL */
+	uint64_t sctlr_el1, tcr_el1;
+
+	if (!init)
+		init = &default_init;
+
+	if (init->target == -1) {	/* -1: take the target reported by KVM_ARM_PREFERRED_TARGET */
+		struct kvm_vcpu_init preferred;
+		vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
+		init->target = preferred.target;	/* note: writes back into the caller's @init */
+	}
+
+	vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, init);
+
+	/*
+	 * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
+	 * registers, which the variable argument list macros do.
+	 */
+	set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20);
+
+	get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1);	/* start from current values */
+	get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1);
+
+	switch (vm->mode) {	/* pick granule (TG0) and physical size (IPS) for the VM mode */
+	case VM_MODE_P52V48_4K:
+		TEST_FAIL("AArch64 does not support 4K sized pages "
+			  "with 52-bit physical address ranges");
+	case VM_MODE_PXXV48_4K:
+		TEST_FAIL("AArch64 does not support 4K sized pages "
+			  "with ANY-bit physical address ranges");
+	case VM_MODE_P52V48_64K:
+		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+		tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+		break;
+	case VM_MODE_P48V48_4K:
+		tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+		tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+		break;
+	case VM_MODE_P48V48_64K:
+		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+		tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+		break;
+	case VM_MODE_P40V48_4K:
+		tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+		tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+		break;
+	case VM_MODE_P40V48_64K:
+		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+		tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+		break;
+	default:
+		TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+	}
+
+	sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
+	/* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
+	tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
+	tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+
+	set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd);	/* the VM's top-level table */
+}
+
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+	uint64_t pstate, pc;	/* the only two registers this dump reports */
+
+	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate);
+	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc);
+
+	fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
+		indent, "", pstate, pc);	/* "%*s" with "" emits @indent spaces */
+}
+
+void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
+			      struct kvm_vcpu_init *init, void *guest_code)
+{
+	size_t stack_size = vm->page_size == 4096 ?	/* 4K pages: DEFAULT_STACK_PGS pages */
+					DEFAULT_STACK_PGS * vm->page_size :
+					vm->page_size;	/* otherwise a single page */
+	uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
+					DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+
+	vm_vcpu_add(vm, vcpuid);
+	aarch64_vcpu_setup(vm, vcpuid, init);	/* a NULL @init is accepted there */
+
+	set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);	/* end of the stack */
+	set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);	/* entry point */
+}
+
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+	aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code);	/* NULL init: use the defaults */
+}
+
+/* Load @num (1..8) uint64_t varargs into the vCPU's regs.regs[0..num-1]. */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+	unsigned int idx;
+	va_list args;
+
+	TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+		    "  num: %u\n", num);
+
+	va_start(args, num);
+	for (idx = 0; idx < num; idx++) {
+		uint64_t arg = va_arg(args, uint64_t);
+
+		set_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[idx]), arg);
+	}
+	va_end(args);
+}
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{	/* empty body: this implementation performs no check */
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
new file mode 100644
index 000000000..f600311fd
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+
+/*
+ * Address used to signal ucalls: set by ucall_mmio_init(), cleared by
+ * ucall_uninit() and stored to by ucall().
+ */
+static vm_vaddr_t *ucall_exit_mmio_addr;
+
+/*
+ * Try to use gpa as the ucall MMIO address.
+ *
+ * Returns false, changing nothing, when a KVM memory region overlaps
+ * [gpa, gpa + 1].  Otherwise identity maps gpa, records it in
+ * ucall_exit_mmio_addr, syncs that global to the guest and returns true.
+ */
+static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+	bool backed = kvm_userspace_memory_region_find(vm, gpa, gpa + 1) != NULL;
+
+	if (!backed) {
+		virt_pg_map(vm, gpa, gpa, 0);
+
+		ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
+		sync_global_to_guest(vm, ucall_exit_mmio_addr);
+	}
+
+	return !backed;
+}
+
+/*
+ * Choose the guest address used for ucalls.
+ *
+ * A non-NULL arg is taken as the guest physical address to use and must be
+ * accepted by ucall_mmio_init().  With a NULL arg a suitable address is
+ * searched for; failing to find one is a test failure.
+ */
+void ucall_init(struct kvm_vm *vm, void *arg)
+{
+	vm_paddr_t gpa, start, end, step, offset;
+	unsigned int bits;
+	bool ret;
+
+	if (arg) {
+		gpa = (vm_paddr_t)arg;
+		ret = ucall_mmio_init(vm, gpa);
+		TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
+		return;
+	}
+
+	/*
+	 * We need an address that lies inside both the physical and the
+	 * virtual address space but is _not_ backed by a KVM memory region.
+	 * Once identity mapped, the guest can access it, and since KVM has
+	 * nothing registered there it assumes userspace handles the access
+	 * and exits with KVM_EXIT_MMIO (at least on AArch64).
+	 *
+	 * The search starts at 5/8th of the allowed space, guessing that this
+	 * area is unmapped, and then probes both below and above that point
+	 * in steps of 1/16th of the allowed space.
+	 *
+	 * Only VA-bits - 1 bits are usable for an identity mapping: the upper
+	 * half of the virtual address space is the two's complement of the
+	 * lower half and cannot match physical addresses.
+	 */
+	bits = vm->va_bits - 1;
+	if (vm->pa_bits < bits)
+		bits = vm->pa_bits;
+
+	end = 1ul << bits;
+	step = end / 16;
+	start = end * 5 / 8;
+
+	offset = 0;
+	while (offset < end - start) {
+		if (ucall_mmio_init(vm, start - offset) ||
+		    ucall_mmio_init(vm, start + offset))
+			return;
+		offset += step;
+	}
+
+	TEST_FAIL("Can't find a ucall mmio address");
+}
+
+/* Reset the ucall MMIO address to NULL and sync the cleared value to the guest. */
+void ucall_uninit(struct kvm_vm *vm)
+{
+	ucall_exit_mmio_addr = NULL;
+	sync_global_to_guest(vm, ucall_exit_mmio_addr);
+}
+
+/*
+ * Issue a ucall: store cmd and at most UCALL_MAX_ARGS of the variadic
+ * uint64_t arguments in an on-stack struct ucall, then write that
+ * structure's address to *ucall_exit_mmio_addr.  Arguments beyond
+ * UCALL_MAX_ARGS are silently dropped.
+ */
+void ucall(uint64_t cmd, int nargs, ...)
+{
+	struct ucall uc = {};
+	va_list va;
+	int i = 0;
+
+	WRITE_ONCE(uc.cmd, cmd);
+	if (nargs > UCALL_MAX_ARGS)
+		nargs = UCALL_MAX_ARGS;
+
+	va_start(va, nargs);
+	while (i < nargs) {
+		WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
+		++i;
+	}
+	va_end(va);
+
+	WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
+}
+
+/*
+ * Decode the ucall, if any, behind the vCPU's most recent exit.
+ *
+ * *uc (when uc is non-NULL) is always zeroed first.  If the exit is a
+ * KVM_EXIT_MMIO at the ucall address, the struct ucall whose guest virtual
+ * address was written by the guest is copied out, the I/O is completed and
+ * the result is stored in *uc.
+ *
+ * Return: the ucall command, or 0 when the exit was not a ucall.
+ */
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+	struct kvm_run *run = vcpu_state(vm, vcpu_id);
+	struct ucall ucall = {};
+	vm_vaddr_t gva;
+
+	if (uc)
+		memset(uc, 0, sizeof(*uc));
+
+	/* Not an MMIO exit at the ucall address: report "no ucall". */
+	if (run->exit_reason != KVM_EXIT_MMIO ||
+	    run->mmio.phys_addr != (uint64_t)ucall_exit_mmio_addr)
+		return ucall.cmd;
+
+	TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
+		    "Unexpected ucall exit mmio address access");
+	memcpy(&gva, run->mmio.data, sizeof(gva));
+	memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall));
+
+	vcpu_run_complete_io(vm, vcpu_id);
+	if (uc)
+		memcpy(uc, &ucall, sizeof(ucall));
+
+	return ucall.cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
new file mode 100644
index 000000000..5ebbd0d6b
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/assert.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
+
+#include "test_util.h"
+
+#include <execinfo.h>
+#include <sys/syscall.h>
+
+#include "kselftest.h"
+
+/*
+ * Dumps the current stack trace to stderr by passing the addresses
+ * collected with backtrace(3) to an addr2line command run via system(3).
+ */
+static void __attribute__((noinline)) test_dump_stack(void);
+static void test_dump_stack(void)
+{
+	/*
+	 * Build and run this command:
+	 *
+	 *	addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
+	 *		cat -n 1>&2
+	 *
+	 * Note that the spacing is different and there's no newline.
+	 */
+	size_t i;
+	size_t n = 20;
+	void *stack[n];
+	const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai";
+	const char *pipeline = "|cat -n 1>&2";
+	char cmd[strlen(addr2line) + strlen(pipeline) +
+		 /* N bytes per addr * 2 digits per byte + 1 space per addr: */
+		 n * (((sizeof(void *)) * 2) + 1) +
+		 /* Null terminator: */
+		 1];
+	char *c;
+
+	/* n becomes the number of addresses actually captured (at most 20). */
+	n = backtrace(stack, n);
+	c = &cmd[0];
+	c += sprintf(c, "%s", addr2line);
+	/*
+	 * Start at index 2, i.e. skip the first two entries returned by
+	 * backtrace(), which are expected to belong to test_dump_stack and
+	 * test_assert (both are declared noinline so that they keep frames
+	 * of their own).  One is subtracted from every address before it is
+	 * printed -- presumably so that a return address resolves to the
+	 * line of the call itself; TODO confirm.
+	 */
+	for (i = 2; i < n; i++)
+		c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+	c += sprintf(c, "%s", pipeline);
+	/* The result of system() is deliberately ignored; silence the warning. */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+	system(cmd);
+#pragma GCC diagnostic pop
+}
+
+/* Returns the result of the raw SYS_gettid system call. */
+static pid_t _gettid(void)
+{
+	pid_t tid = syscall(SYS_gettid);
+
+	return tid;
+}
+
+/*
+ * Report a failed test assertion and terminate the process.
+ *
+ * Input Args:
+ *   exp - Result of the asserted expression; nothing happens when true.
+ *   exp_str - Text of the asserted expression.
+ *   file, line - Source location printed in the failure banner.
+ *   fmt, ... - Optional printf-style message; fmt may be NULL.
+ *
+ * Return: Only when exp is true.  Otherwise the process exits with
+ *   KSFT_SKIP if errno was EACCES on entry, else with status 254.
+ *
+ * errno is sampled once on entry: the reporting below (stdio and the
+ * system(3) call made by test_dump_stack()) may modify it, and both the
+ * printed error text and the EACCES check must describe the failure that
+ * led here rather than a side effect of printing it.
+ */
+void __attribute__((noinline))
+test_assert(bool exp, const char *exp_str,
+	const char *file, unsigned int line, const char *fmt, ...)
+{
+	const int saved_errno = errno;
+	va_list ap;
+
+	if (exp)
+		return;
+
+	va_start(ap, fmt);
+
+	fprintf(stderr, "==== Test Assertion Failure ====\n"
+		"  %s:%u: %s\n"
+		"  pid=%d tid=%d - %s\n",
+		file, line, exp_str, getpid(), _gettid(),
+		strerror(saved_errno));
+	test_dump_stack();
+	if (fmt) {
+		fputs("  ", stderr);
+		vfprintf(stderr, fmt, ap);
+		fputs("\n", stderr);
+	}
+	va_end(ap);
+
+	if (saved_errno == EACCES) {
+		print_skip("Access denied - Exiting");
+		exit(KSFT_SKIP);
+	}
+	exit(254);
+}
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
new file mode 100644
index 000000000..bc75a91e0
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/elf.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#include "test_util.h"
+
+#include <bits/endian.h>
+#include <linux/elf.h>
+
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+/* ELF Header Get
+ *
+ * Input Args:
+ *   filename - Path to ELF file
+ *
+ * Output Args:
+ *   hdrp - Filled in with the ELF header read from the start of the file
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Opens filename, validates the ELF identification bytes (magic, 64-bit
+ * class, same endianness as the host, current version), reads the whole
+ * header into *hdrp, checks the program and section header entry sizes
+ * and closes the file again.
+ */
+static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
+{
+	off_t offset_rv;
+
+	/* Open the ELF file. */
+	int fd;
+	fd = open(filename, O_RDONLY);
+	TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+		"  filename: %s\n"
+		"  rv: %i errno: %i", filename, fd, errno);
+
+	/* Read in and validate ELF Identification Record.
+	 * The ELF Identification record is the first 16 (EI_NIDENT) bytes
+	 * of the ELF header, which is at the beginning of the ELF file.
+	 * For now it is only safe to read the first EI_NIDENT bytes.  Once
+	 * read and validated, the value of e_ehsize can be used to determine
+	 * the real size of the ELF header.
+	 */
+	unsigned char ident[EI_NIDENT];
+	test_read(fd, ident, sizeof(ident));
+	TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1)
+		&& (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3),
+		"ELF MAGIC Mismatch,\n"
+		"  filename: %s\n"
+		"  ident[EI_MAG0 - EI_MAG3]: %02x %02x %02x %02x\n"
+		"  Expected: %02x %02x %02x %02x",
+		filename,
+		ident[EI_MAG0], ident[EI_MAG1], ident[EI_MAG2], ident[EI_MAG3],
+		ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3);
+	TEST_ASSERT(ident[EI_CLASS] == ELFCLASS64,
+		"Current implementation only able to handle ELFCLASS64,\n"
+		"  filename: %s\n"
+		"  ident[EI_CLASS]: %02x\n"
+		"  expected: %02x",
+		filename,
+		ident[EI_CLASS], ELFCLASS64);
+	TEST_ASSERT(((BYTE_ORDER == LITTLE_ENDIAN)
+			&& (ident[EI_DATA] == ELFDATA2LSB))
+		|| ((BYTE_ORDER == BIG_ENDIAN)
+			&& (ident[EI_DATA] == ELFDATA2MSB)), "Current "
+		"implementation only able to handle\n"
+		"cases where the host and ELF file endianness\n"
+		"is the same:\n"
+		"  host BYTE_ORDER: %u\n"
+		"  host LITTLE_ENDIAN: %u\n"
+		"  host BIG_ENDIAN: %u\n"
+		"  ident[EI_DATA]: %u\n"
+		"  ELFDATA2LSB: %u\n"
+		"  ELFDATA2MSB: %u",
+		BYTE_ORDER, LITTLE_ENDIAN, BIG_ENDIAN,
+		ident[EI_DATA], ELFDATA2LSB, ELFDATA2MSB);
+	TEST_ASSERT(ident[EI_VERSION] == EV_CURRENT,
+		"Current implementation only able to handle current "
+		"ELF version,\n"
+		"  filename: %s\n"
+		"  ident[EI_VERSION]: %02x\n"
+		"  expected: %02x",
+		filename, ident[EI_VERSION], EV_CURRENT);
+
+	/* Read in the ELF header.
+	 * With the ELF Identification portion of the ELF header
+	 * validated, especially that the value at EI_VERSION is
+	 * as expected, it is now safe to read the entire ELF header.
+	 */
+	offset_rv = lseek(fd, 0, SEEK_SET);
+	/* off_t has no printf length modifier of its own: print as intmax_t. */
+	TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n"
+		"  rv: %jd expected: %i", (intmax_t) offset_rv, 0);
+	test_read(fd, hdrp, sizeof(*hdrp));
+	TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr),
+		"Unexpected physical header size,\n"
+		"  hdrp->e_phentsize: %x\n"
+		"  expected: %zx",
+		hdrp->e_phentsize, sizeof(Elf64_Phdr));
+	TEST_ASSERT(hdrp->e_shentsize == sizeof(Elf64_Shdr),
+		"Unexpected section header size,\n"
+		"  hdrp->e_shentsize: %x\n"
+		"  expected: %zx",
+		hdrp->e_shentsize, sizeof(Elf64_Shdr));
+
+	/* The header has been copied out; don't leak the descriptor. */
+	close(fd);
+}
+
+/* VM ELF Load
+ *
+ * Input Args:
+ *   filename - Path to ELF file
+ *   data_memslot - Memory slot passed to vm_vaddr_alloc() for segment data
+ *   pgd_memslot - Memory slot passed to vm_vaddr_alloc() for page tables
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ *   vm - Pointer to opaque type that describes the VM.
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Loads the program image of the ELF file specified by filename,
+ * into the virtual address space of the VM pointed to by vm.  On entry
+ * the VM needs to not be using any of the virtual address space used
+ * by the image and it needs to have sufficient available physical pages, to
+ * back the virtual pages used to load the image.  The file is closed again
+ * before returning.
+ */
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+	uint32_t data_memslot, uint32_t pgd_memslot)
+{
+	off_t offset, offset_rv;
+	Elf64_Ehdr hdr;
+
+	/* Open the ELF file. */
+	int fd;
+	fd = open(filename, O_RDONLY);
+	TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+		"  filename: %s\n"
+		"  rv: %i errno: %i", filename, fd, errno);
+
+	/* Read in the ELF header. */
+	elfhdr_get(filename, &hdr);
+
+	/* For each program header.
+	 * The following ELF header members specify the location
+	 * and size of the program headers:
+	 *
+	 *   e_phoff - File offset to start of program headers
+	 *   e_phentsize - Size of each program header
+	 *   e_phnum - Number of program header entries
+	 */
+	for (unsigned int n1 = 0; n1 < hdr.e_phnum; n1++) {
+		/* Seek to the beginning of the program header. */
+		offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
+		offset_rv = lseek(fd, offset, SEEK_SET);
+		TEST_ASSERT(offset_rv == offset,
+			"Failed to seek to begining of program header %u,\n"
+			"  filename: %s\n"
+			"  rv: %jd errno: %i",
+			n1, filename, (intmax_t) offset_rv, errno);
+
+		/* Read in the program header. */
+		Elf64_Phdr phdr;
+		test_read(fd, &phdr, sizeof(phdr));
+
+		/* Skip if this header doesn't describe a loadable segment. */
+		if (phdr.p_type != PT_LOAD)
+			continue;
+
+		/* Allocate memory for this segment within the VM.
+		 * The segment is widened to whole pages: the start is rounded
+		 * down and the (inclusive) end rounded up to a page boundary.
+		 */
+		TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment "
+			"memsize of 0,\n"
+			"  phdr index: %u p_memsz: 0x%" PRIx64,
+			n1, (uint64_t) phdr.p_memsz);
+		vm_vaddr_t seg_vstart = phdr.p_vaddr;
+		seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+		vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
+		seg_vend |= vm->page_size - 1;
+		size_t seg_size = seg_vend - seg_vstart + 1;
+
+		vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
+			data_memslot, pgd_memslot);
+		TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
+			"virtual memory for segment at requested min addr,\n"
+			"  segment idx: %u\n"
+			"  seg_vstart: 0x%lx\n"
+			"  vaddr: 0x%lx",
+			n1, seg_vstart, vaddr);
+		memset(addr_gva2hva(vm, vaddr), 0, seg_size);
+		/* TODO(lhuemill): Set permissions of each memory segment
+		 * based on the least-significant 3 bits of phdr.p_flags.
+		 */
+
+		/* Load portion of initial state that is contained within
+		 * the ELF file.
+		 */
+		if (phdr.p_filesz) {
+			offset_rv = lseek(fd, phdr.p_offset, SEEK_SET);
+			TEST_ASSERT(offset_rv == phdr.p_offset,
+				"Seek to program segment offset failed,\n"
+				"  program header idx: %u errno: %i\n"
+				"  offset_rv: 0x%jx\n"
+				"  expected: 0x%jx\n",
+				n1, errno, (intmax_t) offset_rv,
+				(intmax_t) phdr.p_offset);
+			test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
+				phdr.p_filesz);
+		}
+	}
+
+	/* All segments are loaded; don't leak the descriptor. */
+	close(fd);
+}
diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c
new file mode 100644
index 000000000..fedb2a741
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/io.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/io.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#include "test_util.h"
+
+/* Test Write
+ *
+ * A wrapper for write(2), that automatically handles the following
+ * special conditions:
+ *
+ *   + Interrupted system call (EINTR)
+ *   + Write of less than requested amount
+ *   + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional write is performed to automatically
+ * continue writing the requested data.
+ * There are also many cases where write(2) can return an unexpected
+ * error (e.g. EIO).  Such errors cause a TEST_ASSERT failure.
+ *
+ * Note, for function signature compatibility with write(2), this function
+ * returns the number of bytes written, but that value will always be equal
+ * to the number of requested bytes.  All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * write(2) or cause a TEST_ASSERT failure.
+ *
+ * A count of zero is allowed: nothing is written and 0 is returned.
+ *
+ * Input Args:
+ *  fd    - Opened file descriptor to file to be written.
+ *  buf   - Starting address of data to be written.
+ *  count - Number of bytes to write.
+ *
+ * Output Args: None
+ *
+ * Return:
+ *  On success, number of bytes written.
+ *  On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_write(int fd, const void *buf, size_t count)
+{
+	ssize_t rc;
+	ssize_t num_written = 0;
+	size_t num_left = count;
+	const char *ptr = buf;
+
+	/*
+	 * Loop on the number of bytes still outstanding.  Testing before the
+	 * first write(2) keeps a zero count from being reported as an
+	 * unexpected EOF when write(2) returns 0 for it.
+	 */
+	while (num_left > 0) {
+		rc = write(fd, ptr, num_left);
+
+		switch (rc) {
+		case -1:
+			/* Only transient failures are retried. */
+			TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+				    "Unexpected write failure,\n"
+				    "  rc: %zi errno: %i", rc, errno);
+			continue;
+
+		case 0:
+			TEST_FAIL("Unexpected EOF,\n"
+				  "  rc: %zi num_written: %zi num_left: %zu",
+				  rc, num_written, num_left);
+			break;
+
+		default:
+			TEST_ASSERT(rc >= 0, "Unexpected ret from write,\n"
+				"  rc: %zi errno: %i", rc, errno);
+			num_written += rc;
+			num_left -= rc;
+			ptr += rc;
+			break;
+		}
+	}
+
+	return num_written;
+}
+
+/* Test Read
+ *
+ * A wrapper for read(2), that automatically handles the following
+ * special conditions:
+ *
+ *   + Interrupted system call (EINTR)
+ *   + Read of less than requested amount
+ *   + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional read is performed to automatically
+ * continue reading the requested data.
+ * There are also many cases where read(2) can return an unexpected
+ * error (e.g. EIO).  Such errors cause a TEST_ASSERT failure.  Note,
+ * it is expected that the file opened by fd at the current file position
+ * contains at least the number of requested bytes to be read.  A TEST_ASSERT
+ * failure is produced if an End-Of-File condition occurs, before all the
+ * data is read.  It is the callers responsibility to assure that sufficient
+ * data exists.
+ *
+ * Note, for function signature compatibility with read(2), this function
+ * returns the number of bytes read, but that value will always be equal
+ * to the number of requested bytes.  All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * read(2) or cause a TEST_ASSERT failure.
+ *
+ * A count of zero is allowed: nothing is read and 0 is returned.
+ *
+ * Input Args:
+ *  fd    - Opened file descriptor to file to be read.
+ *  count - Number of bytes to read.
+ *
+ * Output Args:
+ *  buf   - Starting address of where to write the bytes read.
+ *
+ * Return:
+ *  On success, number of bytes read.
+ *  On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_read(int fd, void *buf, size_t count)
+{
+	ssize_t rc;
+	ssize_t num_read = 0;
+	size_t num_left = count;
+	char *ptr = buf;
+
+	/*
+	 * Loop on the number of bytes still outstanding.  Testing before the
+	 * first read(2) keeps a zero count from being reported as an
+	 * unexpected EOF when read(2) returns 0 for it.
+	 */
+	while (num_left > 0) {
+		rc = read(fd, ptr, num_left);
+
+		switch (rc) {
+		case -1:
+			/* Only transient failures are retried. */
+			TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+				    "Unexpected read failure,\n"
+				    "  rc: %zi errno: %i", rc, errno);
+			break;
+
+		case 0:
+			TEST_FAIL("Unexpected EOF,\n"
+				  "   rc: %zi num_read: %zi num_left: %zu",
+				  rc, num_read, num_left);
+			break;
+
+		default:
+			TEST_ASSERT(rc > 0, "Unexpected ret from read,\n"
+				    "  rc: %zi errno: %i", rc, errno);
+			num_read += rc;
+			num_left -= rc;
+			ptr += rc;
+			break;
+		}
+	}
+
+	return num_read;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
new file mode 100644
index 000000000..49805fd16
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -0,0 +1,1865 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+#include "processor.h"
+
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/kernel.h>
+
+/* Pages per huge page; multiplied by the VM page size to get huge_page_size. */
+#define KVM_UTIL_PGS_PER_HUGEPG 512
+/* NOTE(review): presumably the lowest guest page frame handed out -- confirm. */
+#define KVM_UTIL_MIN_PFN	2
+
+/*
+ * Rounds the address x up to a multiple of size and returns it as a
+ * pointer.  size must be a non-zero power of 2, otherwise a TEST_ASSERT
+ * failure is produced.
+ */
+static void *align(void *x, size_t size)
+{
+	size_t mask = size - 1;
+	size_t rounded;
+
+	TEST_ASSERT(size != 0 && !(size & (size - 1)),
+		    "size not a power of 2: %lu", size);
+
+	rounded = ((size_t) x + mask) & ~mask;
+
+	return (void *) rounded;
+}
+
+/*
+ * Capability
+ *
+ * Input Args:
+ *   cap - Capability (KVM_CAP_*) to query
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   The value KVM_CHECK_EXTENSION reports for cap.  A failing ioctl is a
+ *   TEST_ASSERT failure.
+ *
+ * Queries the capability on a temporary descriptor for KVM_DEV_PATH.  When
+ * that device cannot be opened the process exits with KSFT_SKIP.
+ */
+int kvm_check_cap(long cap)
+{
+	int kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	int ret;
+
+	if (kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
+	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+		"  rc: %i errno: %i", ret, errno);
+	close(kvm_fd);
+
+	return ret;
+}
+
+/* VM Enable Capability
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   cap - Capability description handed to KVM_ENABLE_CAP
+ *
+ * Output Args: None
+ *
+ * Return: 0 on success; any failure is a TEST_ASSERT failure.
+ *
+ * Issues KVM_ENABLE_CAP on the VM file descriptor.
+ */
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
+{
+	int ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
+
+	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
+		"  rc: %i errno: %i", ret, errno);
+
+	return ret;
+}
+
+/* VCPU Enable Capability
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpu_id - VCPU
+ *   cap - Capability description handed to KVM_ENABLE_CAP
+ *
+ * Output Args: None
+ *
+ * Return: 0 on success; an unknown vcpu_id or a failing ioctl is a
+ *   TEST_ASSERT failure.
+ *
+ * Issues KVM_ENABLE_CAP on the file descriptor of the given VCPU.
+ */
+int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
+		    struct kvm_enable_cap *cap)
+{
+	struct vcpu *vcpu;
+	int r;
+
+	vcpu = vcpu_find(vm, vcpu_id);
+	TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
+
+	r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
+	TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
+			"  rc: %i, errno: %i", r, errno);
+
+	return r;
+}
+
+/*
+ * Open KVM_DEV_PATH with the given permissions and create the VM file
+ * descriptor from vm->type.  The process exits with KSFT_SKIP when the
+ * device cannot be opened or KVM_CAP_IMMEDIATE_EXIT is not available; a
+ * failing KVM_CREATE_VM is a TEST_ASSERT failure.
+ */
+static void vm_open(struct kvm_vm *vm, int perm)
+{
+	int dev_fd = open(KVM_DEV_PATH, perm);
+
+	vm->kvm_fd = dev_fd;
+	if (dev_fd < 0)
+		exit(KSFT_SKIP);
+
+	if (kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT) == 0) {
+		print_skip("immediate_exit not available");
+		exit(KSFT_SKIP);
+	}
+
+	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
+	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
+		"rc: %i errno: %i", vm->fd, errno);
+}
+
+/*
+ * Human readable description of each guest mode.  The static assertion
+ * below requires exactly NUM_VM_MODES entries; presumably the table is
+ * indexed by enum vm_guest_mode, in the same order as
+ * vm_guest_mode_params[] -- confirm against the enum definition.
+ */
+const char * const vm_guest_mode_string[] = {
+	"PA-bits:52,  VA-bits:48,  4K pages",
+	"PA-bits:52,  VA-bits:48, 64K pages",
+	"PA-bits:48,  VA-bits:48,  4K pages",
+	"PA-bits:48,  VA-bits:48, 64K pages",
+	"PA-bits:40,  VA-bits:48,  4K pages",
+	"PA-bits:40,  VA-bits:48, 64K pages",
+	"PA-bits:ANY, VA-bits:48,  4K pages",
+};
+_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
+	       "Missing new mode strings?");
+
+/* Address widths and page geometry that vm_create() copies into a new VM. */
+struct vm_guest_mode_params {
+	unsigned int pa_bits;		/* physical address width, in bits */
+	unsigned int va_bits;		/* virtual address width, in bits */
+	unsigned int page_size;		/* page size, in bytes */
+	unsigned int page_shift;	/* shift that matches page_size */
+};
+
+/*
+ * One entry per guest mode, indexed by the mode value passed to vm_create();
+ * the static assertion below requires exactly NUM_VM_MODES entries.  The
+ * final entry leaves both widths at 0 -- presumably it belongs to
+ * VM_MODE_PXXV48_4K, whose widths vm_create() fills in at run time on
+ * x86_64; confirm against the enum order.
+ */
+static const struct vm_guest_mode_params vm_guest_mode_params[] = {
+	{ 52, 48,  0x1000, 12 },
+	{ 52, 48, 0x10000, 16 },
+	{ 48, 48,  0x1000, 12 },
+	{ 48, 48, 0x10000, 16 },
+	{ 40, 48,  0x1000, 12 },
+	{ 40, 48, 0x10000, 16 },
+	{  0,  0,  0x1000, 12 },
+};
+_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
+	       "Missing new mode params?");
+
+/*
+ * VM Create
+ *
+ * Input Args:
+ *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
+ *   phy_pages - Physical memory pages
+ *   perm - permission
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
+ * When phy_pages is non-zero, a memory region of phy_pages physical pages
+ * is created and mapped starting at guest physical address 0, using memory
+ * slot 0, anonymous backing memory and no flags.  The file
+ * descriptor to control the created VM is created with the permissions
+ * given by perm (e.g. O_RDWR).
+ *
+ * Allocation failure, an unknown mode and VM_MODE_PXXV48_4K on a
+ * non-x86_64 build are reported through TEST_ASSERT/TEST_FAIL.
+ */
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+{
+	struct kvm_vm *vm;
+
+	pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
+		 vm_guest_mode_string(mode), phy_pages, perm);
+
+	vm = calloc(1, sizeof(*vm));
+	TEST_ASSERT(vm != NULL, "Insufficient Memory");
+
+	INIT_LIST_HEAD(&vm->vcpus);
+	INIT_LIST_HEAD(&vm->userspace_mem_regions);
+
+	vm->mode = mode;
+	vm->type = 0;
+
+	/* Start from the table values; some modes adjust them below. */
+	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
+	vm->va_bits = vm_guest_mode_params[mode].va_bits;
+	vm->page_size = vm_guest_mode_params[mode].page_size;
+	vm->page_shift = vm_guest_mode_params[mode].page_shift;
+
+	/* Setup mode specific traits: 4 levels for 4K pages, 3 for 64K pages. */
+	switch (vm->mode) {
+	case VM_MODE_P52V48_4K:
+		vm->pgtable_levels = 4;
+		break;
+	case VM_MODE_P52V48_64K:
+		vm->pgtable_levels = 3;
+		break;
+	case VM_MODE_P48V48_4K:
+		vm->pgtable_levels = 4;
+		break;
+	case VM_MODE_P48V48_64K:
+		vm->pgtable_levels = 3;
+		break;
+	case VM_MODE_P40V48_4K:
+		vm->pgtable_levels = 4;
+		break;
+	case VM_MODE_P40V48_64K:
+		vm->pgtable_levels = 3;
+		break;
+	case VM_MODE_PXXV48_4K:
+#ifdef __x86_64__
+		/* The table holds no widths for this mode: query them. */
+		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
+		/*
+		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
+		 * it doesn't take effect unless a CR4.LA57 is set, which it
+		 * isn't for this VM_MODE.
+		 */
+		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
+			    "Linear address width (%d bits) not supported",
+			    vm->va_bits);
+		pr_debug("Guest physical address width detected: %d\n",
+			 vm->pa_bits);
+		vm->pgtable_levels = 4;
+		vm->va_bits = 48;
+#else
+		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
+#endif
+		break;
+	default:
+		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
+	}
+
+#ifdef __aarch64__
+	/*
+	 * A 40-bit physical address width keeps type 0; any other width is
+	 * encoded into the VM type.  This must happen before vm_open(),
+	 * which passes vm->type to KVM_CREATE_VM.
+	 */
+	if (vm->pa_bits != 40)
+		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
+#endif
+
+	vm_open(vm, perm);
+
+	/*
+	 * Limit to VA-bit canonical virtual addresses: mark the pages of the
+	 * lower half, starting at 0, and of the upper half, starting at
+	 * ~((1 << (va_bits - 1)) - 1), as valid.
+	 */
+	vm->vpages_valid = sparsebit_alloc();
+	sparsebit_set_num(vm->vpages_valid,
+		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+	sparsebit_set_num(vm->vpages_valid,
+		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
+		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+
+	/* Limit physical addresses to PA-bits. */
+	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+
+	/* Allocate and setup memory for guest. */
+	vm->vpages_mapped = sparsebit_alloc();
+	if (phy_pages != 0)
+		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+					    0, 0, phy_pages, 0);
+
+	return vm;
+}
+
+/*
+ * VM Restart
+ *
+ * Input Args:
+ *   vmp - VM that has been released before
+ *   perm - permission used to reopen the KVM device
+ *
+ * Output Args: None
+ *
+ * Reopens the file descriptors of the VM, recreates the irqchip if the VM
+ * had one, and registers every recorded userspace memory region with KVM
+ * again.  A region that cannot be registered is a TEST_ASSERT failure.
+ */
+void kvm_vm_restart(struct kvm_vm *vmp, int perm)
+{
+	struct userspace_mem_region *region;
+	int ret;
+
+	vm_open(vmp, perm);
+
+	if (vmp->has_irqchip)
+		vm_create_irqchip(vmp);
+
+	list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
+		struct kvm_userspace_memory_region *kr = &region->region;
+
+		ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, kr);
+		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+			    "  rc: %i errno: %i\n"
+			    "  slot: %u flags: 0x%x\n"
+			    "  guest_phys_addr: 0x%llx size: 0x%llx",
+			    ret, errno, kr->slot, kr->flags,
+			    kr->guest_phys_addr, kr->memory_size);
+	}
+}
+
+/*
+ * Fetch the dirty page bitmap of memory slot 'slot' into 'log' using
+ * KVM_GET_DIRTY_LOG.  A failing ioctl is a TEST_ASSERT failure.
+ */
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+{
+	struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
+	int ret;
+
+	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
+	/* ioctl(2) reports the cause of a failure in errno, not in ret. */
+	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
+		    __func__, strerror(errno));
+}
+
+/*
+ * Issue KVM_CLEAR_DIRTY_LOG for num_pages pages of memory slot 'slot',
+ * starting at first_page, with 'log' as the bitmap argument.  A failing
+ * ioctl is a TEST_ASSERT failure.
+ */
+void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+			    uint64_t first_page, uint32_t num_pages)
+{
+	struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
+		                            .first_page = first_page,
+	                                    .num_pages = num_pages };
+	int ret;
+
+	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
+	/* ioctl(2) reports the cause of a failure in errno, not in ret. */
+	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
+		    __func__, strerror(errno));
+}
+
+/*
+ * Userspace Memory Region Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   start - Starting VM physical address
+ *   end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to overlapping region, NULL if no such region.
+ *
+ * Walks the VM's region list and returns the first region whose guest
+ * physical range shares at least one address with [start, end].  When
+ * several regions overlap, whichever is encountered first is returned.
+ */
+static struct userspace_mem_region *
+userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
+{
+	struct userspace_mem_region *region;
+
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+		uint64_t first = region->region.guest_phys_addr;
+		uint64_t last = first + region->region.memory_size - 1;
+
+		/* Disjoint when the query ends before or starts after it. */
+		if (end < first || start > last)
+			continue;
+
+		return region;
+	}
+
+	return NULL;
+}
+
+/*
+ * KVM Userspace Memory Region Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   start - Starting VM physical address
+ *   end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to the KVM memslot description of an overlapping region,
+ *   NULL if no such region.
+ *
+ * Public wrapper around userspace_mem_region_find() that lets tests look
+ * up the memslot structure covering a range of guest physical memory.
+ */
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+				 uint64_t end)
+{
+	struct userspace_mem_region *region =
+		userspace_mem_region_find(vm, start, end);
+
+	return region ? &region->region : NULL;
+}
+
+/*
+ * VCPU Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to the matching VCPU structure, or NULL when the VM has no
+ *   VCPU with the given ID.
+ *
+ * Searches the VM's list of VCPUs for the entry whose id equals vcpuid.
+ */
+struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu;
+	struct vcpu *found = NULL;
+
+	list_for_each_entry(vcpu, &vm->vcpus, list) {
+		if (vcpu->id != vcpuid)
+			continue;
+
+		found = vcpu;
+		break;
+	}
+
+	return found;
+}
+
+/*
+ * VM VCPU Remove
+ *
+ * Input Args:
+ *   vcpu - VCPU to remove
+ *
+ * Output Args: None
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Unmaps the vCPU's state mapping, closes its file descriptor, unlinks
+ * it from its VM's vCPU list and frees the structure.
+ */
+static void vm_vcpu_rm(struct vcpu *vcpu)
+{
+	int ret;
+
+	ret = munmap(vcpu->state, sizeof(*vcpu->state));
+	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
+		"errno: %i", ret, errno);
+	/* Capture the result so the assertion below checks this close(). */
+	ret = close(vcpu->fd);
+	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
+		"errno: %i", ret, errno);
+
+	list_del(&vcpu->list);
+	free(vcpu);
+}
+
+/*
+ * Removes every vCPU of the VM and closes both the VM file descriptor
+ * and the KVM device file descriptor.  The structure itself, its memory
+ * regions and its sparsebit arrays are left untouched.  A failing close
+ * is a TEST_ASSERT failure.
+ */
+void kvm_vm_release(struct kvm_vm *vmp)
+{
+	struct vcpu *vcpu, *tmp;
+	int ret;
+
+	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
+		vm_vcpu_rm(vcpu);
+
+	ret = close(vmp->fd);
+	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
+		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+
+	/* Capture the result so the assertion below checks this close(). */
+	ret = close(vmp->kvm_fd);
+	TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
+		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
+}
+
+/*
+ * Unlink region from the VM's region list, re-register its slot with a
+ * memory size of zero, then release its page bookkeeping, its host
+ * mapping and the structure itself.  A failing ioctl or munmap is a
+ * TEST_ASSERT failure.
+ */
+static void __vm_mem_region_delete(struct kvm_vm *vm,
+				   struct userspace_mem_region *region)
+{
+	struct kvm_userspace_memory_region *kr = &region->region;
+	int ret;
+
+	list_del(&region->list);
+
+	kr->memory_size = 0;
+	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, kr);
+	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
+		    "rc: %i errno: %i", ret, errno);
+
+	sparsebit_free(&region->unused_phy_pages);
+
+	ret = munmap(region->mmap_start, region->mmap_size);
+	TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
+
+	free(region);
+}
+
+/*
+ * Tears down the VM pointed to by vmp: deletes all of its userspace
+ * memory regions, frees its sparsebit arrays, releases its vCPUs and
+ * file descriptors, and finally frees the structure.  A NULL vmp is
+ * ignored.
+ */
+void kvm_vm_free(struct kvm_vm *vmp)
+{
+	struct userspace_mem_region *region, *next;
+
+	if (!vmp)
+		return;
+
+	/* Memory regions first; this needs the VM fd to still be open. */
+	list_for_each_entry_safe(region, next, &vmp->userspace_mem_regions, list)
+		__vm_mem_region_delete(vmp, region);
+
+	/* Virtual page bookkeeping. */
+	sparsebit_free(&vmp->vpages_valid);
+	sparsebit_free(&vmp->vpages_mapped);
+
+	/* vCPUs and file descriptors. */
+	kvm_vm_release(vmp);
+
+	/* And the descriptor structure itself. */
+	free(vmp);
+}
+
+/*
+ * Memory Compare, host virtual to guest virtual
+ *
+ * Input Args:
+ *   hva - Starting host virtual address
+ *   vm - Virtual Machine
+ *   gva - Starting guest virtual address
+ *   len - number of bytes to compare
+ *
+ * Output Args: None
+ *
+ * Input/Output Args: None
+ *
+ * Return:
+ *   Returns 0 if the bytes starting at hva for a length of len
+ *   are equal the guest virtual bytes starting at gva.  Returns
+ *   a value < 0, if bytes at hva are less than those at gva.
+ *   Otherwise a value > 0 is returned.
+ *
+ * Compares the bytes starting at the host virtual address hva, for
+ * a length of len, to the guest bytes starting at the guest virtual
+ * address given by gva.  The guest address is translated again for every
+ * chunk, and no chunk crosses a page boundary on either side.
+ */
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+{
+	size_t amt;
+
+	/*
+	 * Compare a batch of bytes until either a mismatch is found
+	 * or all the bytes have been compared.
+	 */
+	for (uintptr_t offset = 0; offset < len; offset += amt) {
+		uintptr_t ptr1 = (uintptr_t)hva + offset;
+
+		/*
+		 * Determine host address for guest virtual address
+		 * at offset.
+		 */
+		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
+
+		/*
+		 * Determine amount to compare on this pass.
+		 * Don't allow the comparison to cross a page boundary:
+		 * start with everything that is left and trim the amount
+		 * to the end of the current page, first for the host
+		 * buffer and then for the translated guest buffer.
+		 */
+		amt = len - offset;
+		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
+			amt = vm->page_size - (ptr1 % vm->page_size);
+		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
+			amt = vm->page_size - (ptr2 % vm->page_size);
+
+		/* The last byte of the chunk must share a page with the first. */
+		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
+		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
+
+		/*
+		 * Perform the comparison.  If there is a difference
+		 * return that result to the caller, otherwise need
+		 * to continue on looking for a mismatch.
+		 */
+		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
+		if (ret != 0)
+			return ret;
+	}
+
+	/*
+	 * No mismatch found.  Let the caller know the two memory
+	 * areas are equal.
+	 */
+	return 0;
+}
+
+/*
+ * VM Userspace Memory Region Add
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   src_type - Storage source (backing type) for this region,
+ *              e.g. VM_MEM_SRC_ANONYMOUS.
+ *   guest_paddr - Starting guest physical address
+ *   slot - KVM region slot
+ *   npages - Number of physical pages
+ *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Allocates a memory area of the number of pages specified by npages
+ * and maps it to the VM specified by vm, at a starting physical address
+ * given by guest_paddr.  The region is created with a KVM region slot
+ * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
+ * region is created with the flags given by flags.
+ */
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+	enum vm_mem_backing_src_type src_type,
+	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+	uint32_t flags)
+{
+	int ret;
+	struct userspace_mem_region *region;
+	size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+	size_t alignment;
+
+	/* Validate the request: page count, alignment and physical range. */
+	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
+		"Number of guest pages is not compatible with the host. "
+		"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
+
+	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+		"address not on a page boundary.\n"
+		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
+		guest_paddr, vm->page_size);
+	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+		<= vm->max_gfn, "Physical range beyond maximum "
+		"supported physical address,\n"
+		"  guest_paddr: 0x%lx npages: 0x%lx\n"
+		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		guest_paddr, npages, vm->max_gfn, vm->page_size);
+
+	/*
+	 * Confirm a mem region with an overlapping address doesn't
+	 * already exist.
+	 */
+	region = (struct userspace_mem_region *) userspace_mem_region_find(
+		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
+	if (region != NULL)
+		TEST_FAIL("overlapping userspace_mem_region already "
+			"exists\n"
+			"  requested guest_paddr: 0x%lx npages: 0x%lx "
+			"page_size: 0x%x\n"
+			"  existing guest_paddr: 0x%lx size: 0x%lx",
+			guest_paddr, npages, vm->page_size,
+			(uint64_t) region->region.guest_phys_addr,
+			(uint64_t) region->region.memory_size);
+
+	/* Confirm no region with the requested slot already exists. */
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+		if (region->region.slot != slot)
+			continue;
+
+		TEST_FAIL("A mem region with the requested slot "
+			"already exists.\n"
+			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
+			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
+			slot, guest_paddr, npages,
+			region->region.slot,
+			(uint64_t) region->region.guest_phys_addr,
+			(uint64_t) region->region.memory_size);
+	}
+
+	/* Allocate and initialize new mem region structure. */
+	region = calloc(1, sizeof(*region));
+	TEST_ASSERT(region != NULL, "Insufficient Memory");
+	region->mmap_size = npages * vm->page_size;
+
+#ifdef __s390x__
+	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
+	alignment = 0x100000;
+#else
+	alignment = 1;
+#endif
+
+	/* THP-backed regions are additionally aligned to the huge page size. */
+	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
+		alignment = max(huge_page_size, alignment);
+
+	/*
+	 * Add enough memory to align up if necessary: over-allocate by
+	 * one alignment unit so that an aligned start address of the
+	 * requested size fits inside the mapping.
+	 */
+	if (alignment > 1)
+		region->mmap_size += alignment;
+
+	/* Anonymous private mapping; MAP_HUGETLB only for the HUGETLB source. */
+	region->mmap_start = mmap(NULL, region->mmap_size,
+				  PROT_READ | PROT_WRITE,
+				  MAP_PRIVATE | MAP_ANONYMOUS
+				  | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
+				  -1, 0);
+	TEST_ASSERT(region->mmap_start != MAP_FAILED,
+		    "test_malloc failed, mmap_start: %p errno: %i",
+		    region->mmap_start, errno);
+
+	/* Align host address */
+	region->host_mem = align(region->mmap_start, alignment);
+
+	/*
+	 * As needed perform madvise.  THP availability is detected by
+	 * the presence of /sys/kernel/mm/transparent_hugepage; when it
+	 * is absent the madvise is skipped, which is only acceptable
+	 * for the plain anonymous source.
+	 */
+	if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
+		struct stat statbuf;
+
+		ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
+		TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
+			    "stat /sys/kernel/mm/transparent_hugepage");
+
+		TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP,
+			    "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel");
+
+		if (ret == 0) {
+			ret = madvise(region->host_mem, npages * vm->page_size,
+				      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+			TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x",
+				    region->host_mem, npages * vm->page_size, src_type);
+		}
+	}
+
+	/* Start with every guest page frame of the region marked unused. */
+	region->unused_phy_pages = sparsebit_alloc();
+	sparsebit_set_num(region->unused_phy_pages,
+		guest_paddr >> vm->page_shift, npages);
+	/* Fill in the KVM memory region descriptor and register it. */
+	region->region.slot = slot;
+	region->region.flags = flags;
+	region->region.guest_phys_addr = guest_paddr;
+	region->region.memory_size = npages * vm->page_size;
+	region->region.userspace_addr = (uintptr_t) region->host_mem;
+	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+		"  rc: %i errno: %i\n"
+		"  slot: %u flags: 0x%x\n"
+		"  guest_phys_addr: 0x%lx size: 0x%lx",
+		ret, errno, slot, flags,
+		guest_paddr, (uint64_t) region->region.memory_size);
+
+	/* Add to linked-list of memory regions. */
+	list_add(&region->list, &vm->userspace_mem_regions);
+}
+
+/*
+ * Memslot to region
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   memslot - KVM memory slot ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to memory region structure that describe memory region
+ *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
+ *   on error (e.g. currently no memory region using memslot as a KVM
+ *   memory slot ID).
+ */
+struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot)
+{
+	struct userspace_mem_region *region;
+
+	/* Linear search of the VM's region list for the requested slot ID. */
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+		if (region->region.slot != memslot)
+			continue;
+
+		return region;
+	}
+
+	/* Not found: dump the VM state for diagnosis, then fail the test. */
+	fprintf(stderr,
+		"No mem region with the requested slot found,\n  requested slot: %u\n",
+		memslot);
+	fputs("---- vm dump ----\n", stderr);
+	vm_dump(stderr, vm, 2);
+	TEST_FAIL("Mem region not found");
+
+	return NULL;
+}
+
+/*
+ * VM Memory Region Flags Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   slot - Slot of the memory region to modify
+ *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the flags of the memory region specified by the value of slot,
+ * to the values given by flags.
+ */
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
+{
+	/* memslot2region() fails the test itself when the slot is unknown. */
+	struct userspace_mem_region *region = memslot2region(vm, slot);
+	int ret;
+
+	/* Update the cached descriptor, then hand it to KVM. */
+	region->region.flags = flags;
+	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+
+	TEST_ASSERT(ret == 0,
+		"KVM_SET_USER_MEMORY_REGION IOCTL failed,\n  rc: %i errno: %i slot: %u flags: 0x%x",
+		ret, errno, slot, flags);
+}
+
+/*
+ * VM Memory Region Move
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   slot - Slot of the memory region to move
+ *   new_gpa - Starting guest physical address
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Change the gpa of a memory region.
+ */
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
+{
+	/* memslot2region() fails the test itself when the slot is unknown. */
+	struct userspace_mem_region *region = memslot2region(vm, slot);
+	int ret;
+
+	/* Re-register the same slot with the new guest physical base. */
+	region->region.guest_phys_addr = new_gpa;
+	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+
+	TEST_ASSERT(!ret,
+		    "KVM_SET_USER_MEMORY_REGION failed\nret: %i errno: %i slot: %u new_gpa: 0x%lx",
+		    ret, errno, slot, new_gpa);
+}
+
+/*
+ * VM Memory Region Delete
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   slot - Slot of the memory region to delete
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Delete a memory region.
+ */
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
+{
+	/* Resolve the slot to its region; the lookup fails the test if absent. */
+	struct userspace_mem_region *region = memslot2region(vm, slot);
+
+	__vm_mem_region_delete(vm, region);
+}
+
+/*
+ * VCPU mmap Size
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Size of VCPU state
+ *
+ * Returns the size of the structure pointed to by the return value
+ * of vcpu_state().
+ */
+static int vcpu_mmap_sz(void)
+{
+	int dev_fd, ret;
+
+	/* Without access to the KVM device the test is skipped, not failed. */
+	dev_fd = open(KVM_DEV_PATH, O_RDONLY);
+	if (dev_fd < 0)
+		exit(KSFT_SKIP);
+
+	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+	/*
+	 * Check the sign before comparing against sizeof(): in a mixed
+	 * int/size_t comparison a failing ioctl's -1 would be converted
+	 * to a huge unsigned value and wrongly satisfy the assertion.
+	 */
+	TEST_ASSERT(ret >= 0 && (size_t)ret >= sizeof(struct kvm_run),
+		"%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
+		__func__, ret, errno);
+
+	close(dev_fd);
+
+	return ret;
+}
+
+/*
+ * VM VCPU Add
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
+ * No additional VCPU setup is done.
+ */
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu;
+	int mmap_sz;
+
+	/* Confirm a vcpu with the specified id doesn't already exist. */
+	vcpu = vcpu_find(vm, vcpuid);
+	if (vcpu != NULL)
+		TEST_FAIL("vcpu with the specified id "
+			"already exists,\n"
+			"  requested vcpuid: %u\n"
+			"  existing vcpuid: %u state: %p",
+			vcpuid, vcpu->id, vcpu->state);
+
+	/* Allocate and initialize new vcpu structure. */
+	vcpu = calloc(1, sizeof(*vcpu));
+	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
+	vcpu->id = vcpuid;
+	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
+	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
+		vcpu->fd, errno);
+
+	/*
+	 * Query the mmap size once (each call opens the KVM device) and
+	 * check its sign before comparing with the unsigned sizeof(), so
+	 * a negative value cannot pass by being converted to size_t.
+	 */
+	mmap_sz = vcpu_mmap_sz();
+	TEST_ASSERT(mmap_sz >= 0 && (size_t)mmap_sz >= sizeof(*vcpu->state),
+		"vcpu mmap size "
+		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zu",
+		mmap_sz, sizeof(*vcpu->state));
+
+	/* Map the vcpu's shared run structure from the vcpu fd. */
+	vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
+	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
+		"vcpu id: %u errno: %i", vcpuid, errno);
+
+	/* Add to linked-list of VCPUs. */
+	list_add(&vcpu->list, &vm->vcpus);
+}
+
+/*
+ * VM Virtual Address Unused Gap
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   sz - Size (bytes)
+ *   vaddr_min - Minimum Virtual Address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Lowest virtual address at or above vaddr_min, with at least
+ *   sz unused bytes.  TEST_ASSERT failure if no area of at least
+ *   size sz is available.
+ *
+ * Within the VM specified by vm, locates the lowest starting virtual
+ * address >= vaddr_min, that has at least sz unallocated bytes.  A
+ * TEST_ASSERT failure occurs for invalid input or no area of at least
+ * sz unallocated bytes >= vaddr_min is available.
+ */
+static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
+				      vm_vaddr_t vaddr_min)
+{
+	/* Number of pages needed to hold sz bytes, rounded up. */
+	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
+
+	/*
+	 * Determine lowest permitted virtual page index.  vaddr_min is
+	 * rounded up to a page; if that rounding wrapped around, the
+	 * resulting address is below vaddr_min and no range can exist.
+	 */
+	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
+	if ((pgidx_start * vm->page_size) < vaddr_min)
+		goto no_va_found;
+
+	/* Loop over section with enough valid virtual page indexes. */
+	if (!sparsebit_is_set_num(vm->vpages_valid,
+		pgidx_start, pages))
+		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
+			pgidx_start, pages);
+	do {
+		/*
+		 * Are there enough unused virtual pages available at
+		 * the currently proposed starting virtual page index.
+		 * If not, adjust proposed starting index to next
+		 * possible.
+		 */
+		if (sparsebit_is_clear_num(vm->vpages_mapped,
+			pgidx_start, pages))
+			goto va_found;
+		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
+			pgidx_start, pages);
+		/* An index of 0 is treated here as "no further range". */
+		if (pgidx_start == 0)
+			goto no_va_found;
+
+		/*
+		 * If needed, adjust proposed starting virtual address,
+		 * to next range of valid virtual addresses.
+		 */
+		if (!sparsebit_is_set_num(vm->vpages_valid,
+			pgidx_start, pages)) {
+			pgidx_start = sparsebit_next_set_num(
+				vm->vpages_valid, pgidx_start, pages);
+			if (pgidx_start == 0)
+				goto no_va_found;
+		}
+	} while (pgidx_start != 0);
+
+no_va_found:
+	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);
+
+	/* NOT REACHED */
+	return -1;
+
+va_found:
+	/* Re-verify that the chosen range is both valid and unmapped. */
+	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
+		pgidx_start, pages),
+		"Unexpected, invalid virtual page index range,\n"
+		"  pgidx_start: 0x%lx\n"
+		"  pages: 0x%lx",
+		pgidx_start, pages);
+	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
+		pgidx_start, pages),
+		"Unexpected, pages already mapped,\n"
+		"  pgidx_start: 0x%lx\n"
+		"  pages: 0x%lx",
+		pgidx_start, pages);
+
+	/* Convert the page index back into a virtual address. */
+	return pgidx_start * vm->page_size;
+}
+
+/*
+ * VM Virtual Address Allocate
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   sz - Size in bytes
+ *   vaddr_min - Minimum starting virtual address
+ *   data_memslot - Memory region slot for data pages
+ *   pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting guest virtual address
+ *
+ * Allocates at least sz bytes within the virtual address space of the vm
+ * given by vm.  The allocated bytes are mapped to a virtual address >=
+ * the address given by vaddr_min.  Note that each allocation uses a
+ * a unique set of pages, with the minimum real allocation being at least
+ * a page.
+ */
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+			  uint32_t data_memslot, uint32_t pgd_memslot)
+{
+	uint64_t remaining = sz >> vm->page_shift;
+	vm_vaddr_t vaddr_start, vaddr;
+
+	/* A partial trailing page still consumes a whole page. */
+	if ((sz % vm->page_size) != 0)
+		remaining++;
+
+	virt_pgd_alloc(vm, pgd_memslot);
+
+	/* Locate an unused virtual range large enough for the request. */
+	vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+
+	/*
+	 * Back each virtual page with a freshly allocated physical page,
+	 * map it, and record the virtual page as mapped.
+	 */
+	for (vaddr = vaddr_start; remaining > 0; remaining--) {
+		vm_paddr_t paddr = vm_phy_page_alloc(vm,
+				KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
+
+		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
+
+		vaddr += vm->page_size;
+	}
+
+	return vaddr_start;
+}
+
+/*
+ * Map a range of VM virtual address to the VM's physical address
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vaddr - Virtual address to map
+ *   paddr - VM Physical Address
+ *   npages - The number of pages to map
+ *   pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by @vm, creates a virtual translation for
+ * @npages starting at @vaddr to the page range starting at @paddr.
+ */
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+	      unsigned int npages, uint32_t pgd_memslot)
+{
+	const size_t page_size = vm->page_size;
+	const size_t size = npages * page_size;
+	unsigned int i;
+
+	/* Reject ranges whose end does not lie above their start. */
+	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
+	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+	/* Map one page per iteration, advancing both addresses in step. */
+	for (i = 0; i < npages; i++) {
+		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+		vaddr += page_size;
+		paddr += page_size;
+	}
+}
+
+/*
+ * Address VM Physical to Host Virtual
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gpa - VM physical address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent host virtual address
+ *
+ * Locates the memory region containing the VM physical address given
+ * by gpa, within the VM given by vm.  When found, the host virtual
+ * address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing gpa exists.
+ */
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+	struct userspace_mem_region *region;
+
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+		uint64_t first = region->region.guest_phys_addr;
+		uint64_t last = first + region->region.memory_size - 1;
+
+		/* Skip regions whose [first, last] range excludes gpa. */
+		if (gpa < first || gpa > last)
+			continue;
+
+		/* Same offset into the region, applied to its host mapping. */
+		return (void *) ((uintptr_t) region->host_mem + (gpa - first));
+	}
+
+	TEST_FAIL("No vm physical memory at 0x%lx", gpa);
+	return NULL;
+}
+
+/*
+ * Address Host Virtual to VM Physical
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   hva - Host virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent VM physical address
+ *
+ * Locates the memory region containing the host virtual address given
+ * by hva, within the VM given by vm.  When found, the equivalent
+ * VM physical address is returned. A TEST_ASSERT failure occurs if no
+ * region containing hva exists.
+ */
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
+{
+	struct userspace_mem_region *region;
+	/*
+	 * Work on integer addresses throughout: arithmetic on void * is a
+	 * compiler extension rather than standard C.
+	 */
+	uintptr_t addr = (uintptr_t) hva;
+
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+		uintptr_t first = (uintptr_t) region->host_mem;
+		uintptr_t last = first + region->region.memory_size - 1;
+
+		/* Skip regions whose host range [first, last] excludes hva. */
+		if (addr < first || addr > last)
+			continue;
+
+		/* Same offset into the region, applied to its guest base. */
+		return (vm_paddr_t) (region->region.guest_phys_addr
+			+ (addr - first));
+	}
+
+	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
+	return -1;
+}
+
+/*
+ * VM Create IRQ Chip
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates an interrupt controller chip for the VM specified by vm.
+ */
+void vm_create_irqchip(struct kvm_vm *vm)
+{
+	int ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
+
+	TEST_ASSERT(ret == 0,
+		"KVM_CREATE_IRQCHIP IOCTL failed, rc: %i errno: %i", ret, errno);
+
+	/* Remember on the VM descriptor that an irqchip now exists. */
+	vm->has_irqchip = true;
+}
+
+/*
+ * VM VCPU State
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to structure that describes the state of the VCPU.
+ *
+ * Locates and returns a pointer to a structure that describes the
+ * state of the VCPU with the given vcpuid.
+ */
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu;
+
+	/* Look the VCPU up by ID; an unknown ID fails the test. */
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	return vcpu->state;
+}
+
+/*
+ * VM VCPU Run
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Switch to executing the code for the VCPU given by vcpuid, within the VM
+ * given by vm.
+ */
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	int ret;
+
+	/* Asserting wrapper around _vcpu_run(). */
+	ret = _vcpu_run(vm, vcpuid);
+	TEST_ASSERT(ret == 0,
+		"KVM_RUN IOCTL failed, rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu;
+	int rc;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Re-issue KVM_RUN for as long as it is interrupted (EINTR). */
+	for (;;) {
+		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
+		if (rc != -1 || errno != EINTR)
+			break;
+	}
+
+	assert_on_unhandled_exception(vm, vcpuid);
+
+	return rc;
+}
+
+void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/*
+	 * Issue KVM_RUN with immediate_exit set for the duration of the
+	 * call; the ioctl is then expected to fail with EINTR.
+	 */
+	vcpu->state->immediate_exit = 1;
+	ret = ioctl(vcpu->fd, KVM_RUN, NULL);
+	vcpu->state->immediate_exit = 0;
+
+	TEST_ASSERT(ret == -1 && errno == EINTR,
+		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
+		    ret, errno);
+}
+
+/*
+ * Applies the guest debug settings in debug to the VCPU given by vcpuid
+ * via KVM_SET_GUEST_DEBUG.  Fails the test if the VCPU does not exist or
+ * the ioctl does not return 0.
+ */
+void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_guest_debug *debug)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	/* Validate the lookup before vcpu->fd is dereferenced. */
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
+	TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
+}
+
+/*
+ * VM VCPU Set MP State
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   mp_state - mp_state to be set
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the MP state of the VCPU given by vcpuid, to the state given
+ * by mp_state.
+ */
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+		       struct kvm_mp_state *mp_state)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	/* Look the VCPU up by ID; an unknown ID fails the test. */
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
+	TEST_ASSERT(ret == 0,
+		"KVM_SET_MP_STATE IOCTL failed, rc: %i errno: %i", ret, errno);
+}
+
+/*
+ * VM VCPU Get Reg List
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args:
+ *   None
+ *
+ * Return:
+ *   A pointer to an allocated struct kvm_reg_list
+ *
+ * Get the list of guest registers which are supported for
+ * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
+ */
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
+	int ret;
+
+	/*
+	 * Probe with n == 0.  The call is expected to fail with E2BIG;
+	 * the register count is then taken from reg_list_n.n.
+	 */
+	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
+	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
+
+	/* Header plus one __u64 per register; the caller owns the buffer. */
+	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
+	TEST_ASSERT(reg_list != NULL, "Insufficient Memory");
+	reg_list->n = reg_list_n.n;
+
+	/* Second call retrieves the actual register IDs. */
+	vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
+	return reg_list;
+}
+
+/*
+ * VM VCPU Regs Get
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args:
+ *   regs - current state of VCPU regs
+ *
+ * Return: None
+ *
+ * Obtains the current register state for the VCPU specified by vcpuid
+ * and stores it at the location given by regs.
+ */
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	/* Look the VCPU up by ID; an unknown ID fails the test. */
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Read the registers into the caller's buffer. */
+	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
+	TEST_ASSERT(ret == 0,
+		"KVM_GET_REGS failed, rc: %i errno: %i", ret, errno);
+}
+
+/*
+ * VM VCPU Regs Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   regs - Values to set VCPU regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the regs of the VCPU specified by vcpuid to the values
+ * given by regs.
+ */
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	/* Look the VCPU up by ID; an unknown ID fails the test. */
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Load the registers from the caller's buffer. */
+	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
+	TEST_ASSERT(ret == 0,
+		"KVM_SET_REGS failed, rc: %i errno: %i", ret, errno);
+}
+
+#ifdef __KVM_HAVE_VCPU_EVENTS
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+		     struct kvm_vcpu_events *events)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	/* Look the VCPU up by ID; an unknown ID fails the test. */
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Fetch the VCPU events into the caller's buffer. */
+	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
+	TEST_ASSERT(ret == 0,
+		"KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i", ret, errno);
+}
+
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+		     struct kvm_vcpu_events *events)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	/* Look the VCPU up by ID; an unknown ID fails the test. */
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Apply the VCPU events from the caller's buffer. */
+	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
+	TEST_ASSERT(ret == 0,
+		"KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i", ret, errno);
+}
+#endif
+
+#ifdef __x86_64__
+/*
+ * Reads the nested state of the VCPU given by vcpuid into state using
+ * KVM_GET_NESTED_STATE.  Fails the test if the VCPU does not exist or
+ * the ioctl does not return 0.
+ */
+void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
+			   struct kvm_nested_state *state)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
+	/* Name the ioctl that was actually issued (GET, not SET). */
+	TEST_ASSERT(ret == 0,
+		"KVM_GET_NESTED_STATE failed, ret: %i errno: %i",
+		ret, errno);
+}
+
+int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_nested_state *state, bool ignore_error)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
+
+	/* With ignore_error set, failures are only reported via the return. */
+	if (ignore_error)
+		return ret;
+
+	TEST_ASSERT(ret == 0,
+		"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
+		ret, errno);
+
+	return ret;
+}
+#endif
+
+/*
+ * VM VCPU System Regs Get
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args:
+ *   sregs - current state of VCPU system regs
+ *
+ * Return: None
+ *
+ * Obtains the current system register state for the VCPU specified by
+ * vcpuid and stores it at the location given by sregs.
+ */
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	/* Look the VCPU up by ID; an unknown ID fails the test. */
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Read the system registers into the caller's buffer. */
+	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
+	TEST_ASSERT(ret == 0,
+		"KVM_GET_SREGS failed, rc: %i errno: %i", ret, errno);
+}
+
+/*
+ * VM VCPU System Regs Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   sregs - Values to set VCPU system regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the system regs of the VCPU specified by vcpuid to the values
+ * given by sregs.
+ */
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+	/* Asserting wrapper around _vcpu_sregs_set() (KVM_SET_SREGS). */
+	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
+
+	/* Report the ioctl that actually failed, not KVM_RUN. */
+	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
+		"rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+	struct vcpu *vcpu;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Non-asserting variant: the raw ioctl result goes to the caller. */
+	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
+}
+
+void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
+{
+	/* Fetch the FPU state into fpu; any non-zero result fails the test. */
+	int ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
+
+	TEST_ASSERT(ret == 0,
+		    "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
+		    ret, errno, strerror(errno));
+}
+
+void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
+{
+	/* Load the FPU state from fpu; any non-zero result fails the test. */
+	int ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
+
+	TEST_ASSERT(ret == 0,
+		    "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
+		    ret, errno, strerror(errno));
+}
+
+void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
+{
+	/* Read the register described by reg; failure fails the test. */
+	int ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
+
+	TEST_ASSERT(ret == 0,
+		    "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
+		    ret, errno, strerror(errno));
+}
+
+void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
+{
+	/* Write the register described by reg; failure fails the test. */
+	int ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
+
+	TEST_ASSERT(ret == 0,
+		    "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
+		    ret, errno, strerror(errno));
+}
+
+/*
+ * VCPU Ioctl
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   cmd - Ioctl number
+ *   arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VCPU fd.
+ */
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
+		unsigned long cmd, void *arg)
+{
+	/* Asserting wrapper: any non-zero ioctl result fails the test. */
+	int ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);
+
+	TEST_ASSERT(ret == 0,
+		"vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
+		cmd, ret, errno, strerror(errno));
+}
+
+int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
+		unsigned long cmd, void *arg)
+{
+	struct vcpu *vcpu;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Non-asserting variant: the raw ioctl result goes to the caller. */
+	return ioctl(vcpu->fd, cmd, arg);
+}
+
+/*
+ * VM Ioctl
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   cmd - Ioctl number
+ *   arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VM fd.
+ */
+void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+	/* Issue the ioctl on the VM fd; any non-zero result fails the test. */
+	int ret = ioctl(vm->fd, cmd, arg);
+
+	TEST_ASSERT(ret == 0,
+		"vm ioctl %lu failed, rc: %i errno: %i (%s)",
+		cmd, ret, errno, strerror(errno));
+}
+
+/*
+ * VM Dump
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VM given by vm, to the FILE stream
+ * given by stream.
+ */
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	struct userspace_mem_region *region;
+	struct vcpu *vcpu;
+	/* Indentation used for entries nested one level down. */
+	const int nested = indent + 2;
+
+	/* Top-level VM attributes. */
+	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
+	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
+	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
+
+	/* One entry per memory region, followed by its unused-page bitmap. */
+	fprintf(stream, "%*sMem Regions:\n", indent, "");
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+		fprintf(stream,
+			"%*sguest_phys: 0x%lx size: 0x%lx host_virt: %p\n",
+			nested, "",
+			(uint64_t) region->region.guest_phys_addr,
+			(uint64_t) region->region.memory_size,
+			region->host_mem);
+		fprintf(stream, "%*sunused_phy_pages: ", nested, "");
+		sparsebit_dump(stream, region->unused_phy_pages, 0);
+	}
+
+	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
+	sparsebit_dump(stream, vm->vpages_mapped, nested);
+
+	/* The translation tables are dumped only once a PGD was created. */
+	fprintf(stream, "%*spgd_created: %u\n", indent, "", vm->pgd_created);
+	if (vm->pgd_created) {
+		fprintf(stream, "%*sVirtual Translation Tables:\n", nested, "");
+		virt_dump(stream, vm, indent + 4);
+	}
+
+	fprintf(stream, "%*sVCPUs:\n", indent, "");
+	list_for_each_entry(vcpu, &vm->vcpus, list) {
+		vcpu_dump(stream, vm, vcpu->id, nested);
+	}
+}
+
+/* Known KVM exit reasons */
+/* Pairs each known KVM exit reason code with a short printable name. */
+static struct exit_reason {
+	unsigned int reason;
+	const char *name;
+} exit_reasons_known[] = {
+	{ .reason = KVM_EXIT_UNKNOWN, .name = "UNKNOWN" },
+	{ .reason = KVM_EXIT_EXCEPTION, .name = "EXCEPTION" },
+	{ .reason = KVM_EXIT_IO, .name = "IO" },
+	{ .reason = KVM_EXIT_HYPERCALL, .name = "HYPERCALL" },
+	{ .reason = KVM_EXIT_DEBUG, .name = "DEBUG" },
+	{ .reason = KVM_EXIT_HLT, .name = "HLT" },
+	{ .reason = KVM_EXIT_MMIO, .name = "MMIO" },
+	{ .reason = KVM_EXIT_IRQ_WINDOW_OPEN, .name = "IRQ_WINDOW_OPEN" },
+	{ .reason = KVM_EXIT_SHUTDOWN, .name = "SHUTDOWN" },
+	{ .reason = KVM_EXIT_FAIL_ENTRY, .name = "FAIL_ENTRY" },
+	{ .reason = KVM_EXIT_INTR, .name = "INTR" },
+	{ .reason = KVM_EXIT_SET_TPR, .name = "SET_TPR" },
+	{ .reason = KVM_EXIT_TPR_ACCESS, .name = "TPR_ACCESS" },
+	{ .reason = KVM_EXIT_S390_SIEIC, .name = "S390_SIEIC" },
+	{ .reason = KVM_EXIT_S390_RESET, .name = "S390_RESET" },
+	{ .reason = KVM_EXIT_DCR, .name = "DCR" },
+	{ .reason = KVM_EXIT_NMI, .name = "NMI" },
+	{ .reason = KVM_EXIT_INTERNAL_ERROR, .name = "INTERNAL_ERROR" },
+	{ .reason = KVM_EXIT_OSI, .name = "OSI" },
+	{ .reason = KVM_EXIT_PAPR_HCALL, .name = "PAPR_HCALL" },
+#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
+	{ .reason = KVM_EXIT_MEMORY_NOT_PRESENT, .name = "MEMORY_NOT_PRESENT" },
+#endif
+};
+
+/*
+ * Exit Reason String
+ *
+ * Input Args:
+ *   exit_reason - Exit reason
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Constant string pointer describing the exit reason.
+ *
+ * Locates and returns a constant string that describes the KVM exit
+ * reason given by exit_reason.  If no such string is found, a constant
+ * string of "Unknown" is returned.
+ */
+const char *exit_reason_str(unsigned int exit_reason)
+{
+	const struct exit_reason *entry = exit_reasons_known;
+	const struct exit_reason *end = entry + ARRAY_SIZE(exit_reasons_known);
+
+	/* Walk the table in order; the first matching code wins. */
+	for (; entry < end; entry++) {
+		if (entry->reason == exit_reason)
+			return entry->name;
+	}
+
+	/* Code not present in the table. */
+	return "Unknown";
+}
+
+/*
+ * Physical Contiguous Page Allocator
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   num - number of pages
+ *   paddr_min - Physical address minimum
+ *   memslot - Memory region to allocate page from
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting physical address
+ *
+ * Within the VM specified by vm, locates a range of available physical
+ * pages at or above paddr_min. If found, the pages are marked as in use
+ * and their base address is returned. A TEST_ASSERT failure occurs if
+ * not enough pages are available at or above paddr_min.
+ */
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+			      vm_paddr_t paddr_min, uint32_t memslot)
+{
+	struct userspace_mem_region *region;
+	sparsebit_idx_t pg, base;
+
+	TEST_ASSERT(num > 0, "Must allocate at least one page");
+
+	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
+		"not divisible by page size.\n"
+		"  paddr_min: 0x%lx page_size: 0x%x",
+		paddr_min, vm->page_size);
+
+	region = memslot2region(vm, memslot);
+	/* Start the search at the page frame number of paddr_min. */
+	base = pg = paddr_min >> vm->page_shift;
+
+	/*
+	 * Look for num consecutive set bits starting at base.  On the first
+	 * clear bit the candidate run is abandoned and the search restarts
+	 * at the index returned by sparsebit_next_set().  A restart index of
+	 * 0 ends the loop and is treated below as "nothing available".
+	 */
+	do {
+		for (; pg < base + num; ++pg) {
+			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
+				break;
+			}
+		}
+	} while (pg && pg != base + num);
+
+	/* No suitable run found: report, dump the VM state and abort. */
+	if (pg == 0) {
+		fprintf(stderr, "No guest physical page available, "
+			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
+			paddr_min, vm->page_size, memslot);
+		fputs("---- vm dump ----\n", stderr);
+		vm_dump(stderr, vm, 2);
+		abort();
+	}
+
+	/* Claim the run: a cleared bit marks the page as no longer unused. */
+	for (pg = base; pg < base + num; ++pg)
+		sparsebit_clear(region->unused_phy_pages, pg);
+
+	/* Convert the page frame number back into a physical address. */
+	return base * vm->page_size;
+}
+
+/*
+ * Single-page convenience wrapper: allocates exactly one guest physical
+ * page at or above paddr_min from memslot by calling vm_phy_pages_alloc()
+ * with a count of 1, and returns the resulting physical address.
+ */
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+			     uint32_t memslot)
+{
+	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
+}
+
+/*
+ * Address Guest Virtual to Host Virtual
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent host virtual address
+ *
+ * Translates in two steps: guest virtual to guest physical through
+ * addr_gva2gpa(), then guest physical to host virtual through
+ * addr_gpa2hva().
+ */
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	vm_paddr_t gpa = addr_gva2gpa(vm, gva);
+
+	return addr_gpa2hva(vm, gpa);
+}
+
+/*
+ * Is Unrestricted Guest
+ *
+ * Input Args:
+ *   vm - Virtual Machine, or NULL
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   True when the first character of the kvm_intel unrestricted_guest
+ *   module parameter is 'Y'; false otherwise, including when the
+ *   parameter file cannot be opened.
+ *
+ * When vm is NULL, KVM_DEV_PATH is opened and closed first so that the
+ * vendor-specific KVM module is loaded before its parameter is read.
+ */
+bool vm_is_unrestricted_guest(struct kvm_vm *vm)
+{
+	char flag = 'N';
+	size_t count;
+	FILE *f;
+
+	if (!vm) {
+		/* Opening the device node ensures the vendor module is loaded. */
+		f = fopen(KVM_DEV_PATH, "r");
+		TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
+			    errno);
+		fclose(f);
+	}
+
+	f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
+	if (!f)
+		return false;
+
+	count = fread(&flag, sizeof(char), 1, f);
+	TEST_ASSERT(count == 1, "Unable to read from param file.");
+	fclose(f);
+
+	return flag == 'Y';
+}
+
+/* Returns the guest page size, in bytes, recorded in vm->page_size. */
+unsigned int vm_get_page_size(struct kvm_vm *vm)
+{
+	return vm->page_size;
+}
+
+/*
+ * Returns vm->page_shift, the shift count used to convert between guest
+ * addresses and guest page numbers.
+ */
+unsigned int vm_get_page_shift(struct kvm_vm *vm)
+{
+	return vm->page_shift;
+}
+
+/*
+ * Returns vm->max_gfn, the highest guest frame number accepted for the VM.
+ *
+ * NOTE(review): struct kvm_vm declares max_gfn as uint64_t, so this
+ * unsigned int return type truncates values above UINT_MAX.  Widening it
+ * requires changing the prototype in the public header as well.
+ */
+unsigned int vm_get_max_gfn(struct kvm_vm *vm)
+{
+	return vm->max_gfn;
+}
+
+/* Returns the file descriptor stored in vm->fd. */
+int vm_get_fd(struct kvm_vm *vm)
+{
+	return vm->fd;
+}
+
+/*
+ * Convert a page count from one page size to another.
+ *
+ * Input Args:
+ *   num_pages      - number of pages of size (1 << page_shift)
+ *   page_shift     - log2 of the current page size
+ *   new_page_shift - log2 of the page size to convert to
+ *   ceil           - when the new pages are larger, round a partial
+ *                    page up (true) or drop it (false)
+ *
+ * Return:
+ *   The number of pages of size (1 << new_page_shift).
+ */
+static unsigned int vm_calc_num_pages(unsigned int num_pages,
+				      unsigned int page_shift,
+				      unsigned int new_page_shift,
+				      bool ceil)
+{
+	unsigned int n;
+
+	/* New pages are the same size or smaller: the count scales up. */
+	if (page_shift >= new_page_shift)
+		return num_pages * (1u << (page_shift - new_page_shift));
+
+	/*
+	 * Only compute the ratio once new_page_shift > page_shift is known;
+	 * otherwise the unsigned difference wraps and the shift count is
+	 * out of range, which is undefined behaviour.
+	 */
+	n = 1u << (new_page_shift - page_shift);
+
+	return num_pages / n + !!(ceil && num_pages % n);
+}
+
+/* Returns log2 of the host page size reported by getpagesize(). */
+static inline int getpageshift(void)
+{
+	int host_page_size = getpagesize();
+
+	/* ffs() numbers bits from 1, so subtract one to get the shift. */
+	return __builtin_ffs(host_page_size) - 1;
+}
+
+/*
+ * Convert a count of guest pages (for the given guest mode) into host
+ * pages, rounding up when the host page size is the larger one.
+ */
+unsigned int
+vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+	unsigned int guest_shift = vm_guest_mode_params[mode].page_shift;
+	unsigned int host_shift = getpageshift();
+
+	return vm_calc_num_pages(num_guest_pages, guest_shift, host_shift,
+				 true);
+}
+
+/*
+ * Convert a count of host pages into guest pages for the given guest
+ * mode, rounding down when the guest page size is the larger one.
+ */
+unsigned int
+vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
+{
+	unsigned int host_shift = getpageshift();
+	unsigned int guest_shift = vm_guest_mode_params[mode].page_shift;
+
+	return vm_calc_num_pages(num_host_pages, host_shift, guest_shift,
+				 false);
+}
+
+/*
+ * Returns the number of guest pages needed to hold size bytes in the
+ * given guest mode: size is divided by the mode's page size, rounding
+ * up, and the result is passed through vm_adjust_num_guest_pages().
+ */
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
+{
+	unsigned int raw_pages =
+		DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
+
+	return vm_adjust_num_guest_pages(mode, raw_pages);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
new file mode 100644
index 000000000..f07d383d0
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util_internal.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
+#define SELFTEST_KVM_UTIL_INTERNAL_H
+
+#include "sparsebit.h"
+
+#define KVM_DEV_PATH		"/dev/kvm"
+
+/*
+ * Book-keeping for one guest memory region of a VM.
+ *
+ * NOTE(review): notes marked "presumably" are inferred from field names
+ * and types only; confirm against the users in kvm_util.c.
+ */
+struct userspace_mem_region {
+	struct kvm_userspace_memory_region region;
+	/*
+	 * Indexed by guest page frame number.  A set bit is a page that
+	 * vm_phy_pages_alloc() may still hand out; it clears the bit when
+	 * the page is allocated.
+	 */
+	struct sparsebit *unused_phy_pages;
+	int fd;
+	off_t offset;
+	void *host_mem;
+	void *mmap_start;
+	size_t mmap_size;
+	/* presumably links the region into kvm_vm.userspace_mem_regions */
+	struct list_head list;
+};
+
+/*
+ * Per-vCPU state tracked by the selftest library.
+ *
+ * NOTE(review): notes marked "presumably" are inferred from field names
+ * and types only; confirm against the users in kvm_util.c.
+ */
+struct vcpu {
+	/* presumably links the vCPU into kvm_vm.vcpus */
+	struct list_head list;
+	/* presumably the id matched by vcpu_find() */
+	uint32_t id;
+	int fd;
+	/* kvm_run area of the vCPU; vcpu_dump() reads the PSW from it on s390x */
+	struct kvm_run *state;
+};
+
+/*
+ * State of one virtual machine as tracked by the selftest library.
+ *
+ * Only fields whose use is visible in the library code next to this
+ * header are annotated; the remaining ones are left undocumented rather
+ * than guessed at.
+ */
+struct kvm_vm {
+	int mode;
+	unsigned long type;
+	int kvm_fd;
+	/* Returned by vm_get_fd() */
+	int fd;
+	unsigned int pgtable_levels;
+	/* Guest page size in bytes and its log2 */
+	unsigned int page_size;
+	unsigned int page_shift;
+	unsigned int pa_bits;
+	unsigned int va_bits;
+	/* Highest guest frame number accepted when mapping a page */
+	uint64_t max_gfn;
+	struct list_head vcpus;
+	struct list_head userspace_mem_regions;
+	/* Indexed by guest virtual page number; checked before mapping */
+	struct sparsebit *vpages_valid;
+	struct sparsebit *vpages_mapped;
+	bool has_irqchip;
+	/* True once virt_pgd_alloc() has set pgd */
+	bool pgd_created;
+	/* Guest physical address of the top-level translation table */
+	vm_paddr_t pgd;
+	vm_vaddr_t gdt;
+	vm_vaddr_t tss;
+	vm_vaddr_t idt;
+	vm_vaddr_t handlers;
+};
+
+struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
+
+/*
+ * Virtual Translation Tables Dump
+ *
+ * Input Args:
+ *   stream - Output FILE stream
+ *   vm     - Virtual Machine
+ *   indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by @stream, the contents of all the
+ * virtual translation tables for the VM given by @vm.
+ */
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+/*
+ * Register Dump
+ *
+ * Input Args:
+ *   stream - Output FILE stream
+ *   regs   - Registers
+ *   indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the state of the registers given by @regs, to the FILE stream
+ * given by @stream.
+ */
+void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
+
+/*
+ * System Register Dump
+ *
+ * Input Args:
+ *   stream - Output FILE stream
+ *   sregs  - System registers
+ *   indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the state of the system registers given by @sregs, to the FILE stream
+ * given by @stream.
+ */
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
+
+struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot);
+
+#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
new file mode 100644
index 000000000..7349bb2e1
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM selftest s390x library code - CPU-related functions (page tables...)
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "processor.h"
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR		0x180000
+
+#define PAGES_PER_REGION 4
+
+/*
+ * Allocate the top-level translation table of the VM, unless it already
+ * exists.  The table occupies PAGES_PER_REGION pages taken from memslot
+ * and is filled with 0xff bytes, which marks every entry invalid.
+ * Only a page size of 4096 is supported.
+ */
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
+{
+	TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+		    vm->page_size);
+
+	if (!vm->pgd_created) {
+		vm_paddr_t table;
+
+		table = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
+					   KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+					   memslot);
+		memset(addr_gpa2hva(vm, table), 0xff,
+		       PAGES_PER_REGION * vm->page_size);
+
+		vm->pgd = table;
+		vm->pgd_created = true;
+	}
+}
+
+/*
+ * Allocate 4 pages for a region/segment table (ri < 4), or one page for
+ * a page table (ri == 4). Returns a suitable region/segment table entry
+ * which points to the freshly allocated pages.
+ *
+ * The new table is filled with 0xff bytes so that every entry in it
+ * starts out invalid.
+ */
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
+{
+	size_t num_pages = ri < 4 ? PAGES_PER_REGION : 1;
+	uint64_t taddr;
+
+	taddr = vm_phy_pages_alloc(vm, num_pages,
+				   KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+	/*
+	 * Initialize exactly what was allocated.  A page table is a single
+	 * page, so clearing PAGES_PER_REGION pages here would overwrite
+	 * guest memory that does not belong to the new table.
+	 */
+	memset(addr_gpa2hva(vm, taddr), 0xff, num_pages * vm->page_size);
+
+	return (taddr & REGION_ENTRY_ORIGIN)
+		| (((4 - ri) << 2) & REGION_ENTRY_TYPE)
+		| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
+}
+
+/*
+ * Map one guest page: install a translation from gva to gpa in the
+ * translation tables rooted at vm->pgd, allocating missing lower-level
+ * tables from memslot.
+ *
+ * Both addresses must be page aligned, gva must be marked in
+ * vm->vpages_valid and gpa must not lie beyond vm->max_gfn; a violation
+ * trips a TEST_ASSERT.  Replacing an already valid page table entry only
+ * prints a warning.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
+		 uint32_t memslot)
+{
+	int ri, idx;
+	uint64_t *entry;
+
+	TEST_ASSERT((gva % vm->page_size) == 0,
+		"Virtual address not on page boundary,\n"
+		"  vaddr: 0x%lx vm->page_size: 0x%x",
+		gva, vm->page_size);
+	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+		(gva >> vm->page_shift)),
+		"Invalid virtual address, vaddr: 0x%lx",
+		gva);
+	/* The two checks below concern gpa, so gpa is what gets reported. */
+	TEST_ASSERT((gpa % vm->page_size) == 0,
+		"Physical address not on page boundary,\n"
+		"  paddr: 0x%lx vm->page_size: 0x%x",
+		gpa, vm->page_size);
+	TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
+		"Physical address beyond beyond maximum supported,\n"
+		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		gpa, vm->max_gfn, vm->page_size);
+
+	/* Walk through region and segment tables */
+	entry = addr_gpa2hva(vm, vm->pgd);
+	for (ri = 1; ri <= 4; ri++) {
+		/* Each level consumes the next 11 bits of gva, top down. */
+		idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
+		if (entry[idx] & REGION_ENTRY_INVALID)
+			entry[idx] = virt_alloc_region(vm, ri, memslot);
+		entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+	}
+
+	/* Fill in page table entry */
+	idx = (gva >> 12) & 0x0ffu;		/* page index */
+	if (!(entry[idx] & PAGE_INVALID))
+		fprintf(stderr,
+			"WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
+	entry[idx] = gpa;
+}
+
+/*
+ * Translate a guest virtual address into a guest physical address by
+ * walking the translation tables rooted at vm->pgd.  A missing mapping
+ * at any level trips a TEST_ASSERT.  Only a page size of 4096 is
+ * supported.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint64_t page_base, page_off;
+	uint64_t *entry;
+	int idx;
+	int ri = 1;
+
+	TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+		    vm->page_size);
+
+	/* Four table levels, each indexed by the next 11 bits of gva. */
+	entry = addr_gpa2hva(vm, vm->pgd);
+	while (ri <= 4) {
+		idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
+		TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
+			    "No region mapping for vm virtual address 0x%lx",
+			    gva);
+		entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+		ri++;
+	}
+
+	/* entry now points at the page table; pick the page's entry. */
+	idx = (gva >> 12) & 0x0ffu;
+
+	TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
+		    "No page mapping for vm virtual address 0x%lx", gva);
+
+	page_base = entry[idx] & ~0xffful;
+	page_off = gva & 0xffful;
+
+	return page_base + page_off;
+}
+
+/*
+ * Print every valid entry of the page table located at guest physical
+ * address ptea_start (0x100 entries of 8 bytes) to stream, indented by
+ * indent columns.
+ */
+static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+			   uint64_t ptea_start)
+{
+	uint64_t ptea_end = ptea_start + 0x100 * 8;
+	uint64_t ptea;
+
+	for (ptea = ptea_start; ptea < ptea_end; ptea += 8) {
+		uint64_t *pte = addr_gpa2hva(vm, ptea);
+
+		if (!(*pte & PAGE_INVALID))
+			fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n",
+				indent, "", ptea, *pte);
+	}
+}
+
+/*
+ * Print every valid entry of the region or segment table located at guest
+ * physical address reg_tab_addr to stream, then descend into the table
+ * each entry points to: another region/segment table when the entry's
+ * type field is non-zero, a page table otherwise.  Nested levels are
+ * indented by two more columns.
+ */
+static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+			     uint64_t reg_tab_addr)
+{
+	/*
+	 * These tables are allocated as PAGES_PER_REGION pages and indexed
+	 * with 11 bits by virt_pg_map(), so walk the whole allocation rather
+	 * than only its first 0x400 entries.
+	 */
+	uint64_t tab_size = PAGES_PER_REGION * vm->page_size;
+	uint64_t addr, *entry;
+
+	for (addr = reg_tab_addr; addr < reg_tab_addr + tab_size; addr += 8) {
+		entry = addr_gpa2hva(vm, addr);
+		if (*entry & REGION_ENTRY_INVALID)
+			continue;
+		fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n",
+			indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2),
+			addr, *entry);
+		if (*entry & REGION_ENTRY_TYPE) {
+			virt_dump_region(stream, vm, indent + 2,
+					 *entry & REGION_ENTRY_ORIGIN);
+		} else {
+			virt_dump_ptes(stream, vm, indent + 2,
+				       *entry & REGION_ENTRY_ORIGIN);
+		}
+	}
+}
+
+/*
+ * Dump the VM's translation tables to stream, starting at vm->pgd.
+ * Nothing is printed when the top-level table has not been created yet.
+ */
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	if (vm->pgd_created)
+		virt_dump_region(stream, vm, indent, vm->pgd);
+}
+
+/*
+ * Create a VM in VM_MODE_DEFAULT, load the running test binary into it
+ * and add one vCPU (vcpuid) that starts executing at guest_code.
+ *
+ * extra_mem_pages only enlarges the VM by the page-table pages needed to
+ * map that many additional pages, not by the pages themselves.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code)
+{
+	struct kvm_vm *vm;
+	uint64_t pt_pages, total_pages;
+
+	/*
+	 * Page tables for n extra pages need
+	 * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ...
+	 * pages, which is definitely smaller than (n / 256) * 2.
+	 */
+	pt_pages = extra_mem_pages / 256 * 2;
+	total_pages = DEFAULT_GUEST_PHY_PAGES + pt_pages;
+
+	vm = vm_create(VM_MODE_DEFAULT, total_pages, O_RDWR);
+
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+	return vm;
+}
+
+/*
+ * Add vCPU vcpuid to the VM with a freshly allocated guest stack and set
+ * up its registers so that it starts executing guest_code.  Only a page
+ * size of 4096 is supported.
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+	size_t stack_size = DEFAULT_STACK_PGS * getpagesize();
+	uint64_t stack_base, stack_top;
+	struct kvm_sregs sregs;
+	struct kvm_regs regs;
+	struct kvm_run *run;
+
+	TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+		    vm->page_size);
+
+	stack_base = vm_vaddr_alloc(vm, stack_size,
+				    DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+	/* Leave 160 bytes free at the top of the stack for the first frame. */
+	stack_top = stack_base + stack_size - 160;
+
+	vm_vcpu_add(vm, vcpuid);
+
+	/* General purpose registers: r15 is the stack pointer. */
+	vcpu_regs_get(vm, vcpuid, &regs);
+	regs.gprs[15] = stack_top;
+	vcpu_regs_set(vm, vcpuid, &regs);
+
+	/* Control registers. */
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	sregs.crs[0] |= 0x00040000;		/* Enable floating point regs */
+	sregs.crs[1] = vm->pgd | 0xf;		/* Primary region table */
+	vcpu_sregs_set(vm, vcpuid, &sregs);
+
+	/* Initial PSW: where and in which mode the guest starts running. */
+	run = vcpu_state(vm, vcpuid);
+	run->psw_mask = 0x0400000180000000ULL;  /* DAT enabled + 64 bit mode */
+	run->psw_addr = (uintptr_t)guest_code;
+}
+
+/*
+ * Pass num (1 to 5) uint64_t arguments to the guest code of vCPU vcpuid
+ * by storing them in general purpose registers 2 and up.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+	struct kvm_regs regs;
+	unsigned int arg;
+	va_list ap;
+
+	TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
+		    "  num: %u\n",
+		    num);
+
+	va_start(ap, num);
+	vcpu_regs_get(vm, vcpuid, &regs);
+
+	/* Argument i lands in register i + 2. */
+	for (arg = 0; arg < num; arg++)
+		regs.gprs[arg + 2] = va_arg(ap, uint64_t);
+
+	vcpu_regs_set(vm, vcpuid, &regs);
+	va_end(ap);
+}
+
+/*
+ * Print the PSW (mask and address) of vCPU vcpuid to stream, indented by
+ * indent columns.  Nothing is printed when the vCPU is not found.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+	if (vcpu)
+		fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
+			indent, "", vcpu->state->psw_mask,
+			vcpu->state->psw_addr);
+}
+
+/* Intentionally empty: this s390x implementation performs no check. */
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+}
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
new file mode 100644
index 000000000..9d3b0f152
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+/* Intentionally empty: the s390x ucall needs no setup; arguments are unused. */
+void ucall_init(struct kvm_vm *vm, void *arg)
+{
+}
+
+/* Intentionally empty: the s390x ucall has nothing to tear down. */
+void ucall_uninit(struct kvm_vm *vm)
+{
+}
+
+/*
+ * Issue a ucall: pack cmd and up to UCALL_MAX_ARGS uint64_t arguments
+ * into a struct ucall and hand its address to the host through
+ * DIAGNOSE 0x501.  Arguments beyond UCALL_MAX_ARGS are ignored.
+ */
+void ucall(uint64_t cmd, int nargs, ...)
+{
+	struct ucall uc = { .cmd = cmd };
+	va_list va;
+	int n;
+
+	if (nargs > UCALL_MAX_ARGS)
+		nargs = UCALL_MAX_ARGS;
+
+	va_start(va, nargs);
+	for (n = 0; n < nargs; n++)
+		uc.args[n] = va_arg(va, uint64_t);
+	va_end(va);
+
+	/* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
+	asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory");
+}
+
+/*
+ * Fetch the ucall, if any, that made vCPU vcpu_id exit.
+ *
+ * When the last exit is a DIAGNOSE 0x501 intercept, the struct ucall the
+ * guest passed is copied out of guest memory, the exit is completed with
+ * vcpu_run_complete_io() and the command is returned; uc, when not NULL,
+ * receives the full copy.  For any other exit 0 is returned and uc, when
+ * not NULL, is left zeroed.
+ */
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+	struct kvm_run *run = vcpu_state(vm, vcpu_id);
+	struct ucall guest_uc = {};
+	bool is_diag_501;
+	int reg;
+
+	if (uc)
+		memset(uc, 0, sizeof(*uc));
+
+	is_diag_501 = run->exit_reason == KVM_EXIT_S390_SIEIC &&
+		      run->s390_sieic.icptcode == 4 &&
+		      (run->s390_sieic.ipa >> 8) == 0x83 && /* DIAGNOSE */
+		      (run->s390_sieic.ipb >> 16) == 0x501;
+	if (!is_diag_501)
+		return 0;
+
+	/* The low nibble of ipa names the register holding the address. */
+	reg = run->s390_sieic.ipa & 0xf;
+	memcpy(&guest_uc, addr_gva2hva(vm, run->s.regs.gprs[reg]),
+	       sizeof(guest_uc));
+
+	vcpu_run_complete_io(vm, vcpu_id);
+	if (uc)
+		memcpy(uc, &guest_uc, sizeof(guest_uc));
+
+	return guest_uc.cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
new file mode 100644
index 000000000..031ba3c93
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -0,0 +1,2086 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Sparse bit array
+ *
+ * Copyright (C) 2018, Google LLC.
+ * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
+ *
+ * This library provides functions to support a memory efficient bit array,
+ * with an index size of 2^64.  A sparsebit array is allocated through
+ * the use of sparsebit_alloc() and free'd via sparsebit_free(),
+ * such as in the following:
+ *
+ *   struct sparsebit *s;
+ *   s = sparsebit_alloc();
+ *   sparsebit_free(&s);
+ *
+ * The struct sparsebit type resolves down to a struct sparsebit.
+ * Note that, sparsebit_free() takes a pointer to the sparsebit
+ * structure.  This is so that sparsebit_free() is able to poison
+ * the pointer (e.g. set it to NULL) to the struct sparsebit before
+ * returning to the caller.
+ *
+ * Between the return of sparsebit_alloc() and the call of
+ * sparsebit_free(), there are multiple query and modifying operations
+ * that can be performed on the allocated sparsebit array.  All of
+ * these operations take as a parameter the value returned from
+ * sparsebit_alloc() and most also take a bit index.  Frequently
+ * used routines include:
+ *
+ *  ---- Query Operations
+ *  sparsebit_is_set(s, idx)
+ *  sparsebit_is_clear(s, idx)
+ *  sparsebit_any_set(s)
+ *  sparsebit_first_set(s)
+ *  sparsebit_next_set(s, prev_idx)
+ *
+ *  ---- Modifying Operations
+ *  sparsebit_set(s, idx)
+ *  sparsebit_clear(s, idx)
+ *  sparsebit_set_num(s, idx, num);
+ *  sparsebit_clear_num(s, idx, num);
+ *
+ * A common operation is to iterate over all the bits set in a test
+ * sparsebit array.  This can be done via code with the following structure:
+ *
+ *   sparsebit_idx_t idx;
+ *   if (sparsebit_any_set(s)) {
+ *     idx = sparsebit_first_set(s);
+ *     do {
+ *       ...
+ *       idx = sparsebit_next_set(s, idx);
+ *     } while (idx != 0);
+ *   }
+ *
+ * The index of the first bit set needs to be obtained via
+ * sparsebit_first_set(), because sparsebit_next_set(), needs
+ * the index of the previously set.  The sparsebit_idx_t type is
+ * unsigned, so there is no previous index before 0 that is available.
+ * Also, the call to sparsebit_first_set() is not made unless there
+ * is at least 1 bit in the array set.  This is because sparsebit_first_set()
+ * aborts if it is called with no bits set.
+ * It is the callers responsibility to assure that the
+ * sparsebit array has at least a single bit set before calling
+ * sparsebit_first_set().
+ *
+ * ==== Implementation Overview ====
+ * For the most part the internal implementation of sparsebit is
+ * opaque to the caller.  One important implementation detail that the
+ * caller may need to be aware of is the spatial complexity of the
+ * implementation.  This implementation of a sparsebit array is not
+ * only sparse, in that it uses memory proportional to the number of bits
+ * set.  It is also efficient in memory usage when most of the bits are
+ * set.
+ *
+ * At a high-level the state of the bit settings are maintained through
+ * the use of a binary-search tree, where each node contains at least
+ * the following members:
+ *
+ *   typedef uint64_t sparsebit_idx_t;
+ *   typedef uint64_t sparsebit_num_t;
+ *
+ *   sparsebit_idx_t idx;
+ *   uint32_t mask;
+ *   sparsebit_num_t num_after;
+ *
+ * The idx member contains the bit index of the first bit described by this
+ * node, while the mask member stores the setting of the first 32-bits.
+ * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
+ * mask member at 1 << n.
+ *
+ * Nodes are sorted by idx and the bits described by two nodes will never
+ * overlap. The idx member is always aligned to the mask size, i.e. a
+ * multiple of 32.
+ *
+ * Beyond a typical implementation, the nodes in this implementation also
+ * contains a member named num_after.  The num_after member holds the
+ * number of bits immediately after the mask bits that are contiguously set.
+ * The use of the num_after member allows this implementation to efficiently
+ * represent cases where most bits are set.  For example, the case of all
+ * but the last two bits set, is represented by the following two nodes:
+ *
+ *   node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
+ *   node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
+ *
+ * ==== Invariants ====
+ * This implementation uses the following invariants:
+ *
+ *   + Nodes are only used to represent bits that are set.
+ *     Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ *   + Sum of bits set in all the nodes is equal to the value of
+ *     the struct sparsebit_pvt num_set member.
+ *
+ *   + The setting of at least one bit is always described in a nodes
+ *     mask (mask >= 1).
+ *
+ *   + A node with all mask bits set only occurs when the last bit
+ *     described by the previous node is not equal to this nodes
+ *     starting index - 1.  All such occurrences of this condition are
+ *     avoided by moving the setting of the nodes mask bits into
+ *     the previous nodes num_after setting.
+ *
+ *   + Node starting index is evenly divisible by the number of bits
+ *     within a nodes mask member.
+ *
+ *   + Nodes never represent a range of bits that wrap around the
+ *     highest supported index.
+ *
+ *      (idx + MASK_BITS + num_after - 1) <= (((sparsebit_idx_t) 0) - 1)
+ *
+ *     As a consequence of the above, the num_after member of a node
+ *     will always be <=:
+ *
+ *       maximum_index - nodes_starting_index - number_of_mask_bits
+ *
+ *   + Nodes within the binary search tree are sorted based on each
+ *     nodes starting index.
+ *
+ *   + The range of bits described by any two nodes do not overlap.  The
+ *     range of bits described by a single node is:
+ *
+ *       start: node->idx
+ *       end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
+ *
+ * Note, at times these invariants are temporarily violated for a
+ * specific portion of the code.  For example, when setting a mask
+ * bit, there is a small delay between when the mask bit is set and the
+ * value in the struct sparsebit_pvt num_set member is updated.  Other
+ * temporary violations occur when node_split() is called with a specified
+ * index and assures that a node where its mask represents the bit
+ * at the specified index exists.  At times to do this node_split()
+ * must split an existing node into two nodes or create a node that
+ * has no bits set.  Such temporary violations must be corrected before
+ * returning to the caller.  These corrections are typically performed
+ * by the local function node_reduce().
+ */
+
+#include "test_util.h"
+#include "sparsebit.h"
+#include <limits.h>
+#include <assert.h>
+
+#define DUMP_LINE_MAX 100 /* Does not include indent amount */
+
+typedef uint32_t mask_t;
+#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
+
+/*
+ * One node of the binary search tree that backs a sparsebit array.  A
+ * node describes the MASK_BITS bits starting at idx through mask, plus
+ * num_after contiguously set bits that directly follow them.
+ */
+struct node {
+	struct node *parent; /* NULL for the root node */
+	struct node *left; /* subtree holding lower indices */
+	struct node *right; /* subtree holding higher indices */
+	sparsebit_idx_t idx; /* index of least-significant bit in mask */
+	sparsebit_num_t num_after; /* num contiguously set after mask */
+	mask_t mask; /* bit n holds the setting of index idx + n */
+};
+
+/* Top-level handle of a sparsebit array: the tree root plus a bit count. */
+struct sparsebit {
+	/*
+	 * Points to root node of the binary search
+	 * tree.  Equal to NULL when no bits are set in
+	 * the entire sparsebit array.
+	 */
+	struct node *root;
+
+	/*
+	 * A redundant count of the total number of bits set.  Used for
+	 * diagnostic purposes and to change the time complexity of
+	 * sparsebit_num_set() from O(n) to O(1).
+	 * Note: Due to overflow, a value of 0 means none or all set.
+	 */
+	sparsebit_num_t num_set;
+};
+
+/* Returns how many bits the node pointed to by nodep describes as set:
+ * the bits set in its mask plus its num_after run.
+ */
+static sparsebit_num_t node_num_set(struct node *nodep)
+{
+	sparsebit_num_t in_mask = __builtin_popcount(nodep->mask);
+
+	return in_mask + nodep->num_after;
+}
+
+/* Returns a pointer to the node with the lowest starting index, i.e.
+ * the left-most node of the tree, or NULL when the tree is empty.
+ */
+static struct node *node_first(struct sparsebit *s)
+{
+	struct node *cur = s->root;
+
+	if (!cur)
+		return NULL;
+
+	while (cur->left)
+		cur = cur->left;
+
+	return cur;
+}
+
+/* Returns a pointer to the in-order successor of the node pointed to by
+ * np, i.e. the node with the lowest index greater than np's index.
+ * Returns NULL if np is the last node.
+ */
+static struct node *node_next(struct sparsebit *s, struct node *np)
+{
+	struct node *cur = np;
+
+	if (!cur->right) {
+		/*
+		 * No right subtree: climb while arriving from a right
+		 * child.  The first parent reached from its left child
+		 * (or NULL at the root) is the successor.
+		 */
+		while (cur->parent && cur->parent->right == cur)
+			cur = cur->parent;
+
+		return cur->parent;
+	}
+
+	/* Otherwise the successor is the left-most node on the right. */
+	cur = cur->right;
+	while (cur->left)
+		cur = cur->left;
+
+	return cur;
+}
+
+/* Returns a pointer to the in-order predecessor of the node pointed to
+ * by np, i.e. the node with the highest index lower than np's index.
+ * Returns NULL if np is the first node.
+ */
+static struct node *node_prev(struct sparsebit *s, struct node *np)
+{
+	struct node *cur = np;
+
+	if (!cur->left) {
+		/*
+		 * No left subtree: climb while arriving from a left
+		 * child.  The first parent reached from its right child
+		 * (or NULL at the root) is the predecessor.
+		 */
+		while (cur->parent && cur->parent->left == cur)
+			cur = cur->parent;
+
+		return cur->parent;
+	}
+
+	/* Otherwise the predecessor is the right-most node on the left. */
+	cur = cur->left;
+	while (cur->right)
+		cur = cur->right;
+
+	return cur;
+}
+
+
+/* Recursively duplicates the node sub-tree pointed to by subtree,
+ * copying the bit settings of every node into newly allocated nodes.
+ * Returns the root of the copy, whose parent pointer is left NULL.
+ * Aborts when memory cannot be allocated.
+ */
+static struct node *node_copy_subtree(struct node *subtree)
+{
+	struct node *copy = calloc(1, sizeof(*copy));
+
+	if (!copy) {
+		perror("calloc");
+		abort();
+	}
+
+	/* Bit settings of this node */
+	copy->idx = subtree->idx;
+	copy->num_after = subtree->num_after;
+	copy->mask = subtree->mask;
+
+	/* Children, each linked back to the new node */
+	if (subtree->left) {
+		copy->left = node_copy_subtree(subtree->left);
+		copy->left->parent = copy;
+	}
+
+	if (subtree->right) {
+		copy->right = node_copy_subtree(subtree->right);
+		copy->right->parent = copy;
+	}
+
+	return copy;
+}
+
+/* Looks up the node that describes the setting of the bit given by idx,
+ * i.e. the node whose mask bits or num_after run cover idx.  Returns a
+ * pointer to that node, or NULL if no node covers idx.
+ */
+static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *cur = s->root;
+
+	while (cur) {
+		/* Covered by this node's mask or num_after range? */
+		if (idx >= cur->idx &&
+		    idx <= cur->idx + MASK_BITS + cur->num_after - 1)
+			return cur;
+
+		/* Descend towards where idx would have to be. */
+		cur = cur->idx > idx ? cur->left : cur->right;
+	}
+
+	return NULL;
+}
+
+/* Entry Requirements:
+ *   + A node that describes the setting of idx is not already present.
+ *
+ * Adds a new node to describe the setting of the bit at the index given
+ * by idx.  Returns a pointer to the newly added node.
+ *
+ * The new node starts at idx rounded down to a multiple of MASK_BITS.
+ * Its mask is empty, apart from bits taken over from a preceding node
+ * whose num_after run reaches into the new node's mask range.  Aborts
+ * when memory cannot be allocated.
+ *
+ * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced.
+ */
+static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep, *parentp, *prev;
+
+	/* Allocate and initialize the new node. */
+	nodep = calloc(1, sizeof(*nodep));
+	if (!nodep) {
+		perror("calloc");
+		abort();
+	}
+
+	nodep->idx = idx & -MASK_BITS;
+
+	/* If no nodes, set it up as the root node. */
+	if (!s->root) {
+		s->root = nodep;
+		return nodep;
+	}
+
+	/*
+	 * Find the parent where the new node should be attached
+	 * and add the node there.
+	 */
+	parentp = s->root;
+	while (true) {
+		if (idx < parentp->idx) {
+			if (!parentp->left) {
+				parentp->left = nodep;
+				nodep->parent = parentp;
+				break;
+			}
+			parentp = parentp->left;
+		} else {
+			assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
+			if (!parentp->right) {
+				parentp->right = nodep;
+				nodep->parent = parentp;
+				break;
+			}
+			parentp = parentp->right;
+		}
+	}
+
+	/*
+	 * Does num_after bits of previous node overlap with the mask
+	 * of the new node?  If so set the bits in the new nodes mask
+	 * and reduce the previous nodes num_after.
+	 *
+	 * Each pass moves the last bit of the previous node's run, so the
+	 * loop ends once that run stops short of the new node.
+	 */
+	prev = node_prev(s, nodep);
+	while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
+		unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
+			- nodep->idx;
+		assert(prev->num_after > 0);
+		assert(n1 < MASK_BITS);
+		/*
+		 * Shift an unsigned mask_t: n1 may be MASK_BITS - 1, and
+		 * shifting a signed 1 into the sign bit is undefined.
+		 */
+		assert(!(nodep->mask & ((mask_t) 1 << n1)));
+		nodep->mask |= ((mask_t) 1 << n1);
+		prev->num_after--;
+	}
+
+	return nodep;
+}
+
+/* Returns whether every bit of the sparsebit array is set. */
+bool sparsebit_all_set(struct sparsebit *s)
+{
+	/* Without any node, no bit is set at all. */
+	if (!s->root)
+		return false;
+
+	/*
+	 * A node exists, so at least one bit is set.  The set-bit count
+	 * can then only be 0 because it overflowed, which happens exactly
+	 * when all bits are set.
+	 */
+	return s->num_set == 0;
+}
+
+/* Clears all bits described by the node pointed to by nodep: the set-bit
+ * count of the array is reduced accordingly, the node is unlinked from
+ * the tree and its memory is freed.
+ */
+static void node_rm(struct sparsebit *s, struct node *nodep)
+{
+	struct node *successor, *child, *parent;
+	sparsebit_num_t num_set;
+
+	num_set = node_num_set(nodep);
+	assert(s->num_set >= num_set || sparsebit_all_set(s));
+	s->num_set -= num_set;
+
+	/*
+	 * With two children, hang the left subtree below the left-most
+	 * node of the right subtree.  That keeps the ordering intact and
+	 * leaves nodep with a right child only.
+	 */
+	if (nodep->left && nodep->right) {
+		successor = nodep->right;
+		while (successor->left)
+			successor = successor->left;
+
+		successor->left = nodep->left;
+		successor->left->parent = successor;
+		nodep->left = NULL;
+	}
+
+	/* At most one child remains; it takes over the place of nodep. */
+	child = nodep->left ? nodep->left : nodep->right;
+	parent = nodep->parent;
+
+	if (child)
+		child->parent = parent;
+
+	if (!parent) {
+		s->root = child;
+	} else if (parent->left == nodep) {
+		parent->left = child;
+	} else {
+		assert(nodep == nodep->parent->right);
+		parent->right = child;
+	}
+
+	nodep->parent = nodep->left = nodep->right = NULL;
+	free(nodep);
+}
+
+/* Splits the node containing the bit at idx so that there is a node
+ * that starts at the specified index.  If no such node exists, a new
+ * node at the specified index is created.  Returns the node that starts
+ * at idx.
+ *
+ * idx must be on a mask boundary, i.e. a multiple of MASK_BITS.
+ */
+static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep1, *nodep2;
+	sparsebit_idx_t offset;
+	sparsebit_num_t orig_num_after;
+
+	assert(!(idx % MASK_BITS));
+
+	/*
+	 * Is there a node that describes the setting of idx?
+	 * If not, add it.
+	 */
+	nodep1 = node_find(s, idx);
+	if (!nodep1)
+		return node_add(s, idx);
+
+	/*
+	 * All done if the starting index of the node is where the
+	 * split should occur.
+	 */
+	if (nodep1->idx == idx)
+		return nodep1;
+
+	/*
+	 * Split point not at start of mask, so it must be part of
+	 * bits described by num_after.
+	 */
+
+	/*
+	 * Calculate offset within num_after for where the split is
+	 * to occur.
+	 */
+	offset = idx - (nodep1->idx + MASK_BITS);
+	orig_num_after = nodep1->num_after;
+
+	/*
+	 * Add a new node to describe the bits starting at
+	 * the split point.  The first node is shortened beforehand so
+	 * that it no longer covers idx when node_add() is called.
+	 */
+	nodep1->num_after = offset;
+	nodep2 = node_add(s, idx);
+
+	/* Move bits after the split point into the new node */
+	nodep2->num_after = orig_num_after - offset;
+	if (nodep2->num_after >= MASK_BITS) {
+		nodep2->mask = ~(mask_t) 0;
+		nodep2->num_after -= MASK_BITS;
+	} else {
+		/*
+		 * Build the partial mask in unsigned arithmetic: with
+		 * MASK_BITS - 1 remaining bits a signed 1 would be
+		 * shifted into the sign bit, which is undefined.
+		 */
+		nodep2->mask = ((mask_t) 1 << nodep2->num_after) - 1;
+		nodep2->num_after = 0;
+	}
+
+	return nodep2;
+}
+
+/* Iteratively reduces the node pointed to by nodep and its adjacent
+ * nodes into a more compact form.  For example, a node with a mask with
+ * all bits set adjacent to a previous node, will get combined into a
+ * single node with an increased num_after setting.
+ *
+ * After each reduction, a further check is made to see if additional
+ * reductions are possible with the new previous and next nodes.  Note,
+ * a search for a reduction is only done across the nodes nearest nodep
+ * and those that became part of a reduction.  Reductions beyond nodep
+ * and the adjacent nodes that are reduced are not discovered.  It is the
+ * responsibility of the caller to pass a nodep that is within one node
+ * of each possible reduction.
+ *
+ * This function does not fix the temporary violation of all invariants.
+ * For example it does not fix the case where the bit settings described
+ * by two or more nodes overlap.  Such a violation introduces the potential
+ * complication of a bit setting for a specific index having different settings
+ * in different nodes.  This would then introduce the further complication
+ * of which node has the correct setting of the bit and thus such conditions
+ * are not allowed.
+ *
+ * This function is designed to fix invariant violations that are introduced
+ * by node_split() and by changes to the nodes mask or num_after members.
+ * For example, when setting a bit within a nodes mask, the function that
+ * sets the bit doesn't have to worry about whether the setting of that
+ * bit caused the mask to have leading only or trailing only bits set.
+ * Instead, the function can call node_reduce(), with nodep equal to the
+ * node address that it set a mask bit in, and node_reduce() will notice
+ * the cases of leading or trailing only bits and that there is an
+ * adjacent node that the bit settings could be merged into.
+ *
+ * This implementation specifically detects and corrects violation of the
+ * following invariants:
+ *
+ *   + Node are only used to represent bits that are set.
+ *     Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ *   + The setting of at least one bit is always described in a nodes
+ *     mask (mask >= 1).
+ *
+ *   + A node with all mask bits set only occurs when the last bit
+ *     described by the previous node is not equal to this nodes
+ *     starting index - 1.  All such occurrences of this condition are
+ *     avoided by moving the setting of the nodes mask bits into
+ *     the previous nodes num_after setting.
+ */
+static void node_reduce(struct sparsebit *s, struct node *nodep)
+{
+	bool reduction_performed;
+
+	/*
+	 * Each pass applies at most one reduction and then executes
+	 * continue.  In a do/while, continue jumps to the loop condition,
+	 * so the checks restart from the top until a complete pass makes
+	 * no change or nodep becomes NULL.
+	 */
+	do {
+		reduction_performed = false;
+		struct node *prev, *next, *tmp;
+
+		/* 1) Potential reductions within the current node. */
+
+		/* Nodes with all bits cleared may be removed. */
+		if (nodep->mask == 0 && nodep->num_after == 0) {
+			/*
+			 * About to remove the node pointed to by
+			 * nodep, which normally would cause a problem
+			 * for the next pass through the reduction loop,
+			 * because the node at the starting point no longer
+			 * exists.  This potential problem is handled
+			 * by first remembering the location of the next
+			 * or previous nodes.  Doesn't matter which, because
+			 * once the node at nodep is removed, there will be
+			 * no other nodes between prev and next.
+			 *
+			 * Note, the checks performed on nodep against
+			 * prev and against next each look for an adjacent
+			 * node that can be reduced into a single node.  As
+			 * such, after removing the node at nodep, it doesn't
+			 * matter whether the nodep for the next pass
+			 * through the loop is equal to the previous pass
+			 * prev or next node.  Either way, on the next pass
+			 * the one not selected will become either the
+			 * prev or next node.
+			 */
+			tmp = node_next(s, nodep);
+			if (!tmp)
+				tmp = node_prev(s, nodep);
+
+			node_rm(s, nodep);
+			nodep = NULL;
+
+			/*
+			 * Continue from the remembered neighbor.  tmp is
+			 * NULL when neither a next nor a previous node was
+			 * found, in which case the loop condition ends the
+			 * reduction.
+			 */
+			nodep = tmp;
+			reduction_performed = true;
+			continue;
+		}
+
+		/*
+		 * When the mask is 0, can reduce the amount of num_after
+		 * bits by moving the initial num_after bits into the mask.
+		 */
+		if (nodep->mask == 0) {
+			assert(nodep->num_after != 0);
+			/* Advancing idx by one mask must not wrap. */
+			assert(nodep->idx + MASK_BITS > nodep->idx);
+
+			nodep->idx += MASK_BITS;
+
+			if (nodep->num_after >= MASK_BITS) {
+				nodep->mask = ~0;
+				nodep->num_after -= MASK_BITS;
+			} else {
+				/* All remaining num_after bits fit in the mask. */
+				nodep->mask = (1u << nodep->num_after) - 1;
+				nodep->num_after = 0;
+			}
+
+			reduction_performed = true;
+			continue;
+		}
+
+		/*
+		 * 2) Potential reductions between the current and
+		 * previous nodes.
+		 */
+		prev = node_prev(s, nodep);
+		if (prev) {
+			sparsebit_idx_t prev_highest_bit;
+
+			/* Nodes with no bits set can be removed. */
+			if (prev->mask == 0 && prev->num_after == 0) {
+				node_rm(s, prev);
+
+				reduction_performed = true;
+				continue;
+			}
+
+			/*
+			 * All mask bits set and previous node has
+			 * adjacent index.  The "mask + 1 == 0" test relies
+			 * on the all-ones mask wrapping around to zero.
+			 * The emptied nodep is removed by step 1 on the
+			 * next pass.
+			 */
+			if (nodep->mask + 1 == 0 &&
+			    prev->idx + MASK_BITS == nodep->idx) {
+				prev->num_after += MASK_BITS + nodep->num_after;
+				nodep->mask = 0;
+				nodep->num_after = 0;
+
+				reduction_performed = true;
+				continue;
+			}
+
+			/*
+			 * Is node adjacent to previous node and the node
+			 * contains a single contiguous range of bits
+			 * starting from the beginning of the mask?
+			 *
+			 * (mask | (mask >> 1)) == mask holds only when every
+			 * set bit above bit 0 also has its lower neighbor
+			 * set, i.e. the set bits form one run from bit 0.
+			 */
+			prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
+			if (prev_highest_bit + 1 == nodep->idx &&
+			    (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
+				/*
+				 * How many contiguous bits are there?
+				 * Is equal to the total number of set
+				 * bits, due to an earlier check that
+				 * there is a single contiguous range of
+				 * set bits.
+				 */
+				unsigned int num_contiguous
+					= __builtin_popcount(nodep->mask);
+				assert((num_contiguous > 0) &&
+				       ((1ULL << num_contiguous) - 1) == nodep->mask);
+
+				/* Hand the run of mask bits over to prev. */
+				prev->num_after += num_contiguous;
+				nodep->mask = 0;
+
+				/*
+				 * For predictable performance, handle special
+				 * case where all mask bits are set and there
+				 * is a non-zero num_after setting.  This code
+				 * is functionally correct without the following
+				 * conditionalized statements, but without them
+				 * the value of num_after is only reduced by
+				 * the number of mask bits per pass.  There are
+				 * cases where num_after can be close to 2^64.
+				 * Without this code it could take nearly
+				 * (2^64) / 32 passes to perform the full
+				 * reduction.
+				 */
+				if (num_contiguous == MASK_BITS) {
+					prev->num_after += nodep->num_after;
+					nodep->num_after = 0;
+				}
+
+				reduction_performed = true;
+				continue;
+			}
+		}
+
+		/*
+		 * 3) Potential reductions between the current and
+		 * next nodes.
+		 */
+		next = node_next(s, nodep);
+		if (next) {
+			/* Nodes with no bits set can be removed. */
+			if (next->mask == 0 && next->num_after == 0) {
+				node_rm(s, next);
+				reduction_performed = true;
+				continue;
+			}
+
+			/*
+			 * Is next node index adjacent to current node
+			 * and has a mask with all bits set?  If so, absorb
+			 * both its mask bits and its num_after bits into
+			 * the current node's num_after and remove it.
+			 */
+			if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
+			    next->mask == ~(mask_t) 0) {
+				nodep->num_after += MASK_BITS;
+				next->mask = 0;
+				nodep->num_after += next->num_after;
+				next->num_after = 0;
+
+				node_rm(s, next);
+				next = NULL;
+
+				reduction_performed = true;
+				continue;
+			}
+		}
+	} while (nodep && reduction_performed);
+}
+
+/* Returns whether the bit at the index given by idx, within the
+ * sparsebit array pointed to by s, is set.
+ */
+bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep;
+
+	/*
+	 * Walk the tree looking for the node whose range
+	 * [idx, idx + MASK_BITS + num_after - 1] covers the bit at idx.
+	 */
+	nodep = s->root;
+	while (nodep) {
+		if (idx >= nodep->idx &&
+		    idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
+			break;
+
+		nodep = nodep->idx > idx ? nodep->left : nodep->right;
+	}
+
+	/* No node describes the bit, so it is cleared. */
+	if (!nodep)
+		return false;
+
+	/* Bit is set if it is any of the bits described by num_after */
+	if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
+		return true;
+
+	/*
+	 * Is the corresponding mask bit set?  The probe bit is built in
+	 * mask_t so that testing the top mask bit does not shift a signed
+	 * int 1 into its sign bit.
+	 */
+	assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
+	return !!(nodep->mask & ((mask_t) 1 << (idx - nodep->idx)));
+}
+
+/* Within the sparsebit array pointed to by s, sets the bit
+ * at the index given by idx.  Does nothing if the bit is already set.
+ */
+static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep;
+	mask_t bit;
+
+	/* Skip bits that are already set */
+	if (sparsebit_is_set(s, idx))
+		return;
+
+	/*
+	 * Get a node where the bit at idx is described by the mask.
+	 * The node_split will also create a node, if there isn't
+	 * already a node that describes the setting of bit.
+	 * idx & -MASK_BITS rounds idx down to its mask boundary.
+	 */
+	nodep = node_split(s, idx & -MASK_BITS);
+
+	/*
+	 * Set the bit within the nodes mask.  The bit is formed in mask_t
+	 * so that the top mask bit is not produced by shifting a signed
+	 * int 1 into its sign bit.
+	 */
+	assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+	bit = (mask_t) 1 << (idx - nodep->idx);
+	assert(!(nodep->mask & bit));
+	nodep->mask |= bit;
+	s->num_set++;
+
+	/* Let node_reduce() merge or tidy the nodes around the change. */
+	node_reduce(s, nodep);
+}
+
+/* Within the sparsebit array pointed to by s, clears the bit
+ * at the index given by idx.  Does nothing if the bit is already
+ * cleared.
+ */
+static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep;
+	mask_t bit;
+
+	/* Skip bits that are already cleared */
+	if (!sparsebit_is_set(s, idx))
+		return;
+
+	/* Is there a node that describes the setting of this bit? */
+	nodep = node_find(s, idx);
+	if (!nodep)
+		return;
+
+	/*
+	 * If a num_after bit, split the node, so that the bit is
+	 * part of a node mask.  idx & -MASK_BITS rounds idx down to
+	 * its mask boundary.
+	 */
+	if (idx >= nodep->idx + MASK_BITS)
+		nodep = node_split(s, idx & -MASK_BITS);
+
+	/*
+	 * After node_split above, bit at idx should be within the mask.
+	 * Clear that bit.  The bit is formed in mask_t so that the top
+	 * mask bit is not produced by shifting a signed int 1 into its
+	 * sign bit.
+	 */
+	assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+	bit = (mask_t) 1 << (idx - nodep->idx);
+	assert(nodep->mask & bit);
+	nodep->mask &= ~bit;
+	assert(s->num_set > 0 || sparsebit_all_set(s));
+	s->num_set--;
+
+	/* Let node_reduce() remove or merge nodes affected by the change. */
+	node_reduce(s, nodep);
+}
+
+/* Recursively dumps to the FILE stream given by stream the contents
+ * of the sub-tree of nodes pointed to by nodep.  Each line of output
+ * is prefixed by the number of spaces given by indent.  On each
+ * recursion, the indent amount is increased by 2.  This causes nodes
+ * at each level deeper into the binary search tree to be displayed
+ * with a greater indent.
+ *
+ * nodep must not be NULL.
+ */
+static void dump_nodes(FILE *stream, struct node *nodep,
+	unsigned int indent)
+{
+	/* Only ever points at string literals, hence const. */
+	const char *node_type;
+
+	/* Classify the node by its position relative to its parent */
+	if (!nodep->parent)
+		node_type = "root";
+	else if (nodep == nodep->parent->left)
+		node_type = "left";
+	else {
+		assert(nodep == nodep->parent->right);
+		node_type = "right";
+	}
+
+	/* Dump contents of node; %p requires a pointer to void. */
+	fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type,
+		(void *) nodep);
+	fprintf(stream, "%*s  parent: %p left: %p right: %p\n", indent, "",
+		(void *) nodep->parent, (void *) nodep->left,
+		(void *) nodep->right);
+	fprintf(stream, "%*s  idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
+		indent, "", nodep->idx, nodep->mask, nodep->num_after);
+
+	/* If present, dump contents of left child nodes */
+	if (nodep->left)
+		dump_nodes(stream, nodep->left, indent + 2);
+
+	/* If present, dump contents of right child nodes */
+	if (nodep->right)
+		dump_nodes(stream, nodep->right, indent + 2);
+}
+
+/* Returns the index of the lowest set mask bit of nodep that is at or
+ * above bit position start within the mask.  The caller must make sure
+ * such a bit exists: __builtin_ctz() has no defined result for 0.
+ */
+static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
+{
+	/* Negating a single bit yields that bit and every bit above it. */
+	mask_t at_or_above = -((mask_t)1 << start);
+	mask_t candidates = nodep->mask & at_or_above;
+
+	return nodep->idx + __builtin_ctz(candidates);
+}
+
+/* Returns the index of the lowest cleared mask bit of nodep that is at
+ * or above bit position start within the mask.  The caller must make
+ * sure such a bit exists: __builtin_ctz() has no defined result for 0.
+ */
+static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
+{
+	/* Negating a single bit yields that bit and every bit above it. */
+	mask_t at_or_above = -((mask_t)1 << start);
+	mask_t candidates = ~nodep->mask & at_or_above;
+
+	return nodep->idx + __builtin_ctz(candidates);
+}
+
+/* Dumps to the FILE stream specified by stream, the implementation dependent
+ * internal state of s.  Each line of output is prefixed with the number
+ * of spaces given by indent.  The output is completely implementation
+ * dependent and subject to change.  Output from this function should only
+ * be used for diagnostic purposes.  For example, this function can be
+ * used by test cases after they detect an unexpected condition, as a means
+ * to capture diagnostic information.
+ */
+static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+	unsigned int indent)
+{
+	/* Dump the contents of s; %p requires a pointer to void. */
+	fprintf(stream, "%*sroot: %p\n", indent, "", (void *) s->root);
+	fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
+
+	/* Dump every node of the tree, if there is one */
+	if (s->root)
+		dump_nodes(stream, s->root, indent);
+}
+
+/* Creates a new sparsebit array and returns a pointer to it.  The
+ * array is zero-initialized by calloc(), i.e. it starts out with no
+ * root node and a num_set of 0.  If the allocation fails, the error
+ * is reported with perror() and the process is aborted.
+ */
+struct sparsebit *sparsebit_alloc(void)
+{
+	/* Top level structure, zero-filled. */
+	struct sparsebit *sbit = calloc(1, sizeof(*sbit));
+
+	if (sbit == NULL) {
+		perror("calloc");
+		abort();
+	}
+
+	return sbit;
+}
+
+/* Releases the sparsebit array that *sbitp points to: clears all of
+ * its bits, frees the top level structure, and then sets *sbitp to
+ * NULL.  Nothing is done when *sbitp is already NULL.
+ */
+void sparsebit_free(struct sparsebit **sbitp)
+{
+	if (*sbitp == NULL)
+		return;
+
+	sparsebit_clear_all(*sbitp);
+	free(*sbitp);
+	*sbitp = NULL;
+}
+
+/* Makes a copy of the sparsebit array given by s, to the sparsebit
+ * array given by d.  Note, d must have already been allocated via
+ * sparsebit_alloc().  It can though already have bits set, which
+ * if different from src will be cleared.
+ *
+ * Copying an array onto itself (d == s) leaves it unchanged.
+ */
+void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+{
+	/*
+	 * Clearing the destination first would also wipe the source
+	 * when both are the same array, so a self-copy is a no-op.
+	 */
+	if (d == s)
+		return;
+
+	/* First clear any bits already set in the destination */
+	sparsebit_clear_all(d);
+
+	/* An empty source leaves the destination empty. */
+	if (s->root) {
+		d->root = node_copy_subtree(s->root);
+		d->num_set = s->num_set;
+	}
+}
+
+/* Reports whether every one of the num bits starting at idx is set.
+ * num must be non-zero and idx + num - 1 must not wrap.
+ */
+bool sparsebit_is_set_num(struct sparsebit *s,
+	sparsebit_idx_t idx, sparsebit_num_t num)
+{
+	sparsebit_idx_t first_clear_after;
+
+	assert(num > 0);
+	assert(idx + num - 1 >= idx);
+
+	/* A run of set bits can't begin at a cleared bit. */
+	if (!sparsebit_is_set(s, idx))
+		return false;
+
+	/*
+	 * A result of 0 means nothing beyond idx is cleared; since
+	 * idx + num doesn't wrap, that leaves at least num set bits.
+	 */
+	first_clear_after = sparsebit_next_clear(s, idx);
+	if (first_clear_after == 0)
+		return true;
+
+	/* Otherwise the run of set bits must reach at least num bits. */
+	return first_clear_after - idx >= num;
+}
+
+/* Returns whether the bit at the index given by idx is cleared.  */
+bool sparsebit_is_clear(struct sparsebit *s,
+	sparsebit_idx_t idx)
+{
+	if (sparsebit_is_set(s, idx))
+		return false;
+
+	return true;
+}
+
+/* Reports whether every one of the num bits starting at idx is cleared.
+ * num must be non-zero and idx + num - 1 must not wrap.
+ */
+bool sparsebit_is_clear_num(struct sparsebit *s,
+	sparsebit_idx_t idx, sparsebit_num_t num)
+{
+	sparsebit_idx_t first_set_after;
+
+	assert(num > 0);
+	assert(idx + num - 1 >= idx);
+
+	/* A run of cleared bits can't begin at a set bit. */
+	if (!sparsebit_is_clear(s, idx))
+		return false;
+
+	/*
+	 * A result of 0 means nothing beyond idx is set; since
+	 * idx + num doesn't wrap, that leaves at least num cleared bits.
+	 */
+	first_set_after = sparsebit_next_set(s, idx);
+	if (first_set_after == 0)
+		return true;
+
+	/* Otherwise the run of cleared bits must reach at least num bits. */
+	return first_set_after - idx >= num;
+}
+
+/* Reports how many bits are set, as tracked in the num_set member.
+ * Note: 0 is also returned for the case of all bits set, because the
+ * count of every bit being set is 1 more than the return type can
+ * represent.  To find out whether any bit is set, call
+ * sparsebit_any_set() rather than testing sparsebit_num_set() > 0.
+ */
+sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+{
+	sparsebit_num_t total = s->num_set;
+
+	return total;
+}
+
+/* Reports whether at least one bit of the sparsebit array is set.  */
+bool sparsebit_any_set(struct sparsebit *s)
+{
+	struct node *top = s->root;
+
+	/*
+	 * Only set bits are described by nodes, so an array without
+	 * a root node has nothing set.
+	 */
+	if (top == NULL)
+		return false;
+
+	/*
+	 * All nodes are expected to carry a non-zero mask.  Only the
+	 * root is checked here, as a cheap confirmation that some bit
+	 * is set.  A num_set of 0 with nodes present is accepted only
+	 * for a root that has a full mask and a num_after equal to
+	 * 0 - MASK_BITS.
+	 */
+	assert(top->mask != 0);
+	assert(s->num_set > 0 ||
+	       (top->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
+		top->mask == ~(mask_t) 0));
+
+	return true;
+}
+
+/* Reports whether no bit at all is set in the sparsebit array.  */
+bool sparsebit_all_clear(struct sparsebit *s)
+{
+	if (sparsebit_any_set(s))
+		return false;
+
+	return true;
+}
+
+/* Returns whether any bit in the sparsebit array is cleared, i.e.
+ * whether it is not the case that all bits are set.
+ */
+bool sparsebit_any_clear(struct sparsebit *s)
+{
+	if (sparsebit_all_set(s))
+		return false;
+
+	return true;
+}
+
+/* Finds the lowest index whose bit is set and returns it.  Asserts
+ * that the array has at least one bit set.
+ */
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+{
+	/* There must be a set bit to find */
+	assert(sparsebit_any_set(s));
+
+	/* It is the lowest set mask bit of the first node. */
+	return node_first_set(node_first(s), 0);
+}
+
+/* Finds the lowest index whose bit is cleared and returns it.  Asserts
+ * that the array has at least one cleared bit.
+ */
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+{
+	struct node *first, *second;
+	sparsebit_idx_t end_of_first;
+
+	/* There must be a cleared bit to find. */
+	assert(sparsebit_any_clear(s));
+
+	/* Bit 0 is cleared when no node exists or none starts at 0. */
+	first = node_first(s);
+	if (!first || first->idx > 0)
+		return 0;
+
+	/* A hole in the first node's mask is the answer. */
+	if (first->mask != ~(mask_t) 0)
+		return node_first_clear(first, 0);
+
+	/*
+	 * The first node's mask is full, so the search moves to the
+	 * first index beyond everything that node describes.
+	 */
+	second = node_next(s, first);
+	end_of_first = first->idx + MASK_BITS + first->num_after;
+	if (!second) {
+		/*
+		 * Without a second node, the bit just past the first
+		 * node is the first cleared one.
+		 */
+		assert(first->mask == ~(mask_t) 0);
+		assert(end_of_first != (sparsebit_idx_t) 0);
+		return end_of_first;
+	}
+
+	/*
+	 * With a second node that doesn't start right after the first,
+	 * the gap between them holds cleared bits, and the gap's first
+	 * bit is the answer.
+	 */
+	if (end_of_first != second->idx)
+		return end_of_first;
+
+	/*
+	 * The second node directly follows the first.  An adjacent
+	 * node is expected to have a non-zero mask that isn't full;
+	 * a full mask should have been folded into the previous
+	 * node's num_after.  So its mask holds the first cleared bit.
+	 */
+	return node_first_clear(second, 0);
+}
+
+/* Returns index of next bit set within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are set.
+ */
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+	sparsebit_idx_t prev)
+{
+	sparsebit_idx_t lowest_possible = prev + 1;
+	sparsebit_idx_t start;
+	struct node *nodep;
+
+	/* prev was the highest index, so there is no bit after it. */
+	if (lowest_possible == 0)
+		return 0;
+
+	/*
+	 * Find the leftmost 'candidate' overlapping or to the right
+	 * of lowest_possible.
+	 */
+	struct node *candidate = NULL;
+
+	/* True iff lowest_possible is within candidate */
+	bool contains = false;
+
+	/*
+	 * Find node that describes setting of bit at lowest_possible.
+	 * If such a node doesn't exist, find the node with the lowest
+	 * starting index that is > lowest_possible.
+	 */
+	for (nodep = s->root; nodep;) {
+		if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
+			>= lowest_possible) {
+			candidate = nodep;
+			if (candidate->idx <= lowest_possible) {
+				contains = true;
+				break;
+			}
+			nodep = nodep->left;
+		} else {
+			nodep = nodep->right;
+		}
+	}
+	if (!candidate)
+		return 0;
+
+	assert(candidate->mask != 0);
+
+	/* Does the candidate node describe the setting of lowest_possible? */
+	if (!contains) {
+		/*
+		 * Candidate doesn't describe setting of bit at lowest_possible.
+		 * Candidate points to the first node with a starting index
+		 * > lowest_possible.
+		 */
+		assert(candidate->idx > lowest_possible);
+
+		return node_first_set(candidate, 0);
+	}
+
+	/*
+	 * Candidate describes setting of bit at lowest_possible.
+	 * Note: although the node describes the setting of the bit
+	 * at lowest_possible, its possible that its setting and the
+	 * setting of all latter bits described by this node are 0.
+	 * For now, just handle the cases where this node describes
+	 * a bit at or after an index of lowest_possible that is set.
+	 */
+	start = lowest_possible - candidate->idx;
+
+	/*
+	 * mask >= (1 << start) holds exactly when some mask bit at or
+	 * above start is set.  The comparison value is built in mask_t
+	 * so that start == MASK_BITS - 1 does not shift a signed int 1
+	 * into its sign bit.
+	 */
+	if (start < MASK_BITS && candidate->mask >= ((mask_t) 1 << start))
+		return node_first_set(candidate, start);
+
+	if (candidate->num_after) {
+		sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
+
+		return lowest_possible < first_num_after_idx
+			? first_num_after_idx : lowest_possible;
+	}
+
+	/*
+	 * Although candidate node describes setting of bit at
+	 * the index of lowest_possible, all bits at that index and
+	 * latter that are described by candidate are cleared.  With
+	 * this, the next bit is the first bit in the next node, if
+	 * such a node exists.  If a next node doesn't exist, then
+	 * there is no next set bit.
+	 */
+	candidate = node_next(s, candidate);
+	if (!candidate)
+		return 0;
+
+	return node_first_set(candidate, 0);
+}
+
+/* Returns index of next bit cleared within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are cleared.
+ */
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+	sparsebit_idx_t prev)
+{
+	sparsebit_idx_t lowest_possible = prev + 1;
+	sparsebit_idx_t idx;
+	struct node *nodep1, *nodep2;
+
+	/* prev was the highest index, so there is no bit after it. */
+	if (lowest_possible == 0)
+		return 0;
+
+	/*
+	 * Does a node describing the setting of lowest_possible exist?
+	 * If not, the bit at lowest_possible is cleared.
+	 */
+	nodep1 = node_find(s, lowest_possible);
+	if (!nodep1)
+		return lowest_possible;
+
+	/*
+	 * Does a mask bit in node 1 describe the next cleared bit?
+	 * The loop body is skipped when lowest_possible lies in the
+	 * num_after part of the node.  The probe bit is built in mask_t
+	 * so that testing the top mask bit does not shift a signed int 1
+	 * into its sign bit.
+	 */
+	for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
+		if (!(nodep1->mask & ((mask_t) 1 << idx)))
+			return nodep1->idx + idx;
+
+	/*
+	 * Next cleared bit is not described by node 1.  If there
+	 * isn't a next node, then next cleared bit is described
+	 * by bit after the bits described by the first node.
+	 */
+	nodep2 = node_next(s, nodep1);
+	if (!nodep2)
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+	/*
+	 * There is a second node.
+	 * If it is not adjacent to the first node, then there is a gap
+	 * of cleared bits between the nodes, and the next cleared bit
+	 * is the first bit within the gap.
+	 */
+	if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+	/*
+	 * Second node is adjacent to the first node.
+	 * Because it is adjacent, its mask should be non-zero.  If all
+	 * its mask bits are set, then with it being adjacent, it should
+	 * have had the mask bits moved into the num_after setting of the
+	 * previous node.
+	 */
+	return node_first_clear(nodep2, 0);
+}
+
+/* Searches the indexes greater than start for the first run of num
+ * consecutively set bits and returns the index at which that run
+ * begins.  Returns 0 if there is no such run.
+ */
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	sparsebit_idx_t run_start, run_end;
+
+	assert(num >= 1);
+
+	/* Stop once no set bit remains or idx + num - 1 would wrap. */
+	run_start = sparsebit_next_set(s, start);
+	while (run_start != 0 && run_start + num - 1 >= run_start) {
+		assert(sparsebit_is_set(s, run_start));
+
+		/* Is the run that begins here long enough? */
+		if (sparsebit_is_set_num(s, run_start, num))
+			return run_start;
+
+		/*
+		 * Too short.  Jump over the whole run by locating the
+		 * cleared bit that ends it; if there is none, no later
+		 * run can exist.
+		 */
+		run_end = sparsebit_next_clear(s, run_start);
+		if (run_end == 0)
+			break;
+
+		/* Resume at the first set bit beyond that cleared bit. */
+		run_start = sparsebit_next_set(s, run_end);
+	}
+
+	return 0;
+}
+
+/* Searches the indexes greater than start for the first run of num
+ * consecutively cleared bits and returns the index at which that run
+ * begins.  Returns 0 if there is no such run.
+ */
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	sparsebit_idx_t run_start, run_end;
+
+	assert(num >= 1);
+
+	/* Stop once no cleared bit remains or idx + num - 1 would wrap. */
+	run_start = sparsebit_next_clear(s, start);
+	while (run_start != 0 && run_start + num - 1 >= run_start) {
+		assert(sparsebit_is_clear(s, run_start));
+
+		/* Is the run that begins here long enough? */
+		if (sparsebit_is_clear_num(s, run_start, num))
+			return run_start;
+
+		/*
+		 * Too short.  Jump over the whole run by locating the
+		 * set bit that ends it; if there is none, no later
+		 * run can exist.
+		 */
+		run_end = sparsebit_next_set(s, run_start);
+		if (run_end == 0)
+			break;
+
+		/* Resume at the first cleared bit beyond that set bit. */
+		run_start = sparsebit_next_clear(s, run_end);
+	}
+
+	return 0;
+}
+
+/* Sets the bits in the inclusive range start through start + num - 1.
+ * num must be non-zero and start + num - 1 must not wrap.
+ *
+ * The range is handled in three parts: leading bits up to the first
+ * mask boundary, a middle made of whole masks, and trailing bits.
+ */
+void sparsebit_set_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	struct node *nodep, *next;
+	unsigned int n1;
+	sparsebit_idx_t idx;
+	sparsebit_num_t n;
+	sparsebit_idx_t middle_start, middle_end;
+
+	assert(num > 0);
+	assert(start + num - 1 >= start);
+
+	/*
+	 * Leading - bits before first mask boundary.
+	 *
+	 * TODO(lhuemill): With some effort it may be possible to
+	 *   replace the following loop with a sequential sequence
+	 *   of statements.  High level sequence would be:
+	 *
+	 *     1. Use node_split() to force node that describes setting
+	 *        of idx to be within the mask portion of a node.
+	 *     2. Form mask of bits to be set.
+	 *     3. Determine number of mask bits already set in the node
+	 *        and store in a local variable named num_already_set.
+	 *     4. Set the appropriate mask bits within the node.
+	 *     5. Increment struct sparsebit_pvt num_set member
+	 *        by the number of bits that were actually set.
+	 *        Exclude from the counts bits that were already set.
+	 *     6. Before returning to the caller, use node_reduce() to
+	 *        handle the multiple corner cases that this method
+	 *        introduces.
+	 */
+	for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+		bit_set(s, idx);
+
+	/*
+	 * Middle - bits spanning one or more entire mask.
+	 * n & -MASK_BITS rounds n down to a whole number of masks.
+	 */
+	middle_start = idx;
+	middle_end = middle_start + (n & -MASK_BITS) - 1;
+	if (n >= MASK_BITS) {
+		nodep = node_split(s, middle_start);
+
+		/*
+		 * As needed, split just after end of middle bits.
+		 * No split needed if end of middle bits is at highest
+		 * supported bit index.
+		 */
+		if (middle_end + 1 > middle_end)
+			(void) node_split(s, middle_end + 1);
+
+		/* Delete nodes that only describe bits within the middle. */
+		for (next = node_next(s, nodep);
+			next && (next->idx < middle_end);
+			next = node_next(s, nodep)) {
+			assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+			node_rm(s, next);
+			next = NULL;
+		}
+
+		/*
+		 * As needed set each of the mask bits.  Each bit is formed
+		 * in mask_t so that the top mask bit is not produced by
+		 * shifting a signed int 1 into its sign bit.
+		 */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			mask_t bit = (mask_t) 1 << n1;
+
+			if (!(nodep->mask & bit)) {
+				nodep->mask |= bit;
+				s->num_set++;
+			}
+		}
+
+		/*
+		 * Replace the node's num_after with the rest of the middle,
+		 * keeping num_set in step with the change.
+		 */
+		s->num_set -= nodep->num_after;
+		nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
+		s->num_set += nodep->num_after;
+
+		node_reduce(s, nodep);
+	}
+	idx = middle_end + 1;
+	n -= middle_end - middle_start + 1;
+
+	/* Trailing - bits at and beyond last mask boundary */
+	assert(n < MASK_BITS);
+	for (; n > 0; idx++, n--)
+		bit_set(s, idx);
+}
+
+/* Clears the bits in the inclusive range start through start + num - 1.
+ * num must be non-zero and start + num - 1 must not wrap.
+ *
+ * The range is handled in three parts: leading bits up to the first
+ * mask boundary, a middle made of whole masks, and trailing bits.
+ */
+void sparsebit_clear_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	struct node *nodep, *next;
+	unsigned int n1;
+	sparsebit_idx_t idx;
+	sparsebit_num_t n;
+	sparsebit_idx_t middle_start, middle_end;
+
+	assert(num > 0);
+	assert(start + num - 1 >= start);
+
+	/* Leading - bits before first mask boundary */
+	for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+		bit_clear(s, idx);
+
+	/*
+	 * Middle - bits spanning one or more entire mask.
+	 * n & -MASK_BITS rounds n down to a whole number of masks.
+	 */
+	middle_start = idx;
+	middle_end = middle_start + (n & -MASK_BITS) - 1;
+	if (n >= MASK_BITS) {
+		nodep = node_split(s, middle_start);
+
+		/*
+		 * As needed, split just after end of middle bits.
+		 * No split needed if end of middle bits is at highest
+		 * supported bit index.
+		 */
+		if (middle_end + 1 > middle_end)
+			(void) node_split(s, middle_end + 1);
+
+		/* Delete nodes that only describe bits within the middle. */
+		for (next = node_next(s, nodep);
+			next && (next->idx < middle_end);
+			next = node_next(s, nodep)) {
+			assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+			node_rm(s, next);
+			next = NULL;
+		}
+
+		/*
+		 * As needed clear each of the mask bits.  Each bit is
+		 * formed in mask_t so that the top mask bit is not produced
+		 * by shifting a signed int 1 into its sign bit.
+		 */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			mask_t bit = (mask_t) 1 << n1;
+
+			if (nodep->mask & bit) {
+				nodep->mask &= ~bit;
+				s->num_set--;
+			}
+		}
+
+		/* Clear any bits described by num_after */
+		s->num_set -= nodep->num_after;
+		nodep->num_after = 0;
+
+		/*
+		 * Delete the node that describes the beginning of
+		 * the middle bits and perform any allowed reductions
+		 * with the nodes prev or next of nodep.
+		 */
+		node_reduce(s, nodep);
+		nodep = NULL;
+	}
+	idx = middle_end + 1;
+	n -= middle_end - middle_start + 1;
+
+	/* Trailing - bits at and beyond last mask boundary */
+	assert(n < MASK_BITS);
+	for (; n > 0; idx++, n--)
+		bit_clear(s, idx);
+}
+
+/* Sets a single bit, the one at index idx, by way of a one-bit
+ * sparsebit_set_num() call.
+ */
+void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	const sparsebit_num_t single_bit = 1;
+
+	sparsebit_set_num(s, idx, single_bit);
+}
+
+/* Clears a single bit, the one at index idx, by way of a one-bit
+ * sparsebit_clear_num() call.
+ */
+void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	const sparsebit_num_t single_bit = 1;
+
+	sparsebit_clear_num(s, idx, single_bit);
+}
+
+/* Sets every bit of the sparsebit array.  Done in two steps: bit 0
+ * on its own, then the remaining bits from index 1 upward, whose
+ * count is the largest value a sparsebit_idx_t can hold.
+ */
+void sparsebit_set_all(struct sparsebit *s)
+{
+	const sparsebit_num_t rest = ~(sparsebit_idx_t) 0;
+
+	sparsebit_set(s, 0);
+	sparsebit_set_num(s, 1, rest);
+
+	assert(sparsebit_all_set(s));
+}
+
+/* Clears every bit of the sparsebit array.  Done in two steps: bit 0
+ * on its own, then the remaining bits from index 1 upward, whose
+ * count is the largest value a sparsebit_idx_t can hold.
+ */
+void sparsebit_clear_all(struct sparsebit *s)
+{
+	const sparsebit_num_t rest = ~(sparsebit_idx_t) 0;
+
+	sparsebit_clear(s, 0);
+	sparsebit_clear_num(s, 1, rest);
+
+	assert(!sparsebit_any_set(s));
+}
+
+/* Formats the inclusive range low through high, as a single value
+ * when low equals high and as "low:high" otherwise, optionally
+ * preceded by ", ".  With a non-NULL stream the text is printed to
+ * it; with a NULL stream nothing is printed and only the length the
+ * text would have had is computed.  Returns the value reported by
+ * fprintf() or snprintf(), converted to size_t.
+ */
+static size_t display_range(FILE *stream, sparsebit_idx_t low,
+	sparsebit_idx_t high, bool prepend_comma_space)
+{
+	const char *single = prepend_comma_space ? ", 0x%lx" : "0x%lx";
+	const char *range = prepend_comma_space
+		? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
+
+	/* The single-value format simply leaves the high argument unused. */
+	const char *fmt = (low == high) ? single : range;
+
+	if (stream)
+		return fprintf(stream, fmt, low, high);
+
+	/* Size-only query: a zero-length buffer makes snprintf just count. */
+	return snprintf(NULL, 0, fmt, low, high);
+}
+
+
+/*
+ * Writes the range low:high to stream as part of a sparsebit_dump() line.
+ * *line_len is the number of range characters already on the current
+ * line; when appending the range would exceed DUMP_LINE_MAX, a newline
+ * and a fresh indent are written first and the count restarts at zero.
+ */
+static void dump_emit_range(FILE *stream, unsigned int indent,
+	sparsebit_idx_t low, sparsebit_idx_t high, size_t *line_len)
+{
+	size_t sz;
+
+	/* How much room will it take to display this range. */
+	sz = display_range(NULL, low, high, *line_len != 0);
+
+	/*
+	 * If there is not enough room, display a newline plus the
+	 * indent of the next line.
+	 */
+	if (*line_len + sz > DUMP_LINE_MAX) {
+		fputs("\n", stream);
+		fprintf(stream, "%*s", indent, "");
+		*line_len = 0;
+	}
+
+	/* Display the range */
+	sz = display_range(stream, low, high, *line_len != 0);
+	*line_len += sz;
+}
+
+/* Dumps to the FILE stream given by stream, the bit settings
+ * of s.  Each line of output is prefixed with the number of
+ * spaces given by indent.  The length of each line is implementation
+ * dependent and does not depend on the indent amount.  The following
+ * is an example output of a sparsebit array that has bits:
+ *
+ *   0x5, 0x8, 0xa:0xe, 0x12
+ *
+ * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
+ * are set.  Note that a ':', instead of a '-' is used to specify a range of
+ * contiguous bits.  This is done because '-' is used to specify command-line
+ * options, and sometimes ranges are specified as command-line arguments.
+ *
+ * Nothing at all is written when no bit is set.
+ */
+void sparsebit_dump(FILE *stream, struct sparsebit *s,
+	unsigned int indent)
+{
+	size_t current_line_len = 0;
+	struct node *nodep;
+
+	if (!sparsebit_any_set(s))
+		return;
+
+	/* Display initial indent */
+	fprintf(stream, "%*s", indent, "");
+
+	/* For each node */
+	for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
+		unsigned int n1;
+		sparsebit_idx_t low, high;
+
+		/*
+		 * For each group of bits in the mask.  The probe bit is
+		 * built in mask_t: a plain int 1 must not be shifted into
+		 * its sign bit.
+		 */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			if (!(nodep->mask & ((mask_t) 1 << n1)))
+				continue;
+
+			low = high = nodep->idx + n1;
+
+			/* Extend the range over the run of set bits. */
+			for (; n1 < MASK_BITS; n1++) {
+				if (!(nodep->mask & ((mask_t) 1 << n1)))
+					break;
+				high = nodep->idx + n1;
+			}
+
+			/*
+			 * A run that reaches the top of the mask continues
+			 * into the bits described by num_after.
+			 */
+			if ((n1 == MASK_BITS) && nodep->num_after)
+				high += nodep->num_after;
+
+			dump_emit_range(stream, indent, low, high,
+				&current_line_len);
+		}
+
+		/*
+		 * If num_after and most significant-bit of mask is not
+		 * set, then still need to display a range for the bits
+		 * described by num_after.
+		 */
+		if (!(nodep->mask & ((mask_t) 1 << (MASK_BITS - 1)))
+			&& nodep->num_after) {
+			low = nodep->idx + MASK_BITS;
+			high = nodep->idx + MASK_BITS + nodep->num_after - 1;
+
+			dump_emit_range(stream, indent, low, high,
+				&current_line_len);
+		}
+	}
+	fputs("\n", stream);
+}
+
+/* Validates the internal state of the sparsebit array given by
+ * s.  On error, diagnostic information is printed to stderr and
+ * abort is called.
+ *
+ * The nodes are visited in node_first()/node_next() order.  The walk
+ * stops at the first inconsistency found; the total-bits check is only
+ * made when every node passed.
+ */
+void sparsebit_validate_internal(struct sparsebit *s)
+{
+	bool error_detected = false;
+	struct node *nodep, *prev = NULL;
+	sparsebit_num_t total_bits_set = 0;
+	unsigned int n1;
+
+	/* For each node */
+	for (nodep = node_first(s); nodep;
+		prev = nodep, nodep = node_next(s, nodep)) {
+
+		/*
+		 * Increase total bits set by the number of bits set
+		 * in this node.  The probe bit is built in mask_t: a
+		 * plain int 1 must not be shifted into its sign bit.
+		 */
+		for (n1 = 0; n1 < MASK_BITS; n1++)
+			if (nodep->mask & ((mask_t) 1 << n1))
+				total_bits_set++;
+
+		total_bits_set += nodep->num_after;
+
+		/*
+		 * Arbitrary choice as to whether a mask of 0 is allowed
+		 * or not.  For diagnostic purposes it is beneficial to
+		 * have only one valid means to represent a set of bits.
+		 * To support this an arbitrary choice has been made
+		 * to not allow a mask of zero.
+		 */
+		if (nodep->mask == 0) {
+			fprintf(stderr, "Node mask of zero, "
+				"nodep: %p nodep->mask: 0x%x\n",
+				nodep, nodep->mask);
+			error_detected = true;
+			break;
+		}
+
+		/*
+		 * Validate num_after is not greater than the max index
+		 * - the number of mask bits.  The num_after member
+		 * uses 0-based indexing and thus has no value that
+		 * represents all bits set.  This limitation is handled
+		 * by requiring a non-zero mask.  With a non-zero mask,
+		 * MASK_BITS worth of bits are described by the mask,
+		 * which makes the largest needed num_after equal to:
+		 *
+		 *    (~(sparsebit_num_t) 0) - MASK_BITS + 1
+		 */
+		if (nodep->num_after
+			> (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
+			fprintf(stderr, "num_after too large, "
+				"nodep: %p nodep->num_after: 0x%lx\n",
+				nodep, nodep->num_after);
+			error_detected = true;
+			break;
+		}
+
+		/* Validate node index is divisible by the mask size */
+		if (nodep->idx % MASK_BITS) {
+			fprintf(stderr, "Node index not divisible by "
+				"mask size,\n"
+				"  nodep: %p nodep->idx: 0x%lx "
+				"MASK_BITS: %lu\n",
+				nodep, nodep->idx, MASK_BITS);
+			error_detected = true;
+			break;
+		}
+
+		/*
+		 * Validate bits described by node don't wrap beyond the
+		 * highest supported index.
+		 */
+		if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
+			fprintf(stderr, "Bits described by node wrap "
+				"beyond highest supported index,\n"
+				"  nodep: %p nodep->idx: 0x%lx\n"
+				"  MASK_BITS: %lu nodep->num_after: 0x%lx\n",
+				nodep, nodep->idx, MASK_BITS, nodep->num_after);
+			error_detected = true;
+			break;
+		}
+
+		/* Check parent pointers. */
+		if (nodep->left) {
+			if (nodep->left->parent != nodep) {
+				fprintf(stderr, "Left child parent pointer "
+					"doesn't point to this node,\n"
+					"  nodep: %p nodep->left: %p "
+					"nodep->left->parent: %p\n",
+					nodep, nodep->left,
+					nodep->left->parent);
+				error_detected = true;
+				break;
+			}
+		}
+
+		if (nodep->right) {
+			if (nodep->right->parent != nodep) {
+				fprintf(stderr, "Right child parent pointer "
+					"doesn't point to this node,\n"
+					"  nodep: %p nodep->right: %p "
+					"nodep->right->parent: %p\n",
+					nodep, nodep->right,
+					nodep->right->parent);
+				error_detected = true;
+				break;
+			}
+		}
+
+		/* A node without a parent must be the root. */
+		if (!nodep->parent) {
+			if (s->root != nodep) {
+				fprintf(stderr, "Unexpected root node, "
+					"s->root: %p nodep: %p\n",
+					s->root, nodep);
+				error_detected = true;
+				break;
+			}
+		}
+
+		if (prev) {
+			/*
+			 * Is index of previous node before index of
+			 * current node?
+			 */
+			if (prev->idx >= nodep->idx) {
+				fprintf(stderr, "Previous node index "
+					">= current node index,\n"
+					"  prev: %p prev->idx: 0x%lx\n"
+					"  nodep: %p nodep->idx: 0x%lx\n",
+					prev, prev->idx, nodep, nodep->idx);
+				error_detected = true;
+				break;
+			}
+
+			/*
+			 * The last bit described by the previous node
+			 * must come before the first bit of the current
+			 * node: node bit ranges may not overlap.
+			 */
+			if ((prev->idx + MASK_BITS + prev->num_after - 1)
+				>= nodep->idx) {
+				fprintf(stderr, "Previous node bit range "
+					"overlap with current node bit range,\n"
+					"  prev: %p prev->idx: 0x%lx "
+					"prev->num_after: 0x%lx\n"
+					"  nodep: %p nodep->idx: 0x%lx "
+					"nodep->num_after: 0x%lx\n"
+					"  MASK_BITS: %lu\n",
+					prev, prev->idx, prev->num_after,
+					nodep, nodep->idx, nodep->num_after,
+					MASK_BITS);
+				error_detected = true;
+				break;
+			}
+
+			/*
+			 * When the node has all mask bits set, it shouldn't
+			 * be adjacent to the last bit described by the
+			 * previous node.
+			 */
+			if (nodep->mask == ~(mask_t) 0 &&
+			    prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
+				fprintf(stderr, "Current node has mask with "
+					"all bits set and is adjacent to the "
+					"previous node,\n"
+					"  prev: %p prev->idx: 0x%lx "
+					"prev->num_after: 0x%lx\n"
+					"  nodep: %p nodep->idx: 0x%lx "
+					"nodep->num_after: 0x%lx\n"
+					"  MASK_BITS: %lu\n",
+					prev, prev->idx, prev->num_after,
+					nodep, nodep->idx, nodep->num_after,
+					MASK_BITS);
+
+				error_detected = true;
+				break;
+			}
+		}
+	}
+
+	if (!error_detected) {
+		/*
+		 * Is sum of bits set in each node equal to the count
+		 * of total bits set.
+		 */
+		if (s->num_set != total_bits_set) {
+			fprintf(stderr, "Number of bits set missmatch,\n"
+				"  s->num_set: 0x%lx total_bits_set: 0x%lx\n",
+				s->num_set, total_bits_set);
+
+			error_detected = true;
+		}
+	}
+
+	/*
+	 * Every diagnostic above ends with a newline so that the dump
+	 * header below starts on a line of its own.
+	 */
+	if (error_detected) {
+		fputs("  dump_internal:\n", stderr);
+		sparsebit_dump_internal(stderr, s, 4);
+		abort();
+	}
+}
+
+
+#ifdef FUZZ
+/* A simple but effective fuzzing driver.  Look for bugs with the help
+ * of some invariants and of a trivial representation of sparsebit.
+ * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
+ * afl-fuzz do the magic. :)
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+
+/* One entry of the fuzzer's reference model: an inclusive index range
+ * together with the state (set or clear) an operation gave it.
+ */
+struct range {
+	sparsebit_idx_t first, last;
+	bool set;
+};
+
+/* Sparsebit array under test; allocated in main(). */
+struct sparsebit *s;
+/* Reference model: log of range operations, appended to by operate() and
+ * searched newest-first by get_value().
+ */
+struct range ranges[1000];
+/* Number of entries of ranges[] currently in use. */
+int num_ranges;
+
+/*
+ * Reference-model lookup.  Walks the recorded ranges from the most
+ * recently added to the oldest and returns the state of the first one
+ * that contains idx.  An index covered by no range counts as clear.
+ */
+static bool get_value(sparsebit_idx_t idx)
+{
+	int pos = num_ranges;
+
+	while (pos-- > 0) {
+		const struct range *r = &ranges[pos];
+
+		if (idx >= r->first && idx <= r->last)
+			return r->set;
+	}
+
+	return false;
+}
+
+/*
+ * Runs one fuzzer operation, selected by code, against the sparsebit
+ * array s and checks the outcome with assert(), using get_value() as the
+ * reference model where a model lookup is possible.
+ *
+ * first and last may arrive in either order; they are normalized so that
+ * first <= last, and num is the number of indices in first..last.
+ *
+ * Operations that change bits also append the change to ranges[].  When
+ * that log is full (1000 entries) the process exits with status 0.
+ * Codes above 11 do nothing.
+ */
+static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
+{
+	sparsebit_num_t num;
+	sparsebit_idx_t next;
+
+	/* Normalize to first <= last and derive the range length. */
+	if (first < last) {
+		num = last - first + 1;
+	} else {
+		num = first - last + 1;
+		first = last;
+		last = first + num - 1;
+	}
+
+	switch (code) {
+	case 0:
+		/* Set one bit; log it only if the model had it clear. */
+		sparsebit_set(s, first);
+		assert(sparsebit_is_set(s, first));
+		assert(!sparsebit_is_clear(s, first));
+		assert(sparsebit_any_set(s));
+		assert(!sparsebit_all_clear(s));
+		if (get_value(first))
+			return;
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = first, .set = true };
+		break;
+	case 1:
+		/* Clear one bit; log it only if the model had it set. */
+		sparsebit_clear(s, first);
+		assert(!sparsebit_is_set(s, first));
+		assert(sparsebit_is_clear(s, first));
+		assert(sparsebit_any_clear(s));
+		assert(!sparsebit_all_set(s));
+		if (!get_value(first))
+			return;
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = first, .set = false };
+		break;
+	case 2:
+		/* Query one bit and compare with the model. */
+		assert(sparsebit_is_set(s, first) == get_value(first));
+		assert(sparsebit_is_clear(s, first) == !get_value(first));
+		break;
+	case 3:
+		/*
+		 * Check first-set/first-clear against the model, then set
+		 * everything.  The model is reset to one all-covering set
+		 * range.
+		 */
+		if (sparsebit_any_set(s))
+			assert(get_value(sparsebit_first_set(s)));
+		if (sparsebit_any_clear(s))
+			assert(!get_value(sparsebit_first_clear(s)));
+		sparsebit_set_all(s);
+		assert(!sparsebit_any_clear(s));
+		assert(sparsebit_all_set(s));
+		num_ranges = 0;
+		ranges[num_ranges++] = (struct range)
+			{ .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
+		break;
+	case 4:
+		/*
+		 * Check first-set/first-clear against the model, then clear
+		 * everything.  An empty model means all clear.
+		 */
+		if (sparsebit_any_set(s))
+			assert(get_value(sparsebit_first_set(s)));
+		if (sparsebit_any_clear(s))
+			assert(!get_value(sparsebit_first_clear(s)));
+		sparsebit_clear_all(s);
+		assert(!sparsebit_any_set(s));
+		assert(sparsebit_all_clear(s));
+		num_ranges = 0;
+		break;
+	case 5:
+		/* A non-zero next-set result lies after first and is set. */
+		next = sparsebit_next_set(s, first);
+		assert(next == 0 || next > first);
+		assert(next == 0 || get_value(next));
+		break;
+	case 6:
+		/* A non-zero next-clear result lies after first and is clear. */
+		next = sparsebit_next_clear(s, first);
+		assert(next == 0 || next > first);
+		assert(next == 0 || !get_value(next));
+		break;
+	case 7:
+		/*
+		 * Cross-check sparsebit_is_set_num() on first..last with the
+		 * next-clear and next-set searches.
+		 */
+		next = sparsebit_next_clear(s, first);
+		if (sparsebit_is_set_num(s, first, num)) {
+			assert(next == 0 || next > last);
+			/* Searching from just before first must find first. */
+			if (first)
+				next = sparsebit_next_set(s, first - 1);
+			else if (sparsebit_any_set(s))
+				next = sparsebit_first_set(s);
+			else
+				return;
+			assert(next == first);
+		} else {
+			assert(sparsebit_is_clear(s, first) || next <= last);
+		}
+		break;
+	case 8:
+		/*
+		 * Cross-check sparsebit_is_clear_num() on first..last with
+		 * the next-set and next-clear searches.
+		 */
+		next = sparsebit_next_set(s, first);
+		if (sparsebit_is_clear_num(s, first, num)) {
+			assert(next == 0 || next > last);
+			/* Searching from just before first must find first. */
+			if (first)
+				next = sparsebit_next_clear(s, first - 1);
+			else if (sparsebit_any_clear(s))
+				next = sparsebit_first_clear(s);
+			else
+				return;
+			assert(next == first);
+		} else {
+			assert(sparsebit_is_set(s, first) || next <= last);
+		}
+		break;
+	case 9:
+		/* Set the range first..last and log it. */
+		sparsebit_set_num(s, first, num);
+		assert(sparsebit_is_set_num(s, first, num));
+		assert(!sparsebit_is_clear_num(s, first, num));
+		assert(sparsebit_any_set(s));
+		assert(!sparsebit_all_clear(s));
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = last, .set = true };
+		break;
+	case 10:
+		/* Clear the range first..last and log it. */
+		sparsebit_clear_num(s, first, num);
+		assert(!sparsebit_is_set_num(s, first, num));
+		assert(sparsebit_is_clear_num(s, first, num));
+		assert(sparsebit_any_clear(s));
+		assert(!sparsebit_all_set(s));
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = last, .set = false };
+		break;
+	case 11:
+		/* Check the internal invariants of the array. */
+		sparsebit_validate_internal(s);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Returns the next byte of standard input.  At end of input the process
+ * exits with status 0.
+ */
+unsigned char get8(void)
+{
+	int ch = getchar();
+
+	if (ch != EOF)
+		return ch;
+
+	exit(0);
+}
+
+/*
+ * Assembles a 64-bit value from the next eight input bytes.  The first
+ * byte read ends up as the most significant one.
+ */
+uint64_t get64(void)
+{
+	uint64_t x = 0;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		x = (x << 8) | get8();
+
+	return x;
+}
+
+/*
+ * Fuzz driver entry point.  Repeatedly reads a record from standard
+ * input, one opcode byte (low four bits used) followed by two 64-bit
+ * operands, and passes it to operate().  The loop has no exit of its
+ * own; the process ends through exit() in get8() or operate().
+ */
+int main(void)
+{
+	s = sparsebit_alloc();
+
+	while (1) {
+		uint8_t op;
+		uint64_t first, last;
+
+		op = get8() & 0xf;
+		first = get64();
+		last = get64();
+
+		operate(op, first, last);
+	}
+}
+#endif
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
new file mode 100644
index 000000000..8e04c0b16
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/test_util.c
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "test_util.h"
+
+/*
+ * Parses a size given as a number with an optional scale suffix and
+ * returns the scaled value.
+ *
+ * The string must start with a digit.  The number is converted by
+ * strtoull() with base 0, so decimal, octal ("0...") and hexadecimal
+ * ("0x...") are accepted.  The first character after the number selects
+ * the scale, case-insensitively: 'k', 'm', 'g' or 't' shift the number
+ * left by 10, 20, 30 or 40 bits; 'b' or end of string leaves it unscaled.
+ * Anything following the scale character is not examined.
+ *
+ * A missing leading digit, an unknown scale letter or an overflow fails
+ * a TEST_ASSERT.
+ */
+size_t parse_size(const char *size)
+{
+	size_t base;
+	char *scale;
+	int shift = 0;
+
+	/*
+	 * <ctype.h> functions require a value representable as unsigned
+	 * char; a plain char may be negative.  A NULL string is printed
+	 * through a placeholder rather than handed to "%s".
+	 */
+	TEST_ASSERT(size && isdigit((unsigned char)size[0]),
+		    "Need at least one digit in '%s'", size ? size : "(null)");
+
+	base = strtoull(size, &scale, 0);
+
+	TEST_ASSERT(base != ULLONG_MAX, "Overflow parsing size!");
+
+	switch (tolower((unsigned char)*scale)) {
+	case 't':
+		shift = 40;
+		break;
+	case 'g':
+		shift = 30;
+		break;
+	case 'm':
+		shift = 20;
+		break;
+	case 'k':
+		shift = 10;
+		break;
+	case 'b':
+	case '\0':
+		shift = 0;
+		break;
+	default:
+		TEST_ASSERT(false, "Unknown size letter %c", *scale);
+	}
+
+	/* Shifting back must recover the number, else high bits were lost. */
+	TEST_ASSERT((base << shift) >> shift == base, "Overflow scaling size!");
+
+	return base << shift;
+}
+
+/* Converts ts to a single signed count of nanoseconds. */
+int64_t timespec_to_ns(struct timespec ts)
+{
+	int64_t sec_as_ns = 1000000000LL * (int64_t)ts.tv_sec;
+
+	return (int64_t)ts.tv_nsec + sec_as_ns;
+}
+
+/*
+ * Returns ts advanced by ns nanoseconds; ns may be negative.
+ *
+ * The sum is formed in 64 bits, so it is not truncated where tv_nsec is
+ * a 32-bit long, and the result is normalized so that tv_nsec always
+ * lies in 0..999999999.  A negative total is therefore expressed as a
+ * negative tv_sec plus a non-negative tv_nsec; timespec_to_ns() of the
+ * result still yields the exact total.
+ */
+struct timespec timespec_add_ns(struct timespec ts, int64_t ns)
+{
+	struct timespec res;
+	int64_t nsec = (int64_t)ts.tv_nsec + ns;
+	int64_t carry = nsec / 1000000000LL;
+
+	nsec %= 1000000000LL;
+
+	/* C division truncates toward zero; borrow a second if needed. */
+	if (nsec < 0) {
+		nsec += 1000000000LL;
+		carry--;
+	}
+
+	res.tv_sec = ts.tv_sec + carry;
+	res.tv_nsec = nsec;
+
+	return res;
+}
+
+/*
+ * Returns ts1 + ts2.  Both operands are converted to nanoseconds, added,
+ * and the sum is converted back.
+ */
+struct timespec timespec_add(struct timespec ts1, struct timespec ts2)
+{
+	int64_t total_ns = timespec_to_ns(ts1) + timespec_to_ns(ts2);
+
+	return timespec_add_ns((struct timespec){0}, total_ns);
+}
+
+/*
+ * Returns ts1 - ts2.  Both operands are converted to nanoseconds,
+ * subtracted, and the difference is converted back.
+ */
+struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
+{
+	int64_t delta_ns = timespec_to_ns(ts1) - timespec_to_ns(ts2);
+
+	return timespec_add_ns((struct timespec){0}, delta_ns);
+}
+
+/*
+ * Returns the time elapsed since start, measured against the current
+ * CLOCK_MONOTONIC reading.  The result of clock_gettime() is not checked.
+ */
+struct timespec timespec_diff_now(struct timespec start)
+{
+	struct timespec now;
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	return timespec_sub(now, start);
+}
+
+/*
+ * Returns ts divided by divisor.  The division is an integer division
+ * of the nanosecond count; divisor must not be zero.
+ */
+struct timespec timespec_div(struct timespec ts, int divisor)
+{
+	int64_t total_ns = timespec_to_ns(ts);
+
+	return timespec_add_ns((struct timespec){0}, total_ns / divisor);
+}
+
+/*
+ * Prints the printf-style message given by fmt and its arguments to
+ * stdout, followed by ", skipping test" and a newline.  fmt must not be
+ * NULL.
+ */
+void print_skip(const char *fmt, ...)
+{
+	va_list args;
+
+	assert(fmt);
+
+	va_start(args, fmt);
+	vprintf(fmt, args);
+	va_end(args);
+
+	puts(", skipping test");
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
new file mode 100644
index 000000000..aaf7bc7d2
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
@@ -0,0 +1,81 @@
+/*
+ * Common exception entry, reached by a jump from the per-vector stubs
+ * generated by the HANDLERS macro in this file.  On entry the stub has
+ * pushed the vector number on top of an error code (the CPU's own, or a
+ * 0 pushed by the stub).
+ *
+ * Saves fifteen general purpose registers (all but %rsp), calls
+ * route_exception with the stack pointer in %rdi, restores the
+ * registers, drops the vector and error code and returns with iretq.
+ */
+handle_exception:
+	/* Save registers; %rax is pushed last and ends up at (%rsp). */
+	push %r15
+	push %r14
+	push %r13
+	push %r12
+	push %r11
+	push %r10
+	push %r9
+	push %r8
+
+	push %rdi
+	push %rsi
+	push %rbp
+	push %rbx
+	push %rdx
+	push %rcx
+	push %rax
+	/*
+	 * %rdi now points at the saved registers.  Presumably this is the
+	 * first C argument of route_exception (SysV calling convention).
+	 */
+	mov %rsp, %rdi
+
+	call route_exception
+
+	/* Restore registers in the reverse order of the pushes above. */
+	pop %rax
+	pop %rcx
+	pop %rdx
+	pop %rbx
+	pop %rbp
+	pop %rsi
+	pop %rdi
+	pop %r8
+	pop %r9
+	pop %r10
+	pop %r11
+	pop %r12
+	pop %r13
+	pop %r14
+	pop %r15
+
+	/* Discard vector and error code. */
+	add $16, %rsp
+	iretq
+
+/*
+ * Build the handle_exception wrappers which push the vector/error code on the
+ * stack and an array of pointers to those wrappers.
+ */
+/* Start of the pointer array; HANDLERS appends one .quad per wrapper. */
+.pushsection .rodata
+.globl idt_handlers
+idt_handlers:
+.popsection
+
+/*
+ * HANDLERS has_error from to
+ *
+ * Emits one wrapper for every vector in \from..\to (inclusive), each
+ * aligned to 8 bytes, and appends the address of each wrapper to .rodata
+ * (continuing the idt_handlers array).
+ *
+ * A wrapper pushes a 0 in place of the error code when has_error is 0,
+ * then pushes its vector number and jumps to handle_exception.  With
+ * has_error set to 1 only the vector is pushed.
+ */
+.macro HANDLERS has_error from to
+	vector = \from
+	.rept \to - \from + 1
+	.align 8
+
+	/* Fetch current address and append it to idt_handlers. */
+	current_handler = .
+.pushsection .rodata
+.quad current_handler
+.popsection
+
+	.if ! \has_error
+	pushq $0
+	.endif
+	pushq $vector
+	jmp handle_exception
+	vector = vector + 1
+	.endr
+.endm
+
+/*
+ * Wrappers for vectors 0 through 255.  has_error=1 is used for vectors
+ * 8, 10-14 and 17; all other vectors get the 0 placeholder.
+ */
+.global idt_handler_code
+idt_handler_code:
+	HANDLERS has_error=0 from=0  to=7
+	HANDLERS has_error=1 from=8  to=8
+	HANDLERS has_error=0 from=9  to=9
+	HANDLERS has_error=1 from=10 to=14
+	HANDLERS has_error=0 from=15 to=16
+	HANDLERS has_error=1 from=17 to=17
+	HANDLERS has_error=0 from=18 to=255
+
+.section        .note.GNU-stack, "", %progbits
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
new file mode 100644
index 000000000..f5d2d27be
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -0,0 +1,1258 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/x86_64/processor.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+#ifndef NUM_INTERRUPTS
+#define NUM_INTERRUPTS 256
+#endif
+
+#define DEFAULT_CODE_SELECTOR 0x8
+#define DEFAULT_DATA_SELECTOR 0x10
+
+/* Minimum physical address used for virtual translation tables. */
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+vm_vaddr_t exception_handlers;
+
+/* Virtual translation table structure declarations */
+
+/*
+ * Entry of the top-level table, the one vm->pgd points to, described as
+ * bit-fields whose widths add up to 64 bits.  virt_pg_map() stores a
+ * page frame number in address (physical address >> vm->page_shift).
+ */
+struct pageMapL4Entry {
+	uint64_t present:1;
+	uint64_t writable:1;
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t ignored_06:1;
+	uint64_t page_size:1;
+	uint64_t ignored_11_08:4;
+	uint64_t address:40;	/* page frame number of the next table */
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;
+};
+
+/*
+ * Entry of the second table level (reached through a pageMapL4Entry),
+ * described as bit-fields whose widths add up to 64 bits.
+ */
+struct pageDirectoryPointerEntry {
+	uint64_t present:1;
+	uint64_t writable:1;
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t ignored_06:1;
+	uint64_t page_size:1;
+	uint64_t ignored_11_08:4;
+	uint64_t address:40;	/* page frame number of the next table */
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;
+};
+
+/*
+ * Entry of the third table level (reached through a
+ * pageDirectoryPointerEntry), described as bit-fields whose widths add
+ * up to 64 bits.
+ */
+struct pageDirectoryEntry {
+	uint64_t present:1;
+	uint64_t writable:1;
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t ignored_06:1;
+	uint64_t page_size:1;
+	uint64_t ignored_11_08:4;
+	uint64_t address:40;	/* page frame number of the page table */
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;
+};
+
+/*
+ * Entry of the last table level, the one that maps a page, described as
+ * bit-fields whose widths add up to 64 bits.  Unlike the upper levels it
+ * has dirty and global bits.
+ */
+struct pageTableEntry {
+	uint64_t present:1;
+	uint64_t writable:1;
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t dirty:1;
+	uint64_t reserved_07:1;
+	uint64_t global:1;
+	uint64_t ignored_11_09:3;
+	uint64_t address:40;	/* page frame number of the mapped page */
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;
+};
+
+/*
+ * Dumps the general purpose registers, rip and rflags held in @regs to
+ * @stream as five lines, each indented by @indent spaces.
+ */
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+	       uint8_t indent)
+{
+	const int pad = indent;
+
+	fprintf(stream,
+		"%*srax: 0x%.16llx rbx: 0x%.16llx rcx: 0x%.16llx rdx: 0x%.16llx\n",
+		pad, "", regs->rax, regs->rbx, regs->rcx, regs->rdx);
+
+	fprintf(stream,
+		"%*srsi: 0x%.16llx rdi: 0x%.16llx rsp: 0x%.16llx rbp: 0x%.16llx\n",
+		pad, "", regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+
+	fprintf(stream,
+		"%*sr8:  0x%.16llx r9:  0x%.16llx r10: 0x%.16llx r11: 0x%.16llx\n",
+		pad, "", regs->r8, regs->r9, regs->r10, regs->r11);
+
+	fprintf(stream,
+		"%*sr12: 0x%.16llx r13: 0x%.16llx r14: 0x%.16llx r15: 0x%.16llx\n",
+		pad, "", regs->r12, regs->r13, regs->r14, regs->r15);
+
+	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+		pad, "", regs->rip, regs->rflags);
+}
+
+/*
+ * Segment Dump
+ *
+ * Input Args:
+ *   stream  - Output FILE stream
+ *   segment - KVM segment
+ *   indent  - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Writes every field of the KVM segment @segment to @stream as three
+ * lines, each indented by @indent spaces.
+ */
+static void segment_dump(FILE *stream, struct kvm_segment *segment,
+			 uint8_t indent)
+{
+	const int pad = indent;
+
+	fprintf(stream,
+		"%*sbase: 0x%.16llx limit: 0x%.8x selector: 0x%.4x type: 0x%.2x\n",
+		pad, "", segment->base, segment->limit,
+		segment->selector, segment->type);
+
+	fprintf(stream,
+		"%*spresent: 0x%.2x dpl: 0x%.2x db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+		pad, "", segment->present, segment->dpl,
+		segment->db, segment->s, segment->l);
+
+	fprintf(stream,
+		"%*sg: 0x%.2x avl: 0x%.2x unusable: 0x%.2x padding: 0x%.2x\n",
+		pad, "", segment->g, segment->avl,
+		segment->unusable, segment->padding);
+}
+
+/*
+ * dtable Dump
+ *
+ * Input Args:
+ *   stream - Output FILE stream
+ *   dtable - KVM dtable
+ *   indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Writes the base, limit and three padding words of the KVM dtable
+ * @dtable to @stream as one line indented by @indent spaces.
+ */
+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
+			uint8_t indent)
+{
+	const int pad = indent;
+
+	fprintf(stream,
+		"%*sbase: 0x%.16llx limit: 0x%.4x padding: 0x%.4x 0x%.4x 0x%.4x\n",
+		pad, "",
+		dtable->base, dtable->limit,
+		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+/*
+ * Dumps the special registers in @sregs to @stream: the eight segment
+ * registers, the GDT and IDT descriptors, the control registers, EFER
+ * and the APIC base, and finally the interrupt bitmap one 64-bit word
+ * per line.  Top-level lines are indented by @indent spaces and nested
+ * ones by two more.
+ */
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+		uint8_t indent)
+{
+	const int pad = indent;
+	const int nested = indent + 2;
+	const unsigned int bitmap_words = (KVM_NR_INTERRUPTS + 63) / 64;
+	unsigned int word;
+
+	/* Segment registers */
+	fprintf(stream, "%*scs:\n", pad, "");
+	segment_dump(stream, &sregs->cs, nested);
+	fprintf(stream, "%*sds:\n", pad, "");
+	segment_dump(stream, &sregs->ds, nested);
+	fprintf(stream, "%*ses:\n", pad, "");
+	segment_dump(stream, &sregs->es, nested);
+	fprintf(stream, "%*sfs:\n", pad, "");
+	segment_dump(stream, &sregs->fs, nested);
+	fprintf(stream, "%*sgs:\n", pad, "");
+	segment_dump(stream, &sregs->gs, nested);
+	fprintf(stream, "%*sss:\n", pad, "");
+	segment_dump(stream, &sregs->ss, nested);
+	fprintf(stream, "%*str:\n", pad, "");
+	segment_dump(stream, &sregs->tr, nested);
+	fprintf(stream, "%*sldt:\n", pad, "");
+	segment_dump(stream, &sregs->ldt, nested);
+
+	/* Descriptor tables */
+	fprintf(stream, "%*sgdt:\n", pad, "");
+	dtable_dump(stream, &sregs->gdt, nested);
+	fprintf(stream, "%*sidt:\n", pad, "");
+	dtable_dump(stream, &sregs->idt, nested);
+
+	/* Control registers, EFER and APIC base */
+	fprintf(stream,
+		"%*scr0: 0x%.16llx cr2: 0x%.16llx cr3: 0x%.16llx cr4: 0x%.16llx\n",
+		pad, "",
+		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+	fprintf(stream,
+		"%*scr8: 0x%.16llx efer: 0x%.16llx apic_base: 0x%.16llx\n",
+		pad, "",
+		sregs->cr8, sregs->efer, sregs->apic_base);
+
+	/* Interrupt bitmap */
+	fprintf(stream, "%*sinterrupt_bitmap:\n", pad, "");
+	for (word = 0; word < bitmap_words; word++)
+		fprintf(stream, "%*s%.16llx\n", nested, "",
+			sregs->interrupt_bitmap[word]);
+}
+
+/*
+ * Allocates the top-level page table of @vm from memslot @pgd_memslot,
+ * unless one was already created, and records its physical address in
+ * vm->pgd.  Only VM_MODE_PXXV48_4K is supported.
+ */
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+		"Attempt to use unknown or unsupported guest mode, mode: 0x%x",
+		vm->mode);
+
+	/* Nothing to do when the page map l4 table already exists. */
+	if (vm->pgd_created)
+		return;
+
+	vm->pgd = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+				    pgd_memslot);
+	vm->pgd_created = true;
+}
+
+/*
+ * Maps the page at guest virtual address @vaddr to guest physical
+ * address @paddr in the page tables of @vm.  Intermediate tables that
+ * are not yet present are allocated from memslot @pgd_memslot.  Every
+ * entry written is marked present and writable.
+ *
+ * Both addresses must be page aligned, @vaddr must be marked valid in
+ * vm->vpages_valid and @paddr must not lie beyond vm->max_gfn.  Only
+ * VM_MODE_PXXV48_4K is supported.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+	uint32_t pgd_memslot)
+{
+	struct pageMapL4Entry *pml4e;
+	struct pageDirectoryPointerEntry *pdpe;
+	struct pageDirectoryEntry *pde;
+	struct pageTableEntry *pte;
+	uint16_t pml4_idx, pdpt_idx, pd_idx, pt_idx;
+
+	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+		"Attempt to use unknown or unsupported guest mode, mode: 0x%x",
+		vm->mode);
+
+	TEST_ASSERT((vaddr % vm->page_size) == 0,
+		"Virtual address not on page boundary,\n"
+		"  vaddr: 0x%lx vm->page_size: 0x%x",
+		vaddr, vm->page_size);
+	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+		(vaddr >> vm->page_shift)),
+		"Invalid virtual address, vaddr: 0x%lx",
+		vaddr);
+	TEST_ASSERT((paddr % vm->page_size) == 0,
+		"Physical address not on page boundary,\n"
+		"  paddr: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->page_size);
+	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+		"Physical address beyond beyond maximum supported,\n"
+		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->max_gfn, vm->page_size);
+
+	/* Split the virtual address into one 9-bit index per table level. */
+	pml4_idx = (vaddr >> 39) & 0x1ffu;
+	pdpt_idx = (vaddr >> 30) & 0x1ffu;
+	pd_idx = (vaddr >> 21) & 0x1ffu;
+	pt_idx = (vaddr >> 12) & 0x1ffu;
+
+	/* Allocate page directory pointer table if not present. */
+	pml4e = addr_gpa2hva(vm, vm->pgd);
+	pml4e = &pml4e[pml4_idx];
+	if (!pml4e->present) {
+		pml4e->address = vm_phy_page_alloc(vm,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			>> vm->page_shift;
+		pml4e->writable = true;
+		pml4e->present = true;
+	}
+
+	/* Allocate page directory table if not present. */
+	pdpe = addr_gpa2hva(vm, pml4e->address * vm->page_size);
+	pdpe = &pdpe[pdpt_idx];
+	if (!pdpe->present) {
+		pdpe->address = vm_phy_page_alloc(vm,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			>> vm->page_shift;
+		pdpe->writable = true;
+		pdpe->present = true;
+	}
+
+	/* Allocate page table if not present. */
+	pde = addr_gpa2hva(vm, pdpe->address * vm->page_size);
+	pde = &pde[pd_idx];
+	if (!pde->present) {
+		pde->address = vm_phy_page_alloc(vm,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			>> vm->page_shift;
+		pde->writable = true;
+		pde->present = true;
+	}
+
+	/* Fill in page table entry. */
+	pte = addr_gpa2hva(vm, pde->address * vm->page_size);
+	pte = &pte[pt_idx];
+	pte->address = paddr >> vm->page_shift;
+	pte->writable = true;
+	pte->present = 1;
+}
+
+/*
+ * Dumps the page tables of @vm to @stream, every line indented by
+ * @indent spaces.  After a two-line column header, one line is written
+ * for each present entry at each of the four table levels, walking the
+ * tables depth first.  Page table entry lines additionally show the
+ * dirty bit and the four table indices combined into a single number.
+ * Nothing is written when the VM has no page tables yet.
+ */
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	struct pageMapL4Entry *pml4e, *pml4e_start;
+	struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
+	struct pageDirectoryEntry *pde, *pde_start;
+	struct pageTableEntry *pte, *pte_start;
+	unsigned int l4, l3, l2, l1;
+
+	if (!vm->pgd_created)
+		return;
+
+	/* Column header */
+	fprintf(stream, "%*s                                          "
+		"                no\n", indent, "");
+	fprintf(stream, "%*s      index hvaddr         gpaddr         "
+		"addr         w exec dirty\n",
+		indent, "");
+
+	pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
+		vm->pgd);
+	for (l4 = 0; l4 < 512; l4++) {
+		pml4e = &pml4e_start[l4];
+		if (!pml4e->present)
+			continue;
+
+		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+			" %u\n",
+			indent, "",
+			pml4e - pml4e_start, pml4e,
+			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
+			pml4e->writable, pml4e->execute_disable);
+
+		pdpe_start = addr_gpa2hva(vm, pml4e->address
+			* vm->page_size);
+		for (l3 = 0; l3 < 512; l3++) {
+			pdpe = &pdpe_start[l3];
+			if (!pdpe->present)
+				continue;
+
+			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
+				"%u  %u\n",
+				indent, "",
+				pdpe - pdpe_start, pdpe,
+				addr_hva2gpa(vm, pdpe),
+				(uint64_t) pdpe->address, pdpe->writable,
+				pdpe->execute_disable);
+
+			pde_start = addr_gpa2hva(vm,
+				pdpe->address * vm->page_size);
+			for (l2 = 0; l2 < 512; l2++) {
+				pde = &pde_start[l2];
+				if (!pde->present)
+					continue;
+
+				fprintf(stream, "%*spde   0x%-3zx %p "
+					"0x%-12lx 0x%-10lx %u  %u\n",
+					indent, "", pde - pde_start, pde,
+					addr_hva2gpa(vm, pde),
+					(uint64_t) pde->address, pde->writable,
+					pde->execute_disable);
+
+				pte_start = addr_gpa2hva(vm,
+					pde->address * vm->page_size);
+				for (l1 = 0; l1 < 512; l1++) {
+					pte = &pte_start[l1];
+					if (!pte->present)
+						continue;
+
+					fprintf(stream, "%*spte   0x%-3zx %p "
+						"0x%-12lx 0x%-10lx %u  %u "
+						"    %u    0x%-10lx\n",
+						indent, "",
+						pte - pte_start, pte,
+						addr_hva2gpa(vm, pte),
+						(uint64_t) pte->address,
+						pte->writable,
+						pte->execute_disable,
+						pte->dirty,
+						((uint64_t) l4 << 27)
+							| ((uint64_t) l3 << 18)
+							| ((uint64_t) l2 << 9)
+							| ((uint64_t) l1));
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *   segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Zeroes the segment register pointed to by @segp and then flags it as
+ * unusable.
+ */
+static void kvm_seg_set_unusable(struct kvm_segment *segp)
+{
+	memset(segp, 0, sizeof(struct kvm_segment));
+	segp->unusable = 1;
+}
+
+/*
+ * Writes the descriptor for the segment @segp into the guest GDT of @vm.
+ * The descriptor slot is chosen by the selector with its low three bits
+ * dropped; slots are 8 bytes apart.  The upper 32 bits of the base are
+ * only stored when the segment's s flag is clear.
+ */
+static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
+{
+	void *gdt = addr_gva2hva(vm, vm->gdt);
+	struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
+
+	/* Limit, split over two fields */
+	desc->limit0 = segp->limit & 0xFFFF;
+	desc->limit1 = segp->limit >> 16;
+
+	/* Base, split over up to four fields */
+	desc->base0 = segp->base & 0xFFFF;
+	desc->base1 = segp->base >> 16;
+	desc->base2 = segp->base >> 24;
+	if (!segp->s)
+		desc->base3 = segp->base >> 32;
+
+	/* Type and attribute flags */
+	desc->type = segp->type;
+	desc->s = segp->s;
+	desc->dpl = segp->dpl;
+	desc->p = segp->present;
+	desc->avl = segp->avl;
+	desc->l = segp->l;
+	desc->db = segp->db;
+	desc->g = segp->g;
+}
+
+
+/*
+ * Set Long Mode Flat Kernel Code Segment
+ *
+ * Input Args:
+ *   vm - VM whose GDT is being filled, or NULL to only write segp
+ *   selector - selector value
+ *
+ * Output Args:
+ *   segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Initializes the KVM segment pointed to by @segp as a present code
+ * segment with the l and g flags set, the maximum limit and the selector
+ * given by @selector.  When @vm is not NULL the matching GDT descriptor
+ * is written as well.
+ */
+static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
+	struct kvm_segment *segp)
+{
+	memset(segp, 0, sizeof(*segp));
+
+	segp->selector = selector;
+	segp->limit = 0xFFFFFFFFu;
+
+	/* kFlagCode | kFlagCodeAccessed | kFlagCodeReadable */
+	segp->type = 0x08 | 0x01 | 0x02;
+	segp->s = 0x1; /* kTypeCodeData */
+	segp->present = 1;
+	segp->l = true;
+	segp->g = true;
+
+	if (!vm)
+		return;
+
+	kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+/*
+ * Set Long Mode Flat Kernel Data Segment
+ *
+ * Input Args:
+ *   vm - VM whose GDT is being filled, or NULL to only write segp
+ *   selector - selector value
+ *
+ * Output Args:
+ *   segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Initializes the KVM segment pointed to by @segp as a present data
+ * segment with the g flag set, the maximum limit and the selector given
+ * by @selector.  When @vm is not NULL the matching GDT descriptor is
+ * written as well.
+ */
+static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
+	struct kvm_segment *segp)
+{
+	memset(segp, 0, sizeof(*segp));
+
+	segp->selector = selector;
+	segp->limit = 0xFFFFFFFFu;
+
+	/* kFlagData | kFlagDataAccessed | kFlagDataWritable */
+	segp->type = 0x00 | 0x01 | 0x02;
+	segp->s = 0x1; /* kTypeCodeData */
+	segp->present = true;
+	segp->g = true;
+
+	if (!vm)
+		return;
+
+	kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+/*
+ * Translates the guest virtual address @gva to a guest physical address
+ * by walking the four levels of page tables of @vm.  If no top-level
+ * table was created, or an entry on the way is not present, the test
+ * fails.  Only VM_MODE_PXXV48_4K is supported.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint16_t index[4];
+	struct pageMapL4Entry *pml4e;
+	struct pageDirectoryPointerEntry *pdpe;
+	struct pageDirectoryEntry *pde;
+	struct pageTableEntry *pte;
+
+	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+		"Attempt to use unknown or unsupported guest mode, mode: 0x%x",
+		vm->mode);
+
+	/* One 9-bit index per level; index[3] selects the top-level entry. */
+	index[0] = (gva >> 12) & 0x1ffu;
+	index[1] = (gva >> 21) & 0x1ffu;
+	index[2] = (gva >> 30) & 0x1ffu;
+	index[3] = (gva >> 39) & 0x1ffu;
+
+	/* Leave the walk at the first level that has no mapping. */
+	do {
+		if (!vm->pgd_created)
+			break;
+
+		pml4e = addr_gpa2hva(vm, vm->pgd);
+		if (!pml4e[index[3]].present)
+			break;
+
+		pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+		if (!pdpe[index[2]].present)
+			break;
+
+		pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+		if (!pde[index[1]].present)
+			break;
+
+		pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+		if (!pte[index[0]].present)
+			break;
+
+		/* Page base plus the offset within the page. */
+		return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
+	} while (0);
+
+	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Allocates one page of guest virtual memory for the GDT of @vm if none
+ * has been allocated yet, then describes it in @dt: base is the GDT
+ * address and limit is the page size.
+ */
+static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
+			  int pgd_memslot)
+{
+	if (vm->gdt == 0) {
+		vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
+					 KVM_UTIL_MIN_VADDR,
+					 gdt_memslot, pgd_memslot);
+	}
+
+	dt->limit = getpagesize();
+	dt->base = vm->gdt;
+}
+
+static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
+				int selector, int gdt_memslot,
+				int pgd_memslot)
+{
+	/* The TSS page is allocated once per VM and reused afterwards. */
+	if (vm->tss == 0)
+		vm->tss = vm_vaddr_alloc(vm, getpagesize(),
+			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+	memset(segp, 0, sizeof(*segp));
+	segp->selector = selector;
+	segp->base = vm->tss;
+	segp->limit = 0x67;
+	segp->type = 0xb;
+	segp->present = 1;
+	kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+{
+	struct kvm_sregs sregs;
+
+	/* Set mode specific system register values. */
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+
+	sregs.idt.limit = 0;
+	/* NOTE(review): presumably must run before the segment helpers below fill GDT entries. */
+	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
+
+	switch (vm->mode) {
+	case VM_MODE_PXXV48_4K:
+		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
+		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+		kvm_seg_set_unusable(&sregs.ldt);
+		kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
+		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
+		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
+		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
+		break;
+
+	default:
+		TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+	}
+	/* cr3 takes the VM's page table root (vm->pgd) before sregs is written back. */
+	sregs.cr3 = vm->pgd;
+	vcpu_sregs_set(vm, vcpuid, &sregs);
+}
+
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+	struct kvm_mp_state mp_state;
+	struct kvm_regs regs;
+	vm_vaddr_t stack_base;
+	/* Guest stack: DEFAULT_STACK_PGS pages of guest virtual memory. */
+	stack_base = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+				    DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+	/* Create the vCPU and give it the default system register state. */
+	vm_vcpu_add(vm, vcpuid);
+	vcpu_setup(vm, vcpuid, 0, 0);
+
+	/* rflags bit 1 set, rsp = end of the stack allocation, rip = guest_code. */
+	vcpu_regs_get(vm, vcpuid, &regs);
+	regs.rflags |= 0x2;
+	regs.rsp = stack_base + (DEFAULT_STACK_PGS * getpagesize());
+	regs.rip = (unsigned long) guest_code;
+	vcpu_regs_set(vm, vcpuid, &regs);
+
+	/* Setup the MP state */
+	mp_state.mp_state = 0;
+	vcpu_set_mp_state(vm, vcpuid, &mp_state);
+}
+
+/*
+ * Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct, with its nent field set to
+ * the capacity of the entries array. The caller is responsible for
+ * freeing this struct.
+ *
+ * Since kvm_cpuid2 uses a 0-length array to allow the size of the
+ * array to be decided at allocation time, allocation is slightly
+ * complicated. This function uses a fixed default length (100 entries)
+ * for the array and performs the appropriate allocation. On allocation
+ * failure the process is aborted, so NULL is never returned.
+ */
+static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+	const int max_entries = 100;
+	struct kvm_cpuid2 *buf;
+
+	/* Header plus room for max_entries trailing entries. */
+	buf = malloc(sizeof(*buf) +
+		     max_entries * sizeof(struct kvm_cpuid_entry2));
+	if (buf == NULL) {
+		perror("malloc");
+		abort();
+	}
+
+	buf->nent = max_entries;
+	return buf;
+}
+
+/*
+ * KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: The supported KVM CPUID
+ *
+ * Get the guest CPUID supported by KVM. The result is fetched through
+ * KVM_DEV_PATH on the first call and cached in a function-local static;
+ * later calls return the same pointer. Exits with KSFT_SKIP if
+ * KVM_DEV_PATH cannot be opened.
+ */
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+	static struct kvm_cpuid2 *cpuid;
+	int kvm_fd, ret;
+
+	if (cpuid == NULL) {
+		cpuid = allocate_kvm_cpuid2();
+		kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+		if (kvm_fd < 0)
+			exit(KSFT_SKIP);
+
+		ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+		TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+			    ret, errno);
+		close(kvm_fd);
+	}
+	return cpuid;
+}
+
+/*
+ * Locate a cpuid entry.
+ *
+ * Input Args:
+ *   function: The function of the cpuid entry to find.
+ *   index: The index of the cpuid entry.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the first matching entry in the table returned by
+ * kvm_get_supported_cpuid(). Never returns NULL: a missing entry trips a
+ * TEST_ASSERT instead.
+ */
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+{
+	struct kvm_cpuid2 *cpuid = kvm_get_supported_cpuid();
+	struct kvm_cpuid_entry2 *entry = NULL;
+	int i;
+
+	for (i = 0; entry == NULL && i < cpuid->nent; i++) {
+		struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
+
+		if (cur->function == function && cur->index == index)
+			entry = cur;
+	}
+
+	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+		    function, index);
+	return entry;
+}
+
+/*
+ * VM VCPU CPUID Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU id
+ *   cpuid - The CPUID values to set.
+ *
+ * Output Args: None
+ *
+ * Return: void
+ *
+ * Set the VCPU's CPUID by issuing KVM_SET_CPUID2 on the vCPU's fd. An
+ * unknown vcpuid or a failing ioctl trips a TEST_ASSERT.
+ */
+void vcpu_set_cpuid(struct kvm_vm *vm,
+		uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int rc;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+	TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
+		    rc, errno);
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code)
+{
+	uint64_t extra_pg_pages;
+	struct kvm_vm *vm;
+
+	/*
+	 * Budget for the page tables that map extra_mem_pages. With 4K
+	 * pages only (the worst case on x86) N extra pages need
+	 * N/512 + N/512^2 + N/512^3 + ... table pages, which is
+	 * definitely smaller than N/512*2.
+	 */
+	extra_pg_pages = extra_mem_pages / 512 * 2;
+
+	/* Create VM */
+	vm = vm_create(VM_MODE_DEFAULT,
+		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+
+	/* Load the ELF image named by program_invocation_name into the VM. */
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+
+	/* Setup IRQ Chip */
+	vm_create_irqchip(vm);
+
+	/* Add the first vCPU. */
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+	return vm;
+}
+
+/*
+ * VCPU Get MSR
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   msr_index - Index of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
+ *
+ * Get value of MSR for VCPU, using a single-entry KVM_GET_MSRS request.
+ */
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	/* struct kvm_msrs header immediately followed by its one entry. */
+	struct {
+		struct kvm_msrs header;
+		struct kvm_msr_entry entry;
+	} req = { .header.nmsrs = 1, .entry.index = msr_index };
+	int r;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	r = ioctl(vcpu->fd, KVM_GET_MSRS, &req.header);
+	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
+		"  rc: %i errno: %i", r, errno);
+
+	return req.entry.data;
+}
+
+/*
+ * _VCPU Set MSR
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   msr_index - Index of MSR
+ *   msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: The result of KVM_SET_MSRS, unchecked.
+ *
+ * Sets the value of an MSR for the given VCPU.
+ */
+int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+		  uint64_t msr_value)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	/* struct kvm_msrs header immediately followed by its one entry. */
+	struct {
+		struct kvm_msrs header;
+		struct kvm_msr_entry entry;
+	} req;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	memset(&req, 0, sizeof(req));
+	req.entry.data = msr_value;
+	req.entry.index = msr_index;
+	req.header.nmsrs = 1;
+	return ioctl(vcpu->fd, KVM_SET_MSRS, &req.header);
+}
+
+/*
+ * VCPU Set MSR
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   msr_index - Index of MSR
+ *   msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ *
+ * Set value of MSR for VCPU.
+ */
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+	uint64_t msr_value)
+{
+	int r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value);
+
+	/* The request holds exactly one MSR, so success is a result of 1. */
+	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
+		"  rc: %i errno: %i", r, errno);
+}
+
+/*
+ * Pass up to six uint64_t arguments to the guest by loading them, in order,
+ * into rdi, rsi, rdx, rcx, r8 and r9 of the given vCPU.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+	struct kvm_regs regs;
+	/* Destination register for the 1st, 2nd, ... variadic argument. */
+	__u64 *const arg_regs[] = {
+		&regs.rdi,
+		&regs.rsi,
+		&regs.rdx,
+		&regs.rcx,
+		&regs.r8,
+		&regs.r9,
+	};
+	unsigned int i;
+	va_list ap;
+
+	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+		    "  num: %u\n",
+		    num);
+
+	va_start(ap, num);
+	vcpu_regs_get(vm, vcpuid, &regs);
+
+	/* Every argument is read as a uint64_t. */
+	for (i = 0; i < num; i++)
+		*arg_regs[i] = va_arg(ap, uint64_t);
+
+	vcpu_regs_set(vm, vcpuid, &regs);
+	va_end(ap);
+}
+
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+	const int hdr_indent = indent + 2, body_indent = indent + 4;
+	struct kvm_sregs sregs;
+	struct kvm_regs regs;
+
+	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+	/* General purpose registers first, then the system registers. */
+	fprintf(stream, "%*sregs:\n", hdr_indent, "");
+	vcpu_regs_get(vm, vcpuid, &regs);
+	regs_dump(stream, &regs, body_indent);
+	fprintf(stream, "%*ssregs:\n", hdr_indent, "");
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	sregs_dump(stream, &sregs, body_indent);
+}
+
+struct kvm_x86_state { /* filled by vcpu_save_state(), read by vcpu_load_state() */
+	struct kvm_vcpu_events events;
+	struct kvm_mp_state mp_state;
+	struct kvm_regs regs;
+	struct kvm_xsave xsave;
+	struct kvm_xcrs xcrs;
+	struct kvm_sregs sregs;
+	struct kvm_debugregs debugregs;
+	union {
+		struct kvm_nested_state nested;
+		char nested_[16384]; /* sizes the buffer handed to KVM_GET_NESTED_STATE */
+	};
+	struct kvm_msrs msrs; /* kept last: vcpu_save_state() allocates entries[] right behind it */
+};
+
+/* Probe KVM_GET_MSR_INDEX_LIST with a zero-length list to learn its size. */
+static int kvm_get_num_msrs_fd(int kvm_fd)
+{
+	struct kvm_msr_list nmsrs = { .nmsrs = 0 };
+	int r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+
+	/* The probe must fail with E2BIG; the count is read back from nmsrs. */
+	TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
+		r);
+
+	return nmsrs.nmsrs;
+}
+
+static int kvm_get_num_msrs(struct kvm_vm *vm)
+{
+	return kvm_get_num_msrs_fd(vm->kvm_fd); /* same probe, on the VM's own kvm_fd */
+}
+
+/* Return a malloc'ed MSR index list from KVM_GET_MSR_INDEX_LIST; caller frees. */
+struct kvm_msr_list *kvm_get_msr_index_list(void)
+{
+	struct kvm_msr_list *list;
+	int nmsrs, r, kvm_fd;
+
+	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	if (kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	nmsrs = kvm_get_num_msrs_fd(kvm_fd);
+	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+	TEST_ASSERT(list, "Failed to allocate MSR index list, nmsrs: %i", nmsrs);
+	list->nmsrs = nmsrs;
+	r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+	close(kvm_fd);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+		r);
+	return list;
+}
+
+/*
+ * Snapshot the vCPU's events, MP state, regs, xsave, xcrs, sregs, nested
+ * state, MSRs and debug registers into a malloc'ed kvm_x86_state that can
+ * be handed to vcpu_load_state(). Any failure trips a TEST_ASSERT.
+ */
+struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct kvm_msr_list *list;
+	struct kvm_x86_state *state;
+	int nmsrs, r, i;
+	static int nested_size = -1;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+	if (nested_size == -1) {
+		nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
+		TEST_ASSERT(nested_size <= sizeof(state->nested_),
+			    "Nested state size too big, %i > %zi",
+			    nested_size, sizeof(state->nested_));
+	}
+
+	/*
+	 * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
+	 * guest state is consistent only after userspace re-enters the
+	 * kernel with KVM_RUN.  Complete IO prior to migrating state
+	 * to a new VM.
+	 */
+	vcpu_run_complete_io(vm, vcpuid);
+
+	nmsrs = kvm_get_num_msrs(vm);
+	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+	TEST_ASSERT(list, "Failed to allocate MSR index list, nmsrs: %i", nmsrs);
+	list->nmsrs = nmsrs;
+	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+		    r);
+
+	/* msrs is the last member; its entries[] live in the extra space. */
+	state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
+	TEST_ASSERT(state, "Failed to allocate vCPU state, nmsrs: %i", nmsrs);
+	r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
+		    r);
+	r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
+		    r);
+	r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", r);
+	r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", r);
+	if (kvm_check_cap(KVM_CAP_XCRS)) {
+		r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
+		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
+			    r);
+	}
+	r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", r);
+
+	if (nested_size) {
+		state->nested.size = sizeof(state->nested_);
+		r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
+		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
+			r);
+		TEST_ASSERT(state->nested.size <= nested_size,
+			"Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+			state->nested.size, nested_size);
+	} else
+		state->nested.size = 0;
+
+	state->msrs.nmsrs = nmsrs;
+	for (i = 0; i < nmsrs; i++)
+		state->msrs.entries[i].index = list->indices[i];
+	r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
+	TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
+		    r, (r < 0 || r >= nmsrs) ? -1 : list->indices[r]);
+	r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
+		    r);
+
+	free(list);
+	return state;
+}
+
+/* Restore vCPU state captured by vcpu_save_state(); asserts on any failure. */
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int r;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", r);
+
+	if (kvm_check_cap(KVM_CAP_XCRS)) {
+		r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
+		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
+			    r);
+	}
+
+	r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", r);
+
+	r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
+	TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
+		    r, (r < 0 || r >= state->msrs.nmsrs) ? -1 : state->msrs.entries[r].index);
+
+	r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
+		    r);
+
+	r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
+		    r);
+
+	r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
+		    r);
+
+	r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", r);
+
+	if (state->nested.size) {
+		r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
+		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
+			r);
+	}
+}
+
+/* Return true if CPUID leaf 0 reports the vendor string "GenuineIntel". */
+bool is_intel_cpu(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+	uint32_t chunk[3];
+
+	__asm__ __volatile__(
+		"cpuid"
+		: /* output */ "=a"(eax), "=b"(ebx),
+		  "=c"(ecx), "=d"(edx)
+		: /* input */ "0"(0), "2"(0));
+	/* Copy rather than cast: a string literal need not be 4-byte aligned. */
+	memcpy(chunk, "GenuineIntel", sizeof(chunk));
+	return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
+}
+
+uint32_t kvm_get_cpuid_max_basic(void)
+{
+	return kvm_get_supported_cpuid_entry(0)->eax; /* eax of CPUID function 0 */
+}
+
+uint32_t kvm_get_cpuid_max_extended(void)
+{
+	return kvm_get_supported_cpuid_entry(0x80000000)->eax; /* eax of function 0x80000000 */
+}
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+{
+	struct kvm_cpuid_entry2 *entry;
+
+	/* SDM 4.1.4 */
+	if (kvm_get_cpuid_max_extended() >= 0x80000008) {
+		/* Function 0x80000008: eax[7:0] is PA bits, eax[15:8] VA bits. */
+		entry = kvm_get_supported_cpuid_entry(0x80000008);
+		*pa_bits = entry->eax & 0xff;
+		*va_bits = (entry->eax >> 8) & 0xff;
+		return;
+	}
+	/* Function absent: 32 VA bits; 36 PA bits with PAE, else 32. */
+	*pa_bits = (kvm_get_supported_cpuid_entry(1)->edx & (1 << 6)) ? 36 : 32;
+	*va_bits = 32;
+}
+
+struct idt_entry { /* one IDT gate, as written by set_idt_entry() */
+	uint16_t offset0; /* handler address bits 15:0 */
+	uint16_t selector;
+	uint16_t ist : 3;
+	uint16_t : 5;
+	uint16_t type : 4;
+	uint16_t : 1;
+	uint16_t dpl : 2;
+	uint16_t p : 1;
+	uint16_t offset1; /* handler address bits 31:16 */
+	uint32_t offset2; uint32_t reserved; /* offset2: handler address bits 63:32 */
+};
+
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+			  int dpl, unsigned short selector)
+{
+	struct idt_entry *idt = (struct idt_entry *)addr_gva2hva(vm, vm->idt);
+	struct idt_entry *gate = idt + vector;
+
+	/* Clear the gate first so ist and the unnamed fields end up zero. */
+	memset(gate, 0, sizeof(*gate));
+	/* The handler address is split across offset0/offset1/offset2. */
+	gate->offset0 = addr;
+	gate->offset1 = addr >> 16;
+	gate->offset2 = addr >> 32;
+	gate->selector = selector;
+	gate->type = 14;
+	gate->dpl = dpl;
+	gate->p = 1;
+}
+
+void kvm_exit_unexpected_vector(uint32_t value)
+{
+	outl(UNEXPECTED_VECTOR_PORT, value); /* checked for by assert_on_unhandled_exception() */
+}
+
+void route_exception(struct ex_regs *regs)
+{
+	typedef void(*handler)(struct ex_regs *);
+	handler *handlers = (handler *)exception_handlers;
+
+	/* No table, or no handler registered for this vector: report it. */
+	if (!handlers || !handlers[regs->vector]) {
+		kvm_exit_unexpected_vector(regs->vector);
+		return;
+	}
+	handlers[regs->vector](regs);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+	extern void *idt_handlers; /* indexed below as an array of handler addresses */
+	int i;
+
+	vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
+	vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
+	/* Handlers have the same address in both address spaces. */
+	for (i = 0; i < NUM_INTERRUPTS; i++)
+		set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
+			DEFAULT_CODE_SELECTOR);
+}
+
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct kvm_sregs sregs;
+	/* Load vm->idt/vm->gdt into this vCPU, then store vm->handlers in exception_handlers. */
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	sregs.idt.base = vm->idt;
+	sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+	sregs.gdt.base = vm->gdt;
+	sregs.gdt.limit = getpagesize() - 1;
+	kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
+	vcpu_sregs_set(vm, vcpuid, &sregs);
+	*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_handle_exception(struct kvm_vm *vm, int vector,
+			 void (*handler)(struct ex_regs *))
+{
+	/* Host view of the table at vm->handlers; slot @vector gets @handler. */
+	vm_vaddr_t *table = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+	table[vector] = (vm_vaddr_t)handler;
+}
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct kvm_run *run = vcpu_state(vm, vcpuid);
+	uint32_t *data;
+
+	/* Only a 4-byte access to UNEXPECTED_VECTOR_PORT is of interest. */
+	if (run->exit_reason != KVM_EXIT_IO ||
+	    run->io.port != UNEXPECTED_VECTOR_PORT || run->io.size != 4)
+		return;
+
+	/* Grab pointer to io data */
+	data = (void *)run + run->io.data_offset;
+	TEST_ASSERT(false, "Unexpected vectored event in guest (vector:0x%x)", *data);
+}
+
+/* Overwrite the entry of @cpuid matching @ent's function/index; false if none. */
+bool set_cpuid(struct kvm_cpuid2 *cpuid,
+	       struct kvm_cpuid_entry2 *ent)
+{
+	struct kvm_cpuid_entry2 *cur;
+	int i;
+
+	for (i = 0; i < cpuid->nent; i++) {
+		cur = &cpuid->entries[i];
+		if (cur->function == ent->function && cur->index == ent->index) {
+			memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2));
+			return true;
+		}
+	}
+
+	return false;
+}
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+		       uint64_t a3)
+{
+	uint64_t r;
+	/* nr goes in rax, a0..a3 in rbx/rcx/rdx/rsi; the result is read from rax. */
+	asm volatile("vmcall"
+		     : "=a"(r)
+		     : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+	return r;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
new file mode 100644
index 000000000..a58507a7b
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/x86_64/svm.c
+ * Helpers used for nested SVM testing
+ * Largely inspired from KVM unit test svm.c
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+#include "svm_util.h"
+
+struct gpr64_regs guest_regs;
+u64 rflags;
+
+/* Allocate memory regions for nested SVM tests.
+ *
+ * Input Args:
+ *   vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ *   p_svm_gva - The guest virtual address for the struct svm_test_data.
+ *
+ * Return:
+ *   Pointer to structure with the addresses of the SVM areas.
+ */
+struct svm_test_data *
+vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
+{
+	vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
+	vm_vaddr_t page_gva;
+
+	/* One page each for the VMCB and the save area, recorded as gva/hva/gpa. */
+	page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	svm->vmcb = (void *)page_gva;
+	svm->vmcb_hva = addr_gva2hva(vm, page_gva);
+	svm->vmcb_gpa = addr_gva2gpa(vm, page_gva);
+
+	page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	svm->save_area = (void *)page_gva;
+	svm->save_area_hva = addr_gva2hva(vm, page_gva);
+	svm->save_area_gpa = addr_gva2gpa(vm, page_gva);
+	*p_svm_gva = svm_gva;
+	return svm;
+}
+
+static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
+			 u64 base, u32 limit, u32 attr)
+{
+	seg->base = base;
+	seg->limit = limit;
+	seg->attrib = attr; /* attribute bits, e.g. the SVM_SELECTOR_* masks */
+	seg->selector = selector;
+}
+
+/*
+ * Zero the VMCB with "rep stosl" (sizeof(*vmcb) / 4 dword stores) instead
+ * of memset, since libc may not be available in L1 (and, even if it is,
+ * features that libc memset may want to use, like AVX, may not be enabled).
+ */
+static void clear_vmcb(struct vmcb *vmcb)
+{
+	int n = sizeof(*vmcb) / sizeof(u32);
+
+	asm volatile ("rep stosl" : "+c"(n), "+D"(vmcb) : "a"(0) : "memory");
+}
+
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
+{
+	struct vmcb *vmcb = svm->vmcb;
+	uint64_t vmcb_gpa = svm->vmcb_gpa;
+	struct vmcb_save_area *save = &vmcb->save;
+	struct vmcb_control_area *ctrl = &vmcb->control;
+	u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+	      | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
+	u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+		| SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
+	uint64_t efer;
+
+	efer = rdmsr(MSR_EFER);
+	wrmsr(MSR_EFER, efer | EFER_SVME); /* set the SVME bit in EFER */
+	wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
+	/* Zero the VMCB, then let vmsave (rax = vmcb_gpa) store state into it. */
+	clear_vmcb(vmcb);
+	asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
+	vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
+	vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
+	vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
+	vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr);
+	vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0);
+	vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0);
+	/* The remaining save-area fields copy the caller's own registers and MSRs. */
+	ctrl->asid = 1;
+	save->cpl = 0;
+	save->efer = rdmsr(MSR_EFER);
+	asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory");
+	asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory");
+	asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory");
+	asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory");
+	asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory");
+	asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory");
+	save->g_pat = rdmsr(MSR_IA32_CR_PAT);
+	save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
+	ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
+				(1ULL << INTERCEPT_VMMCALL);
+	/* Guest entry point and stack; guest_regs.rdi is set to the svm pointer. */
+	vmcb->save.rip = (u64)guest_rip;
+	vmcb->save.rsp = (u64)guest_rsp;
+	guest_regs.rdi = (u64)svm;
+}
+
+/*
+ * Save/restore the 64-bit general purpose registers except rax, rip and
+ * rsp, which are handed over directly through the VMCB guest state.
+ */
+#define SAVE_GPR_C				\
+	"xchg %%rbx, guest_regs+0x20\n\t"	\
+	"xchg %%rcx, guest_regs+0x10\n\t"	\
+	"xchg %%rdx, guest_regs+0x18\n\t"	\
+	"xchg %%rbp, guest_regs+0x30\n\t"	\
+	"xchg %%rsi, guest_regs+0x38\n\t"	\
+	"xchg %%rdi, guest_regs+0x40\n\t"	\
+	"xchg %%r8,  guest_regs+0x48\n\t"	\
+	"xchg %%r9,  guest_regs+0x50\n\t"	\
+	"xchg %%r10, guest_regs+0x58\n\t"	\
+	"xchg %%r11, guest_regs+0x60\n\t"	\
+	"xchg %%r12, guest_regs+0x68\n\t"	\
+	"xchg %%r13, guest_regs+0x70\n\t"	\
+	"xchg %%r14, guest_regs+0x78\n\t"	\
+	"xchg %%r15, guest_regs+0x80\n\t"
+/* xchg swaps register and memory, so the same sequence also does the load. */
+#define LOAD_GPR_C      SAVE_GPR_C
+
+/*
+ * Selftests do not use interrupts, so clgi/sti/cli/stgi are left out
+ * for now. The registers involved in LOAD/SAVE_GPR_C are eventually
+ * unmodified, so they do not need to be in the clobber list.
+ */
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
+{
+	asm volatile (
+		"vmload %[vmcb_gpa]\n\t"
+		"mov rflags, %%r15\n\t"	// rflags
+		"mov %%r15, 0x170(%[vmcb])\n\t"
+		"mov guest_regs, %%r15\n\t"	// rax
+		"mov %%r15, 0x1f8(%[vmcb])\n\t"
+		LOAD_GPR_C
+		"vmrun %[vmcb_gpa]\n\t"
+		SAVE_GPR_C
+		"mov 0x170(%[vmcb]), %%r15\n\t"	// rflags
+		"mov %%r15, rflags\n\t"
+		"mov 0x1f8(%[vmcb]), %%r15\n\t"	// rax
+		"mov %%r15, guest_regs\n\t"
+		"vmsave %[vmcb_gpa]\n\t"
+		: : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
+		: "r15", "memory");
+}
+
+bool nested_svm_supported(void)
+{
+	/* CPUID_SVM is tested against ecx of CPUID function 0x80000001. */
+	uint32_t ecx = kvm_get_supported_cpuid_entry(0x80000001)->ecx;
+
+	return ecx & CPUID_SVM;
+}
+
+void nested_svm_check_supported(void)
+{
+	if (nested_svm_supported())
+		return;
+	/* Skip the test instead of failing it when nested SVM is missing. */
+	print_skip("nested SVM not enabled");
+	exit(KSFT_SKIP);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
new file mode 100644
index 000000000..a3489973e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+void ucall_init(struct kvm_vm *vm, void *arg)
+{
+}
+/* ucall_init() and ucall_uninit() both have empty bodies in this file. */
+void ucall_uninit(struct kvm_vm *vm)
+{
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+	struct ucall uc = {
+		.cmd = cmd,
+	};
+	va_list va;
+	int i;
+	/* Silently cap the argument count at UCALL_MAX_ARGS. */
+	nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+	va_start(va, nargs);
+	for (i = 0; i < nargs; ++i)
+		uc.args[i] = va_arg(va, uint64_t);
+	va_end(va);
+	/* Port read on UCALL_PIO_PORT with &uc in rdi; get_ucall() reads rdi back. */
+	asm volatile("in %[port], %%al"
+		: : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory");
+}
+
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+	struct kvm_run *run = vcpu_state(vm, vcpu_id);
+	struct ucall ucall = {};
+	struct kvm_regs regs;
+
+	if (uc)
+		memset(uc, 0, sizeof(*uc));
+
+	/* Anything but a PIO exit on the ucall port is "no ucall": cmd stays 0. */
+	if (run->exit_reason != KVM_EXIT_IO || run->io.port != UCALL_PIO_PORT)
+		return ucall.cmd;
+
+	/* The guest left the address of its struct ucall in rdi. */
+	vcpu_regs_get(vm, vcpu_id, &regs);
+	memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(ucall));
+	vcpu_run_complete_io(vm, vcpu_id);
+	if (uc)
+		memcpy(uc, &ucall, sizeof(ucall));
+
+	return ucall.cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
new file mode 100644
index 000000000..2448b30e8
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -0,0 +1,553 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/x86_64/vmx.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define PAGE_SHIFT_4K  12
+
+#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
+
+bool enable_evmcs;
+
+struct hv_enlightened_vmcs *current_evmcs;
+struct hv_vp_assist_page *current_vp_assist;
+
+struct eptPageTableEntry {	/* one 64-bit entry; bit numbers assume LSB-first bit-field allocation */
+	uint64_t readable:1;		/* bit 0 */
+	uint64_t writable:1;		/* bit 1 */
+	uint64_t executable:1;		/* bit 2 */
+	uint64_t memory_type:3;		/* bits 5:3 */
+	uint64_t ignore_pat:1;		/* bit 6 */
+	uint64_t page_size:1;		/* bit 7 */
+	uint64_t accessed:1;		/* bit 8 */
+	uint64_t dirty:1;		/* bit 9 */
+	uint64_t ignored_11_10:2;	/* bits 11:10 */
+	uint64_t address:40;		/* bits 51:12, a page frame number (address >> 12) */
+	uint64_t ignored_62_52:11;	/* bits 62:52 */
+	uint64_t suppress_ve:1;		/* bit 63 */
+};
+
+struct eptPageTablePointer {	/* 64-bit value written to EPT_POINTER; LSB-first layout assumed */
+	uint64_t memory_type:3;		/* bits 2:0 */
+	uint64_t page_walk_length:3;	/* bits 5:3 */
+	uint64_t ad_enabled:1;		/* bit 6 */
+	uint64_t reserved_11_07:5;	/* bits 11:7 */
+	uint64_t address:40;		/* bits 51:12, root table address >> 12 */
+	uint64_t reserved_63_52:12;	/* bits 63:52 */
+};
+int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
+{
+	/* Version range reported back by KVM: low byte first, high byte last. */
+	uint16_t evmcs_ver;
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+		.args[0] = (unsigned long)&evmcs_ver,
+	};
+
+	vcpu_ioctl(vm, vcpu_id, KVM_ENABLE_CAP, &cap);
+
+	/* KVM should return supported EVMCS version range */
+	TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
+		    (evmcs_ver & 0xff) > 0,
+		    "Incorrect EVMCS version range: %x:%x\n",
+		    evmcs_ver & 0xff, evmcs_ver >> 8);
+
+	return evmcs_ver;
+}
+
+/* Allocate memory regions for nested VMX tests.
+ *
+ * Input Args:
+ *   vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ *   p_vmx_gva - The guest virtual address for the struct vmx_pages.
+ *
+ * Return:
+ *   Host pointer to the struct vmx_pages; it records gva, hva and gpa per area.
+ */
+struct vmx_pages *
+vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
+{
+	vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);	/* host view of the bookkeeping struct */
+
+	/* Setup of a region of guest memory for the vmxon region. */
+	vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
+	vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
+
+	/* Setup of a region of guest memory for a vmcs. */
+	vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
+	vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
+
+	/* Setup of a region of guest memory for the MSR bitmap. */
+	vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
+	vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
+	memset(vmx->msr_hva, 0, getpagesize());	/* start with an all-zero bitmap */
+
+	/* Setup of a region of guest memory for the shadow VMCS. */
+	vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
+	vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
+
+	/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
+	vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
+	vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
+	memset(vmx->vmread_hva, 0, getpagesize());	/* all-zero VMREAD bitmap */
+
+	vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
+	vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
+	memset(vmx->vmwrite_hva, 0, getpagesize());	/* all-zero VMWRITE bitmap */
+
+	/* Setup of a region of guest memory for the VP Assist page. */
+	vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
+						0x10000, 0, 0);
+	vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
+	vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
+
+	/* Setup of a region of guest memory for the enlightened VMCS. */
+	vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
+						       0x10000, 0, 0);
+	vmx->enlightened_vmcs_hva =
+		addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
+	vmx->enlightened_vmcs_gpa =
+		addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs);
+
+	*p_vmx_gva = vmx_gva;	/* guest-side address of the same struct */
+	return vmx;
+}
+
+bool prepare_for_vmx_operation(struct vmx_pages *vmx)
+{
+	const uint64_t required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
+				  FEAT_CTL_LOCKED;
+	uint64_t feature_control;
+	unsigned long cr0;
+	unsigned long cr4;
+
+	/*
+	 * Ensure bits in CR0 and CR4 are valid in VMX operation:
+	 * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
+	 * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
+	 */
+	__asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
+	cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+	cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+	__asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
+
+	__asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
+	cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+	cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+	/* Enable VMX operation */
+	cr4 |= X86_CR4_VMXE;
+	__asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
+
+	/*
+	 * IA32_FEATURE_CONTROL must permit VMXON:
+	 *  Bit 0: Lock bit. If clear, VMXON causes a #GP.
+	 *  Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
+	 *    outside of SMX causes a #GP.
+	 * The MSR is written only when one of those bits is still missing.
+	 */
+	feature_control = rdmsr(MSR_IA32_FEAT_CTL);
+	if ((feature_control & required) != required)
+		wrmsr(MSR_IA32_FEAT_CTL, feature_control | required);
+
+	/*
+	 * Enter VMX root operation.  A non-zero vmxon() result is reported
+	 * to the caller as failure.
+	 */
+	*(uint32_t *)(vmx->vmxon) = vmcs_revision();
+	return !vmxon(vmx->vmxon_gpa);
+}
+
+bool load_vmcs(struct vmx_pages *vmx)
+{
+	if (enable_evmcs) {	/* enlightened VMCS instead of a regular one */
+		if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa,
+				  vmx->enlightened_vmcs))
+			return false;
+		current_evmcs->revision_id = EVMCS_VERSION;
+		return true;
+	}
+
+	/* Load a VMCS. */
+	*(uint32_t *)(vmx->vmcs) = vmcs_revision();
+	if (vmclear(vmx->vmcs_gpa))
+		return false;
+
+	if (vmptrld(vmx->vmcs_gpa))
+		return false;
+
+	/* Setup shadow VMCS, do not load it yet. */
+	*(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+	if (vmclear(vmx->shadow_vmcs_gpa))
+		return false;
+
+	return true;
+}
+
+/* Initialize the control fields to the most basic settings possible;
+ * EPT is only enabled when vmx->eptp_gpa is non-zero.
+ */
+static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
+{
+	uint32_t sec_exec_ctl = 0;
+
+	vmwrite(VIRTUAL_PROCESSOR_ID, 0);
+	vmwrite(POSTED_INTR_NV, 0);
+
+	vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
+
+	if (vmx->eptp_gpa) {
+		uint64_t ept_paddr;
+		struct eptPageTablePointer eptp = {
+			.memory_type = VMX_BASIC_MEM_TYPE_WB,
+			.page_walk_length = 3, /* + 1, i.e. presumably a 4-level walk */
+			.ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
+			.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
+		};
+
+		memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));	/* bit-field struct -> raw 64-bit value */
+		vmwrite(EPT_POINTER, ept_paddr);
+		sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
+	}
+	/* Secondary controls are activated only if this vmwrite() returns 0. */
+	if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl))
+		vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+			rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+	else {
+		vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
+		GUEST_ASSERT(!sec_exec_ctl);	/* without secondary controls nothing may have been requested */
+	}
+
+	vmwrite(EXCEPTION_BITMAP, 0);
+	vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+	vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
+	vmwrite(CR3_TARGET_COUNT, 0);
+	vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
+		VM_EXIT_HOST_ADDR_SPACE_SIZE);	  /* 64-bit host */
+	vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+	vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+	vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
+		VM_ENTRY_IA32E_MODE);		  /* 64-bit guest */
+	vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+	vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+	vmwrite(TPR_THRESHOLD, 0);
+
+	vmwrite(CR0_GUEST_HOST_MASK, 0);
+	vmwrite(CR4_GUEST_HOST_MASK, 0);
+	vmwrite(CR0_READ_SHADOW, get_cr0());
+	vmwrite(CR4_READ_SHADOW, get_cr4());
+
+	vmwrite(MSR_BITMAP, vmx->msr_gpa);	/* bitmaps are referenced by guest-physical address */
+	vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
+	vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
+}
+
+/*
+ * Initialize the host state fields based on the current host state, with
+ * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
+ * or vmresume.
+ */
+static inline void init_vmcs_host_state(void)
+{
+	uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);	/* decides which optional MSR fields get written below */
+
+	vmwrite(HOST_ES_SELECTOR, get_es());
+	vmwrite(HOST_CS_SELECTOR, get_cs());
+	vmwrite(HOST_SS_SELECTOR, get_ss());
+	vmwrite(HOST_DS_SELECTOR, get_ds());
+	vmwrite(HOST_FS_SELECTOR, get_fs());
+	vmwrite(HOST_GS_SELECTOR, get_gs());
+	vmwrite(HOST_TR_SELECTOR, get_tr());
+
+	if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
+		vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
+	if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
+		vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
+	if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+		vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
+			rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
+
+	vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
+
+	vmwrite(HOST_CR0, get_cr0());
+	vmwrite(HOST_CR3, get_cr3());
+	vmwrite(HOST_CR4, get_cr4());
+	vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
+	vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
+	vmwrite(HOST_TR_BASE,	/* base taken from the GDT descriptor located at GDT base + TR */
+		get_desc64_base((struct desc64 *)(get_gdt().address + get_tr())));
+	vmwrite(HOST_GDTR_BASE, get_gdt().address);
+	vmwrite(HOST_IDTR_BASE, get_idt().address);
+	vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
+	vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
+}
+
+/*
+ * Initialize the guest state fields essentially as a clone of
+ * the host state fields. Some host state fields have fixed
+ * values, and we set the corresponding guest state fields accordingly.
+ */
+static inline void init_vmcs_guest_state(void *rip, void *rsp)
+{
+	vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));	/* HOST_* fields are read back from the VMCS */
+	vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
+	vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
+	vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
+	vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
+	vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
+	vmwrite(GUEST_LDTR_SELECTOR, 0);
+	vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
+	vmwrite(GUEST_INTR_STATUS, 0);
+	vmwrite(GUEST_PML_INDEX, 0);
+
+	vmwrite(VMCS_LINK_POINTER, -1ll);	/* all ones */
+	vmwrite(GUEST_IA32_DEBUGCTL, 0);
+	vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
+	vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
+	vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
+		vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
+
+	vmwrite(GUEST_ES_LIMIT, -1);
+	vmwrite(GUEST_CS_LIMIT, -1);
+	vmwrite(GUEST_SS_LIMIT, -1);
+	vmwrite(GUEST_DS_LIMIT, -1);
+	vmwrite(GUEST_FS_LIMIT, -1);
+	vmwrite(GUEST_GS_LIMIT, -1);
+	vmwrite(GUEST_LDTR_LIMIT, -1);
+	vmwrite(GUEST_TR_LIMIT, 0x67);
+	vmwrite(GUEST_GDTR_LIMIT, 0xffff);
+	vmwrite(GUEST_IDTR_LIMIT, 0xffff);
+	vmwrite(GUEST_ES_AR_BYTES,	/* a zero selector gets 0x10000, any other 0xc093 */
+		vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
+	vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+	vmwrite(GUEST_SS_AR_BYTES, 0xc093);
+	vmwrite(GUEST_DS_AR_BYTES,
+		vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+	vmwrite(GUEST_FS_AR_BYTES,
+		vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+	vmwrite(GUEST_GS_AR_BYTES,
+		vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+	vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
+	vmwrite(GUEST_TR_AR_BYTES, 0x8b);
+	vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+	vmwrite(GUEST_ACTIVITY_STATE, 0);
+	vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
+	vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+	vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
+	vmwrite(GUEST_CR3, vmreadz(HOST_CR3));	/* guest uses the same CR3 value as the host field */
+	vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
+	vmwrite(GUEST_ES_BASE, 0);
+	vmwrite(GUEST_CS_BASE, 0);
+	vmwrite(GUEST_SS_BASE, 0);
+	vmwrite(GUEST_DS_BASE, 0);
+	vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
+	vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
+	vmwrite(GUEST_LDTR_BASE, 0);
+	vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
+	vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
+	vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
+	vmwrite(GUEST_DR7, 0x400);
+	vmwrite(GUEST_RSP, (uint64_t)rsp);	/* caller-supplied stack pointer */
+	vmwrite(GUEST_RIP, (uint64_t)rip);	/* caller-supplied entry point */
+	vmwrite(GUEST_RFLAGS, 2);	/* only bit 1 set */
+	vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+	vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
+	vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
+}
+
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
+{
+	init_vmcs_control_fields(vmx);	/* first: host-state setup reads VM_EXIT_CONTROLS back */
+	init_vmcs_host_state();		/* second: guest-state setup copies the HOST_* fields */
+	init_vmcs_guest_state(guest_rip, guest_rsp);
+}
+
+bool nested_vmx_supported(void)
+{
+	struct kvm_cpuid_entry2 *leaf1 = kvm_get_supported_cpuid_entry(1);
+
+	return (leaf1->ecx & CPUID_VMX) != 0;	/* VMX bit of CPUID.1:ECX */
+}
+
+void nested_vmx_check_supported(void)
+{
+	if (nested_vmx_supported())
+		return;	/* nested VMX is reported; let the test proceed */
+	print_skip("nested VMX not enabled");
+	exit(KSFT_SKIP);
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+		   uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
+{
+	uint16_t index[4];	/* per-level table index, [0] = lowest level */
+	struct eptPageTableEntry *pml4e;
+	/* Map one 4K page nested_paddr -> paddr in the EPT rooted at vmx->eptp_hva. */
+	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+		    "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+	TEST_ASSERT((nested_paddr % vm->page_size) == 0,
+		    "Nested physical address not on page boundary,\n"
+		    "  nested_paddr: 0x%lx vm->page_size: 0x%x",
+		    nested_paddr, vm->page_size);
+	TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
+		    "Nested physical address beyond maximum supported,\n"
+		    "  nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		    nested_paddr, vm->max_gfn, vm->page_size);
+	TEST_ASSERT((paddr % vm->page_size) == 0,
+		    "Physical address not on page boundary,\n"
+		    "  paddr: 0x%lx vm->page_size: 0x%x",
+		    paddr, vm->page_size);
+	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+		    "Physical address beyond maximum supported,\n"
+		    "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		    paddr, vm->max_gfn, vm->page_size);
+
+	index[0] = (nested_paddr >> 12) & 0x1ffu;	/* address bits 20:12 */
+	index[1] = (nested_paddr >> 21) & 0x1ffu;	/* address bits 29:21 */
+	index[2] = (nested_paddr >> 30) & 0x1ffu;	/* address bits 38:30 */
+	index[3] = (nested_paddr >> 39) & 0x1ffu;	/* address bits 47:39 */
+
+	/* Allocate page directory pointer table if not present. */
+	pml4e = vmx->eptp_hva;
+	if (!pml4e[index[3]].readable) {	/* 'readable' doubles as the present marker */
+		pml4e[index[3]].address = vm_phy_page_alloc(vm,
+			  KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+			>> vm->page_shift;
+		pml4e[index[3]].writable = true;
+		pml4e[index[3]].readable = true;
+		pml4e[index[3]].executable = true;
+	}
+
+	/* Allocate page directory table if not present. */
+	struct eptPageTableEntry *pdpe;
+	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+	if (!pdpe[index[2]].readable) {
+		pdpe[index[2]].address = vm_phy_page_alloc(vm,
+			  KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+			>> vm->page_shift;
+		pdpe[index[2]].writable = true;
+		pdpe[index[2]].readable = true;
+		pdpe[index[2]].executable = true;
+	}
+
+	/* Allocate page table if not present. */
+	struct eptPageTableEntry *pde;
+	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+	if (!pde[index[1]].readable) {
+		pde[index[1]].address = vm_phy_page_alloc(vm,
+			  KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+			>> vm->page_shift;
+		pde[index[1]].writable = true;
+		pde[index[1]].readable = true;
+		pde[index[1]].executable = true;
+	}
+
+	/* Fill in page table entry. */
+	struct eptPageTableEntry *pte;
+	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+	pte[index[0]].address = paddr >> vm->page_shift;	/* an existing mapping is overwritten */
+	pte[index[0]].writable = true;
+	pte[index[0]].readable = true;
+	pte[index[0]].executable = true;
+
+	/*
+	 * For now mark these as accessed and dirty because the only
+	 * testcase we have needs that.  Can be reconsidered later.
+	 */
+	pte[index[0]].accessed = true;
+	pte[index[0]].dirty = true;
+}
+
+/*
+ * Map a range of EPT guest physical addresses to the VM's physical address
+ *
+ * Input Args:
+ *   vmx - VMX pages whose EPT root (eptp_hva) receives the mappings
+ *   vm - Virtual Machine
+ *   nested_paddr - Nested guest physical address to map
+ *   paddr - VM Physical Address
+ *   size - The size of the range to map
+ *   eptp_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None. Return: None.
+ *
+ * Within the VM given by vm, creates a nested guest translation for the
+ * page range starting at nested_paddr to the page range starting at paddr.
+ * Only whole pages are mapped: size is divided by the VM page size.
+ */
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+		uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+		uint32_t eptp_memslot)
+{
+	const size_t step = vm->page_size;
+	size_t left;
+
+	TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
+	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+	for (left = size / step; left; left--) {
+		nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot);
+		nested_paddr += step;
+		paddr += step;
+	}
+}
+
+/* Identity-map, in the extended page table, every page of the memslot whose
+ * bit is clear in the region's unused_phy_pages sparsebit.
+ */
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+			uint32_t memslot, uint32_t eptp_memslot)
+{
+	struct userspace_mem_region *region = memslot2region(vm, memslot);
+	sparsebit_idx_t gfn, last;
+	uint64_t gpa;
+
+	/*
+	 * The search is seeded with the frame just below the region;
+	 * presumably sparsebit_next_clear() looks strictly after its argument.
+	 */
+	gfn = (region->region.guest_phys_addr >> vm->page_shift) - 1;
+	last = gfn + (region->region.memory_size >> vm->page_shift);
+
+	/* Visit each clear bit up to 'last' and map that single page 1:1. */
+	while ((gfn = sparsebit_next_clear(region->unused_phy_pages, gfn)) <= last) {
+		gpa = (uint64_t)gfn << vm->page_shift;
+		nested_map(vmx, vm, gpa, gpa, 1 << vm->page_shift,
+			   eptp_memslot);
+	}
+}
+
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+		  uint32_t eptp_memslot)	/* eptp_memslot is not used in this body */
+{
+	vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);	/* one page for the EPT root */
+	vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);	/* host-virtual alias of that page */
+	vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);	/* non-zero value makes init_vmcs_control_fields() enable EPT */
+}
+
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
+				      uint32_t eptp_memslot)
+{
+	vm_vaddr_t page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->apic_access = (void *)page_gva;	/* guest-virtual address of the page */
+	vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
+	vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
+}
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
new file mode 100644
index 000000000..9f49ead38
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test for s390x KVM_S390_MEM_OP
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define VCPU_ID 1
+
+static uint8_t mem1[65536];
+static uint8_t mem2[65536];
+
+static void guest_code(void)
+{
+	int pos;
+
+	while (1) {	/* copy mem1 into mem2, sync with the host, repeat */
+		for (pos = 0; pos < sizeof(mem2); pos++)
+			mem2[pos] = mem1[pos];
+		GUEST_SYNC(0);
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_s390_mem_op ksmo = {0};	/* zeroed so the reserved bytes reach the kernel as 0 */
+	int rv, i, maxsize;
+
+	setbuf(stdout, NULL);	/* Tell stdout not to buffer its content */
+
+	maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP);	/* non-zero value is used as the transfer size limit */
+	if (!maxsize) {
+		print_skip("CAP_S390_MEM_OP not supported");
+		exit(KSFT_SKIP);
+	}
+	if (maxsize > sizeof(mem1))
+		maxsize = sizeof(mem1);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	run = vcpu_state(vm, VCPU_ID);
+
+	for (i = 0; i < sizeof(mem1); i++)
+		mem1[i] = i * i + i;	/* test pattern, truncated to 8 bits */
+
+	/* Set the first array */
+	ksmo.gaddr = addr_gva2gpa(vm, (uintptr_t)mem1);
+	ksmo.flags = 0;
+	ksmo.size = maxsize;
+	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+	ksmo.buf = (uintptr_t)mem1;
+	ksmo.ar = 0;
+	vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+
+	/* Let the guest code copy the first array to the second */
+	vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+		    "Unexpected exit reason: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	memset(mem2, 0xaa, sizeof(mem2));	/* poison the host copy so a failed read is detected */
+
+	/* Get the second array */
+	ksmo.gaddr = (uintptr_t)mem2;
+	ksmo.flags = 0;
+	ksmo.size = maxsize;
+	ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
+	ksmo.buf = (uintptr_t)mem2;
+	ksmo.ar = 0;
+	vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+
+	TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
+		    "Memory contents do not match!");
+
+	/* Check error conditions - first bad size: */
+	ksmo.gaddr = (uintptr_t)mem1;
+	ksmo.flags = 0;
+	ksmo.size = -1;
+	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+	ksmo.buf = (uintptr_t)mem1;
+	ksmo.ar = 0;
+	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);	/* non-asserting variant: failure is expected */
+	TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
+
+	/* Zero size: */
+	ksmo.gaddr = (uintptr_t)mem1;
+	ksmo.flags = 0;
+	ksmo.size = 0;
+	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+	ksmo.buf = (uintptr_t)mem1;
+	ksmo.ar = 0;
+	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
+		    "ioctl allows 0 as size");
+
+	/* Bad flags: */
+	ksmo.gaddr = (uintptr_t)mem1;
+	ksmo.flags = -1;
+	ksmo.size = maxsize;
+	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+	ksmo.buf = (uintptr_t)mem1;
+	ksmo.ar = 0;
+	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
+
+	/* Bad operation: */
+	ksmo.gaddr = (uintptr_t)mem1;
+	ksmo.flags = 0;
+	ksmo.size = maxsize;
+	ksmo.op = -1;
+	ksmo.buf = (uintptr_t)mem1;
+	ksmo.ar = 0;
+	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+
+	/* Bad guest address: */
+	ksmo.gaddr = ~0xfffUL;
+	ksmo.flags = KVM_S390_MEMOP_F_CHECK_ONLY;
+	ksmo.size = maxsize;
+	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+	ksmo.buf = (uintptr_t)mem1;
+	ksmo.ar = 0;
+	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access");	/* a positive return is expected here, not -1 */
+
+	/* Bad host address: */
+	ksmo.gaddr = (uintptr_t)mem1;
+	ksmo.flags = 0;
+	ksmo.size = maxsize;
+	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+	ksmo.buf = 0;
+	ksmo.ar = 0;
+	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	TEST_ASSERT(rv == -1 && errno == EFAULT,
+		    "ioctl does not report bad host memory address");
+
+	/* Bad access register: */
+	run->psw_mask &= ~(3UL << (63 - 17));
+	run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
+	vcpu_run(vm, VCPU_ID);              /* To sync new state to SIE block */
+	ksmo.gaddr = (uintptr_t)mem1;
+	ksmo.flags = 0;
+	ksmo.size = maxsize;
+	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+	ksmo.buf = (uintptr_t)mem1;
+	ksmo.ar = 17;
+	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
+	run->psw_mask &= ~(3UL << (63 - 17));   /* Disable AR mode */
+	vcpu_run(vm, VCPU_ID);                  /* Run to sync new state */
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c
new file mode 100644
index 000000000..b143db6d8
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/resets.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test for s390x CPU resets
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define VCPU_ID 3
+#define LOCAL_IRQS 32
+
+struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS];
+
+struct kvm_vm *vm;
+struct kvm_run *run;
+struct kvm_sync_regs *sync_regs;
+static uint8_t regs_null[512];
+
+static void guest_code_initial(void)
+{
+	/* set several CRs to "safe" value */
+	unsigned long cr2_59 = 0x10;	/* enable guarded storage */
+	unsigned long cr8_63 = 0x1;	/* monitor mask = 1 */
+	unsigned long cr10 = 1;		/* PER START */
+	unsigned long cr11 = -1;	/* PER END */
+
+	/* Asm operands: %0 = cr2_59, %1 = cr8_63, %2 = cr10, %3 = cr11. */
+	/* Dirty registers */
+	asm volatile (
+		"	lghi	2,0x11\n"	/* Round toward 0 */
+		"	sfpc	2\n"		/* set fpc to !=0 */
+		"	lctlg	2,2,%0\n"
+		"	lctlg	8,8,%1\n"
+		"	lctlg	10,10,%2\n"
+		"	lctlg	11,11,%3\n"
+		/* now clobber some general purpose regs */
+		"	llihh	0,0xffff\n"
+		"	llihl	1,0x5555\n"
+		"	llilh	2,0xaaaa\n"
+		"	llill	3,0x0000\n"
+		/* now clobber a floating point reg */
+		"	lghi	4,0x1\n"
+		"	cdgbr	0,4\n"
+		/* now clobber an access reg */
+		"	sar	9,4\n"
+		/* We embed diag 501 here to control register content */
+		"	diag 0,0,0x501\n"
+		:
+		: "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11)
+		/* no clobber list as this should not return */
+		);
+}
+
+static void test_one_reg(uint64_t id, uint64_t value)
+{
+	uint64_t eval_reg;
+	struct kvm_one_reg reg = { .id = id };
+
+	/* The register value is returned through the buffer at reg.addr. */
+	reg.addr = (uintptr_t)&eval_reg;
+	vcpu_get_reg(vm, VCPU_ID, &reg);
+	TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
+}
+
+static void assert_noirq(void)
+{
+	struct kvm_s390_irq_state query;
+	int irqs;
+
+	query.buf = (unsigned long)buf;
+	query.len = sizeof(buf);
+	irqs = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_GET_IRQ_STATE, &query);
+	/*
+	 * On success the ioctl result is the number of interrupts fetched.
+	 * Every interrupt (in particular the emergency call injected by
+	 * inject_irq()) has to be gone after a reset, so anything other
+	 * than 0 is a failure; a negative value means the query failed.
+	 */
+	TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno);
+	TEST_ASSERT(!irqs, "IRQ pending");
+}
+
+static void assert_clear(void)
+{
+	struct kvm_sregs sregs;
+	struct kvm_regs regs;
+	struct kvm_fpu fpu;
+	/* Registers fetched via ioctl first, then the kvm_run sync_regs copies. */
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	TEST_ASSERT(!memcmp(&regs.gprs, regs_null, sizeof(regs.gprs)), "gprs == 0");
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");
+
+	vcpu_fpu_get(vm, VCPU_ID, &fpu);
+	TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
+
+	/* sync regs */
+	TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)),
+		    "gprs0-15 == 0 (sync_regs)");
+
+	TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)),
+		    "acrs0-15 == 0 (sync_regs)");
+
+	TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)),
+		    "vrs0-31 == 0 (sync_regs)");	/* the whole vrs array is compared */
+}
+
+static void assert_initial_noclear(void)
+{
+	TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
+		    "gpr0 == 0xffff000000000000 (sync_regs)");	/* guest: llihh 0,0xffff */
+	TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
+		    "gpr1 == 0x0000555500000000 (sync_regs)");	/* guest: llihl 1,0x5555 */
+	TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL,
+		    "gpr2 == 0x00000000aaaa0000 (sync_regs)");	/* guest: llilh 2,0xaaaa */
+	TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL,
+		    "gpr3 == 0x0000000000000000 (sync_regs)");	/* guest: llill 3,0x0000 */
+	TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL,
+		    "fpr0 == 0f1 (sync_regs)");	/* guest: cdgbr 0,4 with r4 == 1 */
+	TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");	/* guest: sar 9,4 */
+}
+
+static void assert_initial(void)
+{
+	struct kvm_sregs sregs;
+	struct kvm_fpu fpu;
+
+	/* KVM_GET_SREGS */
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
+	TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
+		    "cr14 == 0xC2000000 (KVM_GET_SREGS)");
+	TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 13),
+		    "cr1-13 == 0 (KVM_GET_SREGS)");	/* crs[1]..crs[13] are 13 registers */
+	TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)");
+
+	/* sync regs */
+	TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)");
+	TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL,
+		    "cr14 == 0xC2000000 (sync_regs)");
+	TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, sizeof(sync_regs->crs[1]) * 13),
+		    "cr1-13 == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
+
+	/* kvm_run */
+	TEST_ASSERT(run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
+	TEST_ASSERT(run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
+
+	vcpu_fpu_get(vm, VCPU_ID, &fpu);
+	TEST_ASSERT(!fpu.fpc, "fpc == 0");
+
+	test_one_reg(KVM_REG_S390_GBEA, 1);	/* same values again, read through the one-reg interface */
+	test_one_reg(KVM_REG_S390_PP, 0);
+	test_one_reg(KVM_REG_S390_TODPR, 0);
+	test_one_reg(KVM_REG_S390_CPU_TIMER, 0);
+	test_one_reg(KVM_REG_S390_CLOCK_COMP, 0);
+}
+
+static void assert_normal_noclear(void)
+{	/* Values loaded by guest_code_initial() via lctlg must still be in place. */
+	TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 0x10 (sync_regs)");
+	TEST_ASSERT(sync_regs->crs[8] == 1, "cr8 == 1 (sync_regs)");
+	TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
+	TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
+}
+
+static void assert_normal(void)
+{
+	test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);	/* pfault token via the one-reg interface */
+	TEST_ASSERT(sync_regs->pft == KVM_S390_PFAULT_TOKEN_INVALID,
+			"pft == 0xff.....  (sync_regs)");	/* same token via sync_regs */
+	assert_noirq();	/* and no interrupt may still be pending */
+}
+
+static void inject_irq(int cpu_id)
+{
+	struct kvm_s390_irq_state state;
+	int irqs;
+
+	/* Describe one emergency interrupt in the first slot of buf[]. */
+	buf[0].type = KVM_S390_INT_EMERGENCY;
+	buf[0].u.emerg.code = cpu_id;
+
+	state.buf = (unsigned long)buf;
+	state.len = sizeof(buf[0]);
+	irqs = _vcpu_ioctl(vm, cpu_id, KVM_S390_SET_IRQ_STATE, &state);
+	TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno);
+}
+
+static void test_normal(void)
+{
+	pr_info("Testing normal reset\n");
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
+	run = vcpu_state(vm, VCPU_ID);
+	sync_regs = &run->s.regs;
+
+	vcpu_run(vm, VCPU_ID);	/* let the guest dirty its registers */
+
+	inject_irq(VCPU_ID);	/* leave an interrupt pending for the reset to drop */
+
+	vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0);
+
+	/* state this reset must clear */
+	assert_normal();
+	/* state this reset must leave untouched */
+	assert_normal_noclear();
+	assert_initial_noclear();
+
+	kvm_vm_free(vm);
+}
+
+static void test_initial(void)
+{
+	pr_info("Testing initial reset\n");
+	vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
+	run = vcpu_state(vm, VCPU_ID);
+	sync_regs = &run->s.regs;
+
+	vcpu_run(vm, VCPU_ID);	/* let the guest dirty its registers */
+
+	inject_irq(VCPU_ID);	/* leave an interrupt pending for the reset to drop */
+
+	vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0);
+
+	/* state this reset must clear */
+	assert_normal();
+	assert_initial();
+	/* state this reset must leave untouched */
+	assert_initial_noclear();
+
+	kvm_vm_free(vm);
+}
+
+static void test_clear(void)
+{
+	pr_info("Testing clear reset\n");
+	vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
+	run = vcpu_state(vm, VCPU_ID);
+	sync_regs = &run->s.regs;
+
+	vcpu_run(vm, VCPU_ID);	/* let the guest dirty its registers */
+
+	inject_irq(VCPU_ID);	/* leave an interrupt pending for the reset to drop */
+
+	vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0);
+
+	/* state this reset must clear (here: all three groups) */
+	assert_normal();
+	assert_initial();
+	assert_clear();
+
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	setbuf(stdout, NULL);	/* Tell stdout not to buffer its content */
+
+	test_initial();
+	if (!kvm_check_cap(KVM_CAP_S390_VCPU_RESETS))
+		return 0;	/* normal and clear reset tests need this capability */
+	test_normal();
+	test_clear();
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
new file mode 100644
index 000000000..5731ccf34
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x KVM_CAP_SYNC_REGS
+ *
+ * Based on the same test for x86:
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Adaptions for s390x:
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define VCPU_ID 5
+
+/*
+ * Guest payload: loop forever, issuing diag 0x501 (which the host side of
+ * this test expects to see as an interception) and then adding 1 to r11.
+ */
+static void guest_code(void)
+{
+	/*
+	 * We embed diag 501 here instead of doing a ucall to avoid that
+	 * the compiler has messed with r11 at the time of the ucall.
+	 */
+	asm volatile (
+		"0:	diag 0,0,0x501\n"
+		"	ahi 11,1\n"
+		"	j 0b\n"
+	);
+}
+
+/*
+ * Assert that member 'reg' holds the same value in the structures pointed to
+ * by 'left' and 'right'.  Both pointer names must be in scope at the point of
+ * use.  The argument is stringified into the failure message, so the message
+ * shows the expression exactly as written by the caller.
+ * This variant prints the values with 0x%llx.
+ */
+#define REG_COMPARE(reg) \
+	TEST_ASSERT(left->reg == right->reg, \
+		    "Register " #reg \
+		    " values did not match: 0x%llx, 0x%llx\n", \
+		    left->reg, right->reg)
+
+/* Same as REG_COMPARE(), but prints the values with 0x%x. */
+#define REG_COMPARE32(reg) \
+	TEST_ASSERT(left->reg == right->reg, \
+		    "Register " #reg \
+		    " values did not match: 0x%x, 0x%x\n", \
+		    left->reg, right->reg)
+
+
+/*
+ * Assert that all 16 gprs[] entries of 'left' match those of 'right'.
+ * The parameter names and the index name 'i' are used textually by
+ * REG_COMPARE() (and 'i' ends up in the failure message), so they must not
+ * be renamed.
+ */
+static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
+{
+	int i;
+
+	for (i = 0; i < 16; i++)
+		REG_COMPARE(gprs[i]);
+}
+
+/*
+ * Assert that all 16 acrs[] entries and then all 16 crs[] entries of 'left'
+ * match those of 'right'.  The parameter names and the index name 'i' are
+ * used textually by the REG_COMPARE*() macros, so they must not be renamed.
+ */
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
+{
+	int i;
+
+	/* acrs[] values are printed with the 32-bit format variant */
+	for (i = 0; i < 16; i++)
+		REG_COMPARE32(acrs[i]);
+
+	for (i = 0; i < 16; i++)
+		REG_COMPARE(crs[i]);
+}
+
+/* The comparison helpers are only meant for the compare_*() functions above */
+#undef REG_COMPARE
+#undef REG_COMPARE32
+
+#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+/*
+ * Test flow:
+ *  1. Skip unless KVM_CAP_SYNC_REGS is reported.
+ *  2. Invalid bits in kvm_valid_regs or kvm_dirty_regs (alone or combined
+ *     with valid bits) must make KVM_RUN fail with EINVAL.
+ *  3. With all tested fields requested, run->s.regs must match what
+ *     vcpu_regs_get()/vcpu_sregs_get() return.
+ *  4. Values written to run->s.regs and marked dirty must reach the guest.
+ *  5. Values written to run->s.regs without a dirty bit must be discarded.
+ */
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+	int rv, cap;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+	if (!cap) {
+		print_skip("CAP_SYNC_REGS not supported");
+		exit(KSFT_SKIP);
+	}
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	/* Request reading invalid register set from VCPU. */
+	run->kvm_valid_regs = INVALID_SYNC_FIELD;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	/* Reset the field so the next sub-test starts from a clean state */
+	vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+	run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+	/* Request setting invalid register set into VCPU. */
+	run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+	run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+	/* Request and verify all valid register sets. */
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+		    "Unexpected exit reason: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	/* The exit must be the interception of guest_code()'s diag 0x501 */
+	TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
+		    (run->s390_sieic.ipa >> 8) == 0x83 &&
+		    (run->s390_sieic.ipb >> 16) == 0x501,
+		    "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n",
+		    run->s390_sieic.icptcode, run->s390_sieic.ipa,
+		    run->s390_sieic.ipb);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	compare_regs(&regs, &run->s.regs);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	compare_sregs(&sregs, &run->s.regs);
+
+	/* Set and verify various register values */
+	run->s.regs.gprs[11] = 0xBAD1DEA;
+	run->s.regs.acrs[0] = 1 << 11;
+
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+		    "Unexpected exit reason: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	/* guest_code() adds 1 to r11 before it reaches the next diag */
+	TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.gprs[11]);
+	TEST_ASSERT(run->s.regs.acrs[0]  == 1 << 11,
+		    "acr0 sync regs value incorrect 0x%x.",
+		    run->s.regs.acrs[0]);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	compare_regs(&regs, &run->s.regs);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	compare_sregs(&sregs, &run->s.regs);
+
+	/* Clear kvm_dirty_regs bits, verify new s.regs values are
+	 * overwritten with existing guest values.
+	 */
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	run->kvm_dirty_regs = 0;
+	run->s.regs.gprs[11] = 0xDEADBEEF;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+		    "Unexpected exit reason: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.gprs[11]);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
new file mode 100644
index 000000000..6f441dd9f
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/compiler.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+/*
+ * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
+ * 2MB sized and aligned region so that the initial region corresponds to
+ * exactly one large page.
+ */
+#define MEM_REGION_SIZE		0x200000
+
+#ifdef __x86_64__
+/*
+ * Somewhat arbitrary location and slot, intended to not overlap anything.
+ */
+#define MEM_REGION_GPA		0xc0000000
+#define MEM_REGION_SLOT		10
+
+static const uint64_t MMIO_VAL = 0xbeefull;
+
+extern const uint64_t final_rip_start;
+extern const uint64_t final_rip_end;
+
+static sem_t vcpu_ready;
+
+/*
+ * Guest helper: re-read the 64-bit value at MEM_REGION_GPA until it differs
+ * from @spin_val, then issue GUEST_SYNC(0) and return the value that was
+ * observed.
+ */
+static inline uint64_t guest_spin_on_val(uint64_t spin_val)
+{
+	uint64_t seen = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
+
+	while (seen == spin_val)
+		seen = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
+
+	GUEST_SYNC(0);
+	return seen;
+}
+
+/*
+ * vCPU thread body.  @data is the struct kvm_vm to run.
+ *
+ * Runs the vCPU in a loop:
+ *  - a UCALL_SYNC from the guest posts the vcpu_ready semaphore and re-enters;
+ *  - any other ucall, or any exit that is neither I/O nor MMIO, ends the loop;
+ *  - an MMIO exit must be an 8-byte read at MEM_REGION_GPA and is answered
+ *    with MMIO_VAL.
+ *
+ * A UCALL_ABORT from the guest fails the test.  Always returns NULL.
+ */
+static void *vcpu_worker(void *data)
+{
+	struct kvm_vm *vm = data;
+	struct kvm_run *run;
+	struct ucall uc;
+	uint64_t cmd;
+
+	/*
+	 * Loop until the guest is done.  Re-enter the guest on all MMIO exits,
+	 * which will occur if the guest attempts to access a memslot after it
+	 * has been deleted or while it is being moved.
+	 */
+	run = vcpu_state(vm, VCPU_ID);
+
+	while (1) {
+		vcpu_run(vm, VCPU_ID);
+
+		if (run->exit_reason == KVM_EXIT_IO) {
+			cmd = get_ucall(vm, VCPU_ID, &uc);
+			if (cmd != UCALL_SYNC)
+				break;
+
+			sem_post(&vcpu_ready);
+			continue;
+		}
+
+		if (run->exit_reason != KVM_EXIT_MMIO)
+			break;
+
+		TEST_ASSERT(!run->mmio.is_write, "Unexpected exit mmio write");
+		TEST_ASSERT(run->mmio.len == 8,
+			    "Unexpected exit mmio size = %u", run->mmio.len);
+
+		TEST_ASSERT(run->mmio.phys_addr == MEM_REGION_GPA,
+			    "Unexpected exit mmio address = 0x%llx",
+			    run->mmio.phys_addr);
+		/* Supply the value the guest's read will observe */
+		memcpy(run->mmio.data, &MMIO_VAL, 8);
+	}
+
+	/*
+	 * cmd is only assigned on KVM_EXIT_IO; the exit_reason check comes
+	 * first so cmd is never read uninitialized.
+	 */
+	if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
+		TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
+			  __FILE__, uc.args[1], uc.args[2]);
+
+	return NULL;
+}
+
+/*
+ * Wait up to two seconds for the vCPU thread to post vcpu_ready, then sleep
+ * another 100ms to give that thread time to re-enter the guest.  Fails the
+ * test if the semaphore is not posted in time.
+ */
+static void wait_for_vcpu(void)
+{
+	struct timespec ts;
+
+	/* sem_timedwait() takes an absolute CLOCK_REALTIME deadline */
+	TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
+		    "clock_gettime() failed: %d\n", errno);
+
+	ts.tv_sec += 2;
+	TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
+		    "sem_timedwait() failed: %d\n", errno);
+
+	/* Wait for the vCPU thread to reenter the guest. */
+	usleep(100000);
+}
+
+/*
+ * Create a VM running @guest_code on VCPU_ID, add the test memslot
+ * (MEM_REGION_SLOT at MEM_REGION_GPA, MEM_REGION_SIZE bytes), map and zero
+ * its first two pages, start vcpu_worker() in @vcpu_thread and wait for the
+ * guest's first sync.  Returns the new VM.
+ */
+static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
+{
+	struct kvm_vm *vm;
+	uint64_t *hva;
+	uint64_t gpa;
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
+				    MEM_REGION_GPA, MEM_REGION_SLOT,
+				    MEM_REGION_SIZE / getpagesize(), 0);
+
+	/*
+	 * Allocate and map two pages so that the GPA accessed by guest_code()
+	 * stays valid across the memslot move.
+	 */
+	gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
+	TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+
+	/* Identity map: guest virtual address == guest physical address */
+	virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+
+	/* Ditto for the host mapping so that both pages can be zeroed. */
+	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+	memset(hva, 0, 2 * 4096);
+
+	pthread_create(vcpu_thread, NULL, vcpu_worker, vm);
+
+	/* Ensure the guest thread is spun up. */
+	wait_for_vcpu();
+
+	return vm;
+}
+
+
+/*
+ * Guest side of the MOVE test.  After an initial sync it spins on the value
+ * at MEM_REGION_GPA four times, each time asserting that the new value is one
+ * of those the host side can legitimately produce at that stage (a value
+ * written by the host, or MMIO_VAL supplied by vcpu_worker() on an MMIO exit).
+ */
+static void guest_code_move_memory_region(void)
+{
+	uint64_t val;
+
+	GUEST_SYNC(0);
+
+	/*
+	 * Spin until the memory region starts getting moved to a
+	 * misaligned address.
+	 * Every region move may or may not trigger MMIO, as the
+	 * window where the memslot is invalid is usually quite small.
+	 */
+	val = guest_spin_on_val(0);
+	GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+
+	/* Spin until the misaligning memory region move completes. */
+	val = guest_spin_on_val(MMIO_VAL);
+	GUEST_ASSERT_1(val == 1 || val == 0, val);
+
+	/* Spin until the memory region starts to get re-aligned. */
+	val = guest_spin_on_val(0);
+	GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+
+	/* Spin until the re-aligning memory region move completes. */
+	val = guest_spin_on_val(MMIO_VAL);
+	GUEST_ASSERT_1(val == 1, val);
+
+	GUEST_DONE();
+}
+
+/*
+ * Host side of the MOVE test: move the in-use memslot one page down, write
+ * values through the host mapping, move the slot back, and wait for the guest
+ * to report the transitions before joining the vCPU thread.
+ */
+static void test_move_memory_region(void)
+{
+	pthread_t vcpu_thread;
+	struct kvm_vm *vm;
+	uint64_t *hva;
+
+	vm = spawn_vm(&vcpu_thread, guest_code_move_memory_region);
+
+	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+
+	/*
+	 * Shift the region's base GPA.  The guest should not see "2" as the
+	 * hva->gpa translation is misaligned, i.e. the guest is accessing a
+	 * different host pfn.
+	 */
+	vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096);
+	WRITE_ONCE(*hva, 2);
+
+	/*
+	 * The guest _might_ see an invalid memslot and trigger MMIO, but it's
+	 * a tiny window.  Spin and defer the sync until the memslot is
+	 * restored and guest behavior is once again deterministic.
+	 */
+	usleep(100000);
+
+	/*
+	 * Note, value in memory needs to be changed *before* restoring the
+	 * memslot, else the guest could race the update and see "2".
+	 */
+	WRITE_ONCE(*hva, 1);
+
+	/* Restore the original base, the guest should see "1". */
+	vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA);
+	wait_for_vcpu();
+	/* Deferred sync from when the memslot was misaligned (above). */
+	wait_for_vcpu();
+
+	pthread_join(vcpu_thread, NULL);
+
+	kvm_vm_free(vm);
+}
+
+/*
+ * Guest side of the DELETE test.  After an initial sync it spins on the value
+ * at MEM_REGION_GPA across delete / recreate / delete of the test memslot,
+ * then spins one last time.  The two asm blocks record the addresses just
+ * before and just after that final spin in the globals final_rip_start and
+ * final_rip_end, which the host uses as the expected RIP range.
+ */
+static void guest_code_delete_memory_region(void)
+{
+	uint64_t val;
+
+	GUEST_SYNC(0);
+
+	/* Spin until the memory region is deleted. */
+	val = guest_spin_on_val(0);
+	GUEST_ASSERT_1(val == MMIO_VAL, val);
+
+	/* Spin until the memory region is recreated. */
+	val = guest_spin_on_val(MMIO_VAL);
+	GUEST_ASSERT_1(val == 0, val);
+
+	/* Spin until the memory region is deleted. */
+	val = guest_spin_on_val(0);
+	GUEST_ASSERT_1(val == MMIO_VAL, val);
+
+	/* Emit final_rip_start in .rodata, holding the address of label 1 */
+	asm("1:\n\t"
+	    ".pushsection .rodata\n\t"
+	    ".global final_rip_start\n\t"
+	    "final_rip_start: .quad 1b\n\t"
+	    ".popsection");
+
+	/* Spin indefinitely (until the code memslot is deleted). */
+	guest_spin_on_val(MMIO_VAL);
+
+	/* Emit final_rip_end in .rodata, holding the address of label 1 */
+	asm("1:\n\t"
+	    ".pushsection .rodata\n\t"
+	    ".global final_rip_end\n\t"
+	    "final_rip_end: .quad 1b\n\t"
+	    ".popsection");
+
+	/* Reaching this point means the final spin returned: always fail */
+	GUEST_ASSERT_1(0, 0);
+}
+
+/*
+ * Host side of the DELETE test: delete, recreate and delete again the in-use
+ * test memslot (waiting for the guest to sync after each step), then delete
+ * memslot 0 and check how the vCPU exited.
+ */
+static void test_delete_memory_region(void)
+{
+	pthread_t vcpu_thread;
+	struct kvm_regs regs;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+
+	vm = spawn_vm(&vcpu_thread, guest_code_delete_memory_region);
+
+	/* Delete the memory region, the guest should not die. */
+	vm_mem_region_delete(vm, MEM_REGION_SLOT);
+	wait_for_vcpu();
+
+	/* Recreate the memory region.  The guest should see "0". */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
+				    MEM_REGION_GPA, MEM_REGION_SLOT,
+				    MEM_REGION_SIZE / getpagesize(), 0);
+	wait_for_vcpu();
+
+	/* Delete the region again so that there's only one memslot left. */
+	vm_mem_region_delete(vm, MEM_REGION_SLOT);
+	wait_for_vcpu();
+
+	/*
+	 * Delete the primary memslot.  This should cause an emulation error or
+	 * shutdown due to the page tables getting nuked.
+	 */
+	vm_mem_region_delete(vm, 0);
+
+	/* vcpu_worker() returns once it sees a non-I/O, non-MMIO exit */
+	pthread_join(vcpu_thread, NULL);
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN ||
+		    run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+		    "Unexpected exit reason = %d", run->exit_reason);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+
+	/*
+	 * On AMD, after KVM_EXIT_SHUTDOWN the VMCB has been reinitialized already,
+	 * so the instruction pointer would point to the reset vector.
+	 */
+	if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR)
+		TEST_ASSERT(regs.rip >= final_rip_start &&
+			    regs.rip < final_rip_end,
+			    "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n",
+			    final_rip_start, final_rip_end, regs.rip);
+
+	kvm_vm_free(vm);
+}
+
+/*
+ * Run a vCPU in a VM that has no memory regions at all and check that
+ * KVM_RUN exits with KVM_EXIT_INTERNAL_ERROR.
+ */
+static void test_zero_memory_regions(void)
+{
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+
+	pr_info("Testing KVM_RUN with zero added memory regions\n");
+
+	/* vm_create() with 0 pages: no memslot is added for the guest */
+	vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+	vm_vcpu_add(vm, VCPU_ID);
+
+	TEST_ASSERT(!ioctl(vm_get_fd(vm), KVM_SET_NR_MMU_PAGES, 64),
+		    "KVM_SET_NR_MMU_PAGES failed, errno = %d\n", errno);
+	vcpu_run(vm, VCPU_ID);
+
+	run = vcpu_state(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+		    "Unexpected exit_reason = %u\n", run->exit_reason);
+
+	kvm_vm_free(vm);
+}
+#endif /* __x86_64__ */
+
+/*
+ * Test that memory slots can be added up to the number reported by
+ * KVM_CAP_NR_MEMSLOTS, and that any attempt to add a further slot fails
+ * with EINVAL.
+ */
+static void test_add_max_memory_regions(void)
+{
+	int ret;
+	struct kvm_vm *vm;
+	uint32_t max_mem_slots;
+	uint32_t slot;
+	uint64_t guest_addr = 0x0;
+	uint64_t mem_reg_npages;
+	void *mem;
+
+	max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+	TEST_ASSERT(max_mem_slots > 0,
+		    "KVM_CAP_NR_MEMSLOTS should be greater than 0");
+	pr_info("Allowed number of memory slots: %i\n", max_mem_slots);
+
+	vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+
+	mem_reg_npages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, MEM_REGION_SIZE);
+
+	/* Check that memory slots can be added up to the maximum allowed */
+	pr_info("Adding slots 0..%i, each memory region with %dK size\n",
+		(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
+	for (slot = 0; slot < max_mem_slots; slot++) {
+		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+					    guest_addr, slot, mem_reg_npages,
+					    0);
+		guest_addr += MEM_REGION_SIZE;
+	}
+
+	/* Check that memory slots cannot be added beyond the limit */
+	mem = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+
+	/*
+	 * slot == max_mem_slots here, i.e. one past the last valid slot.
+	 * NOTE(review): the positional initializer relies on the field order
+	 * of struct kvm_userspace_memory_region (presumably slot, flags,
+	 * guest_phys_addr, memory_size, userspace_addr) -- confirm against
+	 * <linux/kvm.h>.
+	 */
+	ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION,
+		    &(struct kvm_userspace_memory_region) {slot, 0, guest_addr,
+		    MEM_REGION_SIZE, (uint64_t) mem});
+	TEST_ASSERT(ret == -1 && errno == EINVAL,
+		    "Adding one more memory slot should fail with EINVAL");
+
+	munmap(mem, MEM_REGION_SIZE);
+	kvm_vm_free(vm);
+}
+
+/*
+ * Entry point.  The max-memslots test runs on every architecture; the
+ * zero-memslot, MOVE and DELETE tests are x86_64 only.  An optional first
+ * argument sets the number of MOVE/DELETE iterations (default 10).
+ */
+int main(int argc, char *argv[])
+{
+#ifdef __x86_64__
+	int iter, loops;
+#endif
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+#ifdef __x86_64__
+	/*
+	 * FIXME: the zero-memslot test fails on aarch64 and s390x because
+	 * KVM_RUN fails with ENOEXEC or EFAULT.
+	 */
+	test_zero_memory_regions();
+#endif
+
+	test_add_max_memory_regions();
+
+#ifdef __x86_64__
+	loops = (argc > 1) ? atoi(argv[1]) : 10;
+
+	pr_info("Testing MOVE of in-use region, %d loops\n", loops);
+	for (iter = 0; iter < loops; iter++)
+		test_move_memory_region();
+
+	pr_info("Testing DELETE of in-use region, %d loops\n", loops);
+	for (iter = 0; iter < loops; iter++)
+		test_delete_memory_region();
+#endif
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
new file mode 100644
index 000000000..7daedee3e
--- /dev/null
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * steal/stolen time test
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <time.h>
+#include <sched.h>
+#include <pthread.h>
+#include <linux/kernel.h>
+#include <sys/syscall.h>
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define NR_VCPUS		4
+#define ST_GPA_BASE		(1 << 30)
+#define MIN_RUN_DELAY_NS	200000UL
+
+static void *st_gva[NR_VCPUS];
+static uint64_t guest_stolen_time[NR_VCPUS];
+
+#if defined(__x86_64__)
+
+/* steal_time must have 64-byte alignment */
+#define STEAL_TIME_SIZE		((sizeof(struct kvm_steal_time) + 63) & ~63)
+
+/*
+ * Guest-side sanity check of the steal-time record: the version must be
+ * even, and the flags and preempted fields must be zero.
+ */
+static void check_status(struct kvm_steal_time *st)
+{
+	GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
+	GUEST_ASSERT(READ_ONCE(st->flags) == 0);
+	GUEST_ASSERT(READ_ONCE(st->preempted) == 0);
+}
+
+/*
+ * x86 guest payload for vCPU @cpu.  Verifies that MSR_KVM_STEAL_TIME holds
+ * the expected address with the enable bit, zeroes the record, and then
+ * publishes st->steal into guest_stolen_time[cpu] twice, syncing with the
+ * host in between.  The second pass also requires that the record's version
+ * has increased since the first.
+ */
+static void guest_code(int cpu)
+{
+	struct kvm_steal_time *st = st_gva[cpu];
+	uint32_t version;
+
+	GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
+
+	memset(st, 0, sizeof(*st));
+	GUEST_SYNC(0);
+
+	check_status(st);
+	WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+	version = READ_ONCE(st->version);
+	check_status(st);
+	GUEST_SYNC(1);
+
+	check_status(st);
+	GUEST_ASSERT(version < READ_ONCE(st->version));
+	WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+	check_status(st);
+	GUEST_DONE();
+}
+
+/*
+ * x86 host-side setup.  Skips the test when KVM_FEATURE_STEAL_TIME is not
+ * reported.  Otherwise, for each vCPU: set the supported CPUID, assign the
+ * vCPU its own STEAL_TIME_SIZE slot at ST_GPA_BASE, check that an MSR value
+ * with reserved bits set is not accepted, and then enable steal time.
+ */
+static void steal_time_init(struct kvm_vm *vm)
+{
+	int i;
+
+	if (!(kvm_get_supported_cpuid_entry(KVM_CPUID_FEATURES)->eax &
+	      KVM_FEATURE_STEAL_TIME)) {
+		print_skip("steal-time not supported");
+		exit(KSFT_SKIP);
+	}
+
+	for (i = 0; i < NR_VCPUS; ++i) {
+		int ret;
+
+		vcpu_set_cpuid(vm, i, kvm_get_supported_cpuid());
+
+		/* ST_GPA_BASE is identity mapped */
+		st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+		sync_global_to_guest(vm, st_gva[i]);
+
+		/*
+		 * NOTE(review): a return of 0 from _vcpu_set_msr() is treated
+		 * here as "the write was rejected" -- confirm against the
+		 * helper's definition.
+		 */
+		ret = _vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
+		TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
+
+		vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
+	}
+}
+
+/*
+ * Print every field of the x86 steal-time record of vCPU @vcpuid, read
+ * through the host mapping of its guest address.
+ */
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
+	int i;
+
+	pr_info("VCPU%d:\n", vcpuid);
+	pr_info("    steal:     %lld\n", st->steal);
+	pr_info("    version:   %d\n", st->version);
+	pr_info("    flags:     %d\n", st->flags);
+	pr_info("    preempted: %d\n", st->preempted);
+	pr_info("    u8_pad:    ");
+	for (i = 0; i < 3; ++i)
+		pr_info("%d", st->u8_pad[i]);
+	pr_info("\n    pad:       ");
+	for (i = 0; i < 11; ++i)
+		pr_info("%d", st->pad[i]);
+	pr_info("\n");
+}
+
+#elif defined(__aarch64__)
+
+/* PV_TIME_ST must have 64-byte alignment */
+#define STEAL_TIME_SIZE		((sizeof(struct st_time) + 63) & ~63)
+
+#define SMCCC_ARCH_FEATURES	0x80000001
+#define PV_TIME_FEATURES	0xc5000020
+#define PV_TIME_ST		0xc5000021
+
+/*
+ * Layout of the record the aarch64 guest reads at the address returned by
+ * the PV_TIME_ST call.  The guest asserts that rev and attr are zero and
+ * reports st_time as its stolen time.
+ */
+struct st_time {
+	uint32_t rev;
+	uint32_t attr;
+	uint64_t st_time;
+};
+
+/*
+ * Issue "hvc #0" with @func in w0 and @arg in x1 and return the value the
+ * call leaves in x0.  x0-x3 are declared as clobbered.
+ */
+static int64_t smccc(uint32_t func, uint64_t arg)
+{
+	unsigned long ret;
+
+	asm volatile(
+		"mov	w0, %w1\n"
+		"mov	x1, %2\n"
+		"hvc	#0\n"
+		"mov	%0, x0\n"
+	: "=r" (ret) : "r" (func), "r" (arg) :
+	  "x0", "x1", "x2", "x3");
+
+	return ret;
+}
+
+/* Guest-side sanity check: both rev and attr of the record must be zero. */
+static void check_status(struct st_time *st)
+{
+	GUEST_ASSERT(READ_ONCE(st->rev) == 0);
+	GUEST_ASSERT(READ_ONCE(st->attr) == 0);
+}
+
+/*
+ * aarch64 guest payload for vCPU @cpu.  Probes the PV time calls via
+ * smccc(), checks that PV_TIME_ST returns the address the host configured
+ * for this vCPU, and then publishes st->st_time into guest_stolen_time[cpu]
+ * twice, syncing with the host in between.
+ */
+static void guest_code(int cpu)
+{
+	struct st_time *st;
+	int64_t status;
+
+	/* Each feature query is expected to return 0 */
+	status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES);
+	GUEST_ASSERT(status == 0);
+	status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES);
+	GUEST_ASSERT(status == 0);
+	status = smccc(PV_TIME_FEATURES, PV_TIME_ST);
+	GUEST_ASSERT(status == 0);
+
+	/* PV_TIME_ST must return the record address, not -1 */
+	status = smccc(PV_TIME_ST, 0);
+	GUEST_ASSERT(status != -1);
+	GUEST_ASSERT(status == (ulong)st_gva[cpu]);
+
+	st = (struct st_time *)status;
+	GUEST_SYNC(0);
+
+	check_status(st);
+	WRITE_ONCE(guest_stolen_time[cpu], st->st_time);
+	GUEST_SYNC(1);
+
+	check_status(st);
+	WRITE_ONCE(guest_stolen_time[cpu], st->st_time);
+	GUEST_DONE();
+}
+
+/*
+ * aarch64 host-side setup.  Skips the test when vCPU 0 reports ENXIO for the
+ * KVM_ARM_VCPU_PVTIME_IPA attribute.  Otherwise, for each vCPU: assign it
+ * its own STEAL_TIME_SIZE slot at ST_GPA_BASE, check that an address with
+ * bit 0 set is rejected with EINVAL, set the real address, and check that
+ * setting it a second time is rejected with EEXIST.
+ */
+static void steal_time_init(struct kvm_vm *vm)
+{
+	struct kvm_device_attr dev = {
+		.group = KVM_ARM_VCPU_PVTIME_CTRL,
+		.attr = KVM_ARM_VCPU_PVTIME_IPA,
+	};
+	int i, ret;
+
+	ret = _vcpu_ioctl(vm, 0, KVM_HAS_DEVICE_ATTR, &dev);
+	if (ret != 0 && errno == ENXIO) {
+		print_skip("steal-time not supported");
+		exit(KSFT_SKIP);
+	}
+
+	for (i = 0; i < NR_VCPUS; ++i) {
+		uint64_t st_ipa;
+
+		vcpu_ioctl(vm, i, KVM_HAS_DEVICE_ATTR, &dev);
+
+		/* The attribute value is passed by pointer through dev.addr */
+		dev.addr = (uint64_t)&st_ipa;
+
+		/* ST_GPA_BASE is identity mapped */
+		st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+		sync_global_to_guest(vm, st_gva[i]);
+
+		st_ipa = (ulong)st_gva[i] | 1;
+		ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+		TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
+
+		st_ipa = (ulong)st_gva[i];
+		vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+
+		ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+		TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
+
+	}
+}
+
+/*
+ * Print every field of the aarch64 stolen-time record of vCPU @vcpuid, read
+ * through the host mapping of its guest address.
+ */
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
+
+	pr_info("VCPU%d:\n", vcpuid);
+	pr_info("    rev:     %d\n", st->rev);
+	pr_info("    attr:    %d\n", st->attr);
+	pr_info("    st_time: %ld\n", st->st_time);
+}
+
+#endif
+
+/*
+ * Return the second field of /proc/<tid>/schedstat for the calling thread,
+ * which this test uses as the thread's accumulated run delay.
+ *
+ * Fails the test when the file cannot be opened or does not start with two
+ * integers, instead of dereferencing a NULL FILE pointer or returning an
+ * uninitialized value.
+ */
+static long get_run_delay(void)
+{
+	char path[64];
+	long val[2];
+	FILE *fp;
+	int nr;
+
+	snprintf(path, sizeof(path), "/proc/%ld/schedstat", syscall(SYS_gettid));
+	fp = fopen(path, "r");
+	TEST_ASSERT(fp, "Failed to open %s", path);
+
+	nr = fscanf(fp, "%ld %ld ", &val[0], &val[1]);
+	fclose(fp);
+	TEST_ASSERT(nr == 2, "Failed to parse %s", path);
+
+	return val[1];
+}
+
+/*
+ * Thread body that keeps the CPU busy: polls CLOCK_MONOTONIC until at least
+ * MIN_RUN_DELAY_NS have elapsed since entry.  @arg is unused; always returns
+ * NULL.
+ */
+static void *do_steal_time(void *arg)
+{
+	struct timespec now, deadline;
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	deadline = timespec_add_ns(now, MIN_RUN_DELAY_NS);
+
+	/* Busy-wait until the current time reaches the deadline */
+	do {
+		clock_gettime(CLOCK_MONOTONIC, &now);
+	} while (timespec_to_ns(timespec_sub(now, deadline)) < 0);
+
+	return NULL;
+}
+
+/*
+ * Run vCPU @vcpuid once, passing its own id as the guest's first argument,
+ * and check how it stopped: UCALL_SYNC and UCALL_DONE are accepted,
+ * UCALL_ABORT and anything else fail the test.
+ */
+static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct ucall uc;
+
+	vcpu_args_set(vm, vcpuid, 1, vcpuid);
+
+	vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);
+
+	switch (get_ucall(vm, vcpuid, &uc)) {
+	case UCALL_SYNC:
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		/*
+		 * NOTE(review): there is no break here; presumably
+		 * TEST_ASSERT(false, ...) does not return -- confirm.
+		 */
+		TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0],
+			    __FILE__, uc.args[1]);
+	default:
+		TEST_ASSERT(false, "Unexpected exit: %s",
+			    exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+	}
+}
+
+/*
+ * Test flow, repeated for each of the NR_VCPUS vCPUs:
+ *  1. Run the vCPU once so the guest initializes steal time.
+ *  2. Run it again and check that the stolen time the guest reports does not
+ *     exceed this thread's run delay.
+ *  3. Start a busy thread pinned to the same CPU and yield until this thread
+ *     has accumulated at least MIN_RUN_DELAY_NS of additional run delay.
+ *  4. Run the vCPU a third time and check that the guest's stolen time grew
+ *     by at least that run delay.
+ *
+ * Pass "-v" or "--verbose" as the first argument to print per-vCPU details.
+ */
+int main(int ac, char **av)
+{
+	struct kvm_vm *vm;
+	pthread_attr_t attr;
+	pthread_t thread;
+	cpu_set_t cpuset;
+	unsigned int gpages;
+	long stolen_time;
+	long run_delay;
+	bool verbose;
+	int i;
+
+	verbose = ac > 1 && (!strncmp(av[1], "-v", 3) || !strncmp(av[1], "--verbose", 10));
+
+	/* Set CPU affinity so we can force preemption of the VCPU */
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	pthread_attr_init(&attr);
+	pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+	pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+
+	/* Create a one VCPU guest and an identity mapped memslot for the steal time structure */
+	vm = vm_create_default(0, 0, guest_code);
+	gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS);
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
+	virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages, 0);
+	ucall_init(vm, NULL);
+
+	/* Add the rest of the VCPUs */
+	for (i = 1; i < NR_VCPUS; ++i)
+		vm_vcpu_add_default(vm, i, guest_code);
+
+	steal_time_init(vm);
+
+	/* Run test on each VCPU */
+	for (i = 0; i < NR_VCPUS; ++i) {
+		/* First VCPU run initializes steal-time */
+		run_vcpu(vm, i);
+
+		/* Second VCPU run, expect guest stolen time to be <= run_delay */
+		run_vcpu(vm, i);
+		sync_global_from_guest(vm, guest_stolen_time[i]);
+		stolen_time = guest_stolen_time[i];
+		run_delay = get_run_delay();
+		TEST_ASSERT(stolen_time <= run_delay,
+			    "Expected stolen time <= %ld, got %ld",
+			    run_delay, stolen_time);
+
+		/* Steal time from the VCPU. The steal time thread has the same CPU affinity as the VCPUs. */
+		run_delay = get_run_delay();
+		pthread_create(&thread, &attr, do_steal_time, NULL);
+		/*
+		 * sched_yield() replaces the deprecated, non-standard
+		 * pthread_yield(); it gives the CPU to the busy thread.
+		 */
+		do
+			sched_yield();
+		while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS);
+		pthread_join(thread, NULL);
+		run_delay = get_run_delay() - run_delay;
+		TEST_ASSERT(run_delay >= MIN_RUN_DELAY_NS,
+			    "Expected run_delay >= %ld, got %ld",
+			    MIN_RUN_DELAY_NS, run_delay);
+
+		/* Run VCPU again to confirm stolen time is consistent with run_delay */
+		run_vcpu(vm, i);
+		sync_global_from_guest(vm, guest_stolen_time[i]);
+		/* From here on stolen_time is the delta across the busy period */
+		stolen_time = guest_stolen_time[i] - stolen_time;
+		TEST_ASSERT(stolen_time >= run_delay,
+			    "Expected stolen time >= %ld, got %ld",
+			    run_delay, stolen_time);
+
+		if (verbose) {
+			pr_info("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld", i,
+				guest_stolen_time[i], stolen_time);
+			if (stolen_time == run_delay)
+				pr_info(" (BONUS: guest test-stolen-time even exactly matches test-run_delay)");
+			pr_info("\n");
+			steal_time_dump(vm, i);
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
new file mode 100644
index 000000000..140e91901
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CR4 and CPUID sync test
+ *
+ * Copyright 2018, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ *   Wei Huang <wei@redhat.com>
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define X86_FEATURE_XSAVE	(1<<26)
+#define X86_FEATURE_OSXSAVE	(1<<27)
+#define VCPU_ID			1
+
+/*
+ * Guest helper: execute CPUID with EAX=1/ECX=0 and report whether the
+ * OSXSAVE bit it returns in ECX agrees with the OSXSAVE bit currently set in
+ * CR4.
+ */
+static inline bool cr4_cpuid_is_sync(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+	bool cpuid_osxsave, cr4_osxsave;
+
+	__asm__ __volatile__("cpuid"
+			     : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+			     : "a"(0x1), "c"(0x0));
+
+	cpuid_osxsave = ecx & X86_FEATURE_OSXSAVE;
+	cr4_osxsave = get_cr4() & X86_CR4_OSXSAVE;
+
+	return cpuid_osxsave == cr4_osxsave;
+}
+
+/*
+ * Guest payload: set CR4.OSXSAVE, check that CPUID agrees, sync with the
+ * host (which changes CR4 from outside), and check again that CPUID agrees
+ * with the new CR4 value.
+ */
+static void guest_code(void)
+{
+	uint64_t cr4;
+
+	/* turn on CR4.OSXSAVE */
+	cr4 = get_cr4();
+	cr4 |= X86_CR4_OSXSAVE;
+	set_cr4(cr4);
+
+	/* verify CR4.OSXSAVE == CPUID.OSXSAVE */
+	GUEST_ASSERT(cr4_cpuid_is_sync());
+
+	/* notify hypervisor to change CR4 */
+	GUEST_SYNC(0);
+
+	/* check again */
+	GUEST_ASSERT(cr4_cpuid_is_sync());
+
+	GUEST_DONE();
+}
+
+/*
+ * Host side of the CR4/CPUID sync test.  Skips when XSAVE is not reported in
+ * the supported CPUID.  Otherwise runs the guest until it signals
+ * UCALL_DONE; on the guest's UCALL_SYNC it clears CR4.OSXSAVE through
+ * KVM_SET_SREGS so the guest can verify CPUID follows the change.
+ */
+int main(int argc, char *argv[])
+{
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	struct kvm_sregs sregs;
+	struct kvm_cpuid_entry2 *entry;
+	struct ucall uc;
+	int rc;
+
+	entry = kvm_get_supported_cpuid_entry(1);
+	if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+		print_skip("XSAVE feature not supported");
+		/* Report a skip, not a pass, like the other tests do */
+		exit(KSFT_SKIP);
+	}
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	while (1) {
+		rc = _vcpu_run(vm, VCPU_ID);
+
+		TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Unexpected exit reason: %u (%s),\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_SYNC:
+			/* emulate hypervisor clearing CR4.OSXSAVE */
+			vcpu_sregs_get(vm, VCPU_ID, &sregs);
+			sregs.cr4 &= ~X86_CR4_OSXSAVE;
+			vcpu_sregs_set(vm, VCPU_ID, &sregs);
+			break;
+		case UCALL_ABORT:
+			TEST_FAIL("Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+
+done:
+	/* The loop only exits through this label, so free the VM here */
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
new file mode 100644
index 000000000..2fc6b3af8
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM guest debug register tests
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 0
+
+#define DR6_BD		(1 << 13)
+#define DR7_GD		(1 << 13)
+
+/* For testing data access debug BP */
+uint32_t guest_value;
+
+extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
+
+static void guest_code(void)
+{
+	/*
+	 * Software BP tests.
+	 *
+	 * NOTE: the sw_bp label needs to sit before the int3 here, because
+	 * int3 is an exception rather than a normal trap for
+	 * KVM_SET_GUEST_DEBUG (we capture it using the vcpu exception bitmap).
+	 */
+	asm volatile("sw_bp: int3");
+
+	/* Hardware instruction BP test */
+	asm volatile("hw_bp: nop");
+
+	/* Hardware data BP test: write_data labels the address after the store */
+	asm volatile("mov $1234,%%rax;\n\t"
+		     "mov %%rax,%0;\n\t write_data:"
+		     : "=m" (guest_value) : : "rax");
+
+	/* Single step test, covers 2 basic instructions and 2 emulated */
+	asm volatile("ss_start: "
+		     "xor %%eax,%%eax\n\t"
+		     "cpuid\n\t"
+		     "movl $0x1a0,%%ecx\n\t"
+		     "rdmsr\n\t"
+		     : : : "eax", "ebx", "ecx", "edx");
+
+	/* DR6.BD test: this DR0 read is where the host expects the DR7.GD trap */
+	asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
+	GUEST_DONE();
+}
+
+#define  CLEAR_DEBUG()  memset(&debug, 0, sizeof(debug))	/* zero the request */
+#define  APPLY_DEBUG()  vcpu_set_guest_debug(vm, VCPU_ID, &debug)
+#define  CAST_TO_RIP(v)  ((unsigned long long)&(v))	/* address of a guest label */
+#define  SET_RIP(v)  do {				\
+		vcpu_regs_get(vm, VCPU_ID, &regs);	\
+		regs.rip = (v);				\
+		vcpu_regs_set(vm, VCPU_ID, &regs);	\
+	} while (0)
+#define  MOVE_RIP(v)  SET_RIP(regs.rip + (v))	/* no trailing ';': the caller supplies it */
+
+/* Host side: drive each KVM_SET_GUEST_DEBUG mode and check the debug exit it yields. */
+int main(void)
+{
+	struct kvm_guest_debug debug;
+	unsigned long long target_dr6, target_rip;
+	struct kvm_regs regs;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	uint64_t cmd;
+	int i;
+	/* Instruction lengths starting at ss_start */
+	int ss_size[4] = {
+		2,		/* xor */
+		2,		/* cpuid */
+		5,		/* mov */
+		2,		/* rdmsr */
+	};
+
+	if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
+		print_skip("KVM_CAP_SET_GUEST_DEBUG not supported");
+		return 0;
+	}
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	/* Test software BPs - int3 */
+	CLEAR_DEBUG();
+	debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+	APPLY_DEBUG();
+	vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+		    run->debug.arch.exception == BP_VECTOR &&
+		    run->debug.arch.pc == CAST_TO_RIP(sw_bp),
+		    "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
+		    run->exit_reason, run->debug.arch.exception,
+		    run->debug.arch.pc, CAST_TO_RIP(sw_bp));
+	/* Skip the 1-byte "int3" */
+	MOVE_RIP(1);
+
+	/* Test instruction HW BP over DR[0-3] */
+	for (i = 0; i < 4; i++) {
+		CLEAR_DEBUG();
+		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+		debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
+		debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
+		APPLY_DEBUG();
+		vcpu_run(vm, VCPU_ID);
+		target_dr6 = 0xffff0ff0 | (1UL << i);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+			    run->debug.arch.exception == DB_VECTOR &&
+			    run->debug.arch.pc == CAST_TO_RIP(hw_bp) &&
+			    run->debug.arch.dr6 == target_dr6,
+			    "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+			    "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+			    i, run->exit_reason, run->debug.arch.exception,
+			    run->debug.arch.pc, CAST_TO_RIP(hw_bp),
+			    run->debug.arch.dr6, target_dr6);
+	}
+	/* Skip "nop" */
+	MOVE_RIP(1);
+
+	/* Test data access HW BP over DR[0-3] */
+	for (i = 0; i < 4; i++) {
+		CLEAR_DEBUG();
+		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+		debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
+		debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
+		    (0x000d0000UL << (4*i));
+		APPLY_DEBUG();
+		vcpu_run(vm, VCPU_ID);
+		target_dr6 = 0xffff0ff0 | (1UL << i);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+			    run->debug.arch.exception == DB_VECTOR &&
+			    run->debug.arch.pc == CAST_TO_RIP(write_data) &&
+			    run->debug.arch.dr6 == target_dr6,
+			    "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+			    "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+			    i, run->exit_reason, run->debug.arch.exception,
+			    run->debug.arch.pc, CAST_TO_RIP(write_data),
+			    run->debug.arch.dr6, target_dr6);
+		/* Rewind RIP by 7 so the storing "mov" runs again next round */
+		MOVE_RIP(-7);
+	}
+	/* Undo the last rewind, i.e. skip the storing "mov" */
+	MOVE_RIP(7);
+
+	/* Test single step */
+	target_rip = CAST_TO_RIP(ss_start);
+	target_dr6 = 0xffff4ff0ULL;
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
+		target_rip += ss_size[i];
+		CLEAR_DEBUG();
+		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+		debug.arch.debugreg[7] = 0x00000400;
+		APPLY_DEBUG();
+		vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+			    run->debug.arch.exception == DB_VECTOR &&
+			    run->debug.arch.pc == target_rip &&
+			    run->debug.arch.dr6 == target_dr6,
+			    "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx "
+			    "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+			    i, run->exit_reason, run->debug.arch.exception,
+			    run->debug.arch.pc, target_rip, run->debug.arch.dr6,
+			    target_dr6);
+	}
+
+	/* Finally test global disable */
+	CLEAR_DEBUG();
+	debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+	debug.arch.debugreg[7] = 0x400 | DR7_GD;
+	APPLY_DEBUG();
+	vcpu_run(vm, VCPU_ID);
+	target_dr6 = 0xffff0ff0 | DR6_BD;
+	/* Report bd_start as the expected RIP, not the stale single-step target */
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+		    run->debug.arch.exception == DB_VECTOR &&
+		    run->debug.arch.pc == CAST_TO_RIP(bd_start) &&
+		    run->debug.arch.dr6 == target_dr6,
+		    "DR7.GD: exit %d exception %d rip 0x%llx "
+		    "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+		    run->exit_reason, run->debug.arch.exception,
+		    run->debug.arch.pc, CAST_TO_RIP(bd_start),
+		    run->debug.arch.dr6, target_dr6);
+
+	/* Disable all debug controls, run to the end */
+	CLEAR_DEBUG();
+	APPLY_DEBUG();
+	vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "KVM_EXIT_IO");
+	cmd = get_ucall(vm, VCPU_ID, &uc);
+	TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
new file mode 100644
index 000000000..757928199
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for Enlightened VMCS, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+
+#define VCPU_ID		5
+
+void l2_guest_code(void)
+{
+	GUEST_SYNC(7);	/* stage numbers are checked against the host's stage counter */
+
+	GUEST_SYNC(8);
+
+	/* Done, exit to L1 and never come back.  */
+	vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64	/* counted in unsigned longs */
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_SYNC(3);
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+
+	GUEST_SYNC(4);	/* the eVMCS pointer must survive the host's save/restore */
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);	/* one past the array end */
+
+	GUEST_SYNC(5);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	current_evmcs->revision_id = -1u;	/* deliberately bogus revision id */
+	GUEST_ASSERT(vmlaunch());	/* asserted to return non-zero with the bogus id */
+	current_evmcs->revision_id = EVMCS_VERSION;	/* put the real id back */
+	GUEST_SYNC(6);
+
+	GUEST_ASSERT(!vmlaunch());	/* L2 runs until its vmcall; its syncs are stages 7-8 */
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	GUEST_SYNC(9);
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	GUEST_SYNC(10);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+	GUEST_SYNC(1);
+	GUEST_SYNC(2);
+	/* NOTE(review): the tail of this function uses vmx_pages without this check */
+	if (vmx_pages)
+		l1_guest_code(vmx_pages);
+
+	GUEST_DONE();
+	/* The host re-enters the guest after DONE and expects KVM_EXIT_SHUTDOWN. */
+	/* Try enlightened vmptrld with an incorrect GPA */
+	evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
+	GUEST_ASSERT(vmlaunch());
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t vmx_pages_gva = 0;
+
+	struct kvm_regs regs1, regs2;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	struct ucall uc;
+	int stage;
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	if (!nested_vmx_supported() ||
+	    !kvm_check_cap(KVM_CAP_NESTED_STATE) ||
+	    !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+		print_skip("Enlightened VMCS is unsupported");
+		exit(KSFT_SKIP);
+	}
+
+	vcpu_enable_evmcs(vm, VCPU_ID);
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);	/* becomes guest_code()'s argument */
+
+	for (stage = 1;; stage++) {	/* one iteration per GUEST_SYNC() in the guest */
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+		      		  __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto part1_done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here: the guest must report the expected stage.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage, (ulong)uc.args[1]);
+
+		state = vcpu_save_state(vm, VCPU_ID);
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vm, VCPU_ID, &regs1);	/* reference copy for the check below */
+
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM.  */
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_enable_evmcs(vm, VCPU_ID);
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);	/* refreshed after the vCPU was re-added */
+		free(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vm, VCPU_ID, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+
+part1_done:
+	_vcpu_run(vm, VCPU_ID);	/* the guest now tries vmptrld with a bogus eVMCS GPA */
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+		    "Unexpected successful VMEnter with invalid eVMCS pointer!");
+
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
new file mode 100644
index 000000000..745b708c2
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_HYPERV_CPUID
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VCPU_ID 0
+
+static void guest_code(void)
+{	/* Intentionally empty: main() only issues ioctls and never runs the vCPU */
+}
+
+/* Report whether SMT can be active, per /sys/devices/system/cpu/smt/control. */
+static bool smt_possible(void)
+{
+	char buf[16] = { 0 };	/* zeroed so a short read stays NUL-terminated */
+	bool res = true;	/* file missing or unreadable: assume SMT is possible */
+	FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r");
+
+	if (f) {
+		if (fread(buf, sizeof(*buf), sizeof(buf) - 1, f) > 0) {
+			if (!strncmp(buf, "forceoff", 8) ||
+			    !strncmp(buf, "notsupported", 12))
+				res = false;
+		}
+		fclose(f);
+	}
+
+	return res;
+}
+
+/* Check entry count, leaf range, zeroed fields, max leaf and the SMT hint bit. */
+static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
+			  bool evmcs_enabled)
+{
+	int i;
+	int nent = 9;
+	u32 test_val;
+
+	if (evmcs_enabled)
+		nent += 1; /* 0x4000000A */
+
+	TEST_ASSERT(hv_cpuid_entries->nent == nent,
+		    "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
+		    " with evmcs=%d (returned %d)",
+		    nent, evmcs_enabled, hv_cpuid_entries->nent);
+
+	for (i = 0; i < hv_cpuid_entries->nent; i++) {
+		struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
+
+		TEST_ASSERT((entry->function >= 0x40000000) &&
+			    (entry->function <= 0x40000082),
+			    "function %x is out of supported range",
+			    entry->function);
+
+		TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A),
+			    "0x4000000A leaf should not be reported");
+
+		TEST_ASSERT(entry->index == 0,
+			    ".index field should be zero");
+
+		TEST_ASSERT(entry->flags == 0,
+			    ".flags field should be zero");
+
+		TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
+			    !entry->padding[2], "padding should be zero");
+
+		switch (entry->function) {
+		case 0x40000000:
+			test_val = 0x40000082;
+
+			TEST_ASSERT(entry->eax == test_val,
+				    "Wrong max leaf report in 0x40000000.EAX: %x"
+				    " (evmcs=%d)",
+				    entry->eax, evmcs_enabled
+				);
+			break;
+		case 0x40000004:
+			/* EAX bit 18 must be set exactly when SMT is impossible */
+			test_val = entry->eax & (1UL << 18);
+			TEST_ASSERT(!!test_val == !smt_possible(),
+				    "NoNonArchitecturalCoreSharing bit"
+				    " doesn't reflect SMT setting");
+			break;
+		}
+
+		/*
+		 * If needed for debug:
+		 * fprintf(stdout,
+		 *	"CPUID%x EAX=0x%x EBX=0x%x ECX=0x%x EDX=0x%x\n",
+		 *	entry->function, entry->eax, entry->ebx, entry->ecx,
+		 *	entry->edx);
+		 */
+	}
+}
+
+/* KVM_GET_SUPPORTED_HV_CPUID on a zero-entry buffer has to fail with E2BIG. */
+void test_hv_cpuid_e2big(struct kvm_vm *vm)
+{
+	/* nent == 0: the buffer advertises room for no entry at all */
+	static struct kvm_cpuid2 no_room = { .nent = 0 };
+	int ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &no_room);
+
+	TEST_ASSERT(ret == -1 && errno == E2BIG,
+		    "KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+		    " it should have: %d %d", ret, errno);
+}
+
+
+/* Return a malloc()ed kvm_cpuid2 holding VCPU_ID's Hyper-V leaves; caller frees. */
+struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
+{
+	int nent = 20; /* should be enough */
+	struct kvm_cpuid2 *cpuid;
+
+	cpuid = malloc(sizeof(*cpuid) + nent * sizeof(cpuid->entries[0]));
+	if (!cpuid) {
+		perror("malloc");
+		abort();
+	}
+
+	cpuid->nent = nent;
+
+	vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+	return cpuid;
+}
+
+
+/* Check KVM_GET_SUPPORTED_HV_CPUID: E2BIG case, then without and with eVMCS. */
+int main(int argc, char *argv[])
+{
+	struct kvm_cpuid2 *hv_cpuid_entries;
+	bool evmcs_enabled;
+	struct kvm_vm *vm;
+	int stage;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	if (!kvm_check_cap(KVM_CAP_HYPERV_CPUID)) {
+		print_skip("KVM_CAP_HYPERV_CPUID not supported");
+		exit(KSFT_SKIP);
+	}
+
+	for (stage = 0; stage < 3; stage++) {
+		evmcs_enabled = false;
+		vm = vm_create_default(VCPU_ID, 0, guest_code);
+		switch (stage) {
+		case 0:
+			test_hv_cpuid_e2big(vm);
+			goto free_vm;
+		case 1:
+			break;
+		case 2:
+			if (!nested_vmx_supported() ||
+			    !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+				print_skip("Enlightened VMCS is unsupported");
+				goto free_vm;
+			}
+			vcpu_enable_evmcs(vm, VCPU_ID);
+			evmcs_enabled = true;
+			break;
+		}
+
+		hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
+		test_hv_cpuid(hv_cpuid_entries, evmcs_enabled);
+		free(hv_cpuid_entries);
+free_vm:	/* early-exit stages land here too, so no stage leaks its VM */
+		kvm_vm_free(vm);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
new file mode 100644
index 000000000..b10a27485
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+extern unsigned char rdmsr_start;
+extern unsigned char rdmsr_end;
+
+/* Guest helper: read MSR @idx; RDMSR hands the value back in EDX:EAX. */
+static u64 do_rdmsr(u32 idx)
+{
+	u32 lo, hi;
+
+	asm volatile("rdmsr_start: rdmsr;"
+		     "rdmsr_end:"
+		     : "=a"(lo), "=d"(hi) : "c"(idx));
+
+	return (((u64) hi) << 32) | lo;
+}
+
+extern unsigned char wrmsr_start;
+extern unsigned char wrmsr_end;
+
+/* Guest helper: write @val to MSR @idx, split across EDX:EAX for WRMSR. */
+static void do_wrmsr(u32 idx, u64 val)
+{
+	u32 lo = val;
+	u32 hi = val >> 32;
+
+	/* The two labels let guest_gp_handler() spot and skip a faulting wrmsr */
+	asm volatile("wrmsr_start: wrmsr;"
+		     "wrmsr_end:"
+		     : : "a"(lo), "c"(idx), "d"(hi));
+}
+
+/* Number of #GPs the guest has taken since the counter was last reset. */
+static int nr_gp;
+
+/* Guest #GP handler: count the fault and step over the rdmsr/wrmsr behind it. */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+	unsigned char *fault_ip = (unsigned char *)regs->rip;
+	bool r = fault_ip == &rdmsr_start;
+	bool w = fault_ip == &wrmsr_start;
+
+	GUEST_ASSERT(r || w);
+
+	nr_gp++;
+
+	/* Resume at the label placed right behind the faulting instruction */
+	regs->rip = r ? (uint64_t)&rdmsr_end
+		      : (uint64_t)&wrmsr_end;
+}
+
+struct msr_data {
+	uint32_t idx;		/* MSR index handed to rdmsr/wrmsr */
+	const char *name;	/* macro name as text, printed by the host */
+};
+
+#define TEST_MSR(msr) { .idx = msr, .name = #msr }
+#define UCALL_PR_MSR 0xdeadbeef	/* custom ucall number handled in enter_guest() */
+#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
+
+/*
+ * KVM paravirtual MSRs to test. Expect a #GP if any of these MSRs are read or
+ * written, as the KVM_CPUID_FEATURES leaf is cleared.
+ */
+static struct msr_data msrs_to_test[] = {
+	TEST_MSR(MSR_KVM_SYSTEM_TIME),
+	TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
+	TEST_MSR(MSR_KVM_WALL_CLOCK),
+	TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
+	TEST_MSR(MSR_KVM_ASYNC_PF_EN),
+	TEST_MSR(MSR_KVM_STEAL_TIME),
+	TEST_MSR(MSR_KVM_PV_EOI_EN),
+	TEST_MSR(MSR_KVM_POLL_CONTROL),
+	TEST_MSR(MSR_KVM_ASYNC_PF_INT),
+	TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
+};
+
+static void test_msr(struct msr_data *msr)
+{
+	PR_MSR(msr);	/* have the host print which MSR is under test */
+	do_rdmsr(msr->idx);
+	GUEST_ASSERT(READ_ONCE(nr_gp) == 1);	/* the read must have faulted exactly once */
+
+	nr_gp = 0;
+	do_wrmsr(msr->idx, 0);
+	GUEST_ASSERT(READ_ONCE(nr_gp) == 1);	/* ... and so must the write */
+	nr_gp = 0;	/* leave the counter clean for the next MSR */
+}
+
+struct hcall_data {
+	uint64_t nr;		/* hypercall number passed to kvm_hypercall() */
+	const char *name;	/* macro name as text, printed by the host */
+};
+
+#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
+#define UCALL_PR_HCALL 0xdeadc0de	/* custom ucall number handled in enter_guest() */
+#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
+
+/*
+ * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
+ * features have been cleared in KVM_CPUID_FEATURES.
+ */
+static struct hcall_data hcalls_to_test[] = {
+	TEST_HCALL(KVM_HC_KICK_CPU),
+	TEST_HCALL(KVM_HC_SEND_IPI),
+	TEST_HCALL(KVM_HC_SCHED_YIELD),
+};
+
+static void test_hcall(struct hcall_data *hc)
+{
+	uint64_t r;
+
+	PR_HCALL(hc);	/* have the host print which hypercall is under test */
+	r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
+	GUEST_ASSERT(r == -KVM_ENOSYS);	/* the call must come back as -KVM_ENOSYS */
+}
+
+/* Guest entry point: probe every listed MSR, then every listed hypercall. */
+static void guest_main(void)
+{
+	struct msr_data *msr = msrs_to_test;
+	struct hcall_data *hc = hcalls_to_test;
+
+	while (msr < msrs_to_test + ARRAY_SIZE(msrs_to_test))
+		test_msr(msr++);
+
+	while (hc < hcalls_to_test + ARRAY_SIZE(hcalls_to_test))
+		test_hcall(hc++);
+
+	GUEST_DONE();
+}
+
+/* Hand set_cpuid() a KVM_CPUID_FEATURES entry whose other fields are all zero. */
+static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid)
+{
+	struct kvm_cpuid_entry2 ent = { .function = KVM_CPUID_FEATURES };
+
+	TEST_ASSERT(set_cpuid(cpuid, &ent),
+		    "failed to clear KVM_CPUID_FEATURES leaf");
+}
+
+static void pr_msr(struct ucall *uc)
+{
+	const struct msr_data *m = (const struct msr_data *)uc->args[0];
+	/* args[0] is the msr_data pointer the guest handed to PR_MSR() */
+	pr_info("testing msr: %s (%#x)\n", m->name, m->idx);
+}
+
+static void pr_hcall(struct ucall *uc)
+{
+	const struct hcall_data *call = (const struct hcall_data *)uc->args[0];
+	/* args[0] is the hcall_data pointer the guest handed to PR_HCALL() */
+	pr_info("testing hcall: %s (%lu)\n", call->name, call->nr);
+}
+
+static void handle_abort(struct ucall *uc)
+{	/* Fail the test with the guest's message; args[1] is presumably its line number */
+	TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
+		  __FILE__, uc->args[1]);
+}
+
+#define VCPU_ID 0
+
+/* Run the guest until it reports DONE, printing progress on its PR_* ucalls. */
+static void enter_guest(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc;
+	int r;
+
+	while (true) {
+		r = _vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "unexpected exit reason: %u (%s)", run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_PR_MSR:
+			pr_msr(&uc);
+			break;
+		case UCALL_PR_HCALL:
+			pr_hcall(&uc);
+			break;
+		case UCALL_ABORT:
+			handle_abort(&uc);
+			return;
+		case UCALL_DONE:
+			return;
+		default:	/* fail instead of silently re-entering the guest */
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+}
+
+/* Enforce PV-feature CPUID, zero the KVM_CPUID_FEATURES leaf, then run the guest. */
+int main(void)
+{
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID, .args[0] = 1,
+	};
+	struct kvm_cpuid2 *best;
+	struct kvm_vm *vm;
+
+	if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) {
+		pr_info("will skip kvm paravirt restriction tests.\n");
+		return 0;
+	}
+
+	vm = vm_create_default(VCPU_ID, 0, guest_main);
+	vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+	best = kvm_get_supported_cpuid();
+	clear_kvm_cpuid_features(best);
+	vcpu_set_cpuid(vm, VCPU_ID, best);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+	vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+
+	enter_guest(vm);
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
new file mode 100644
index 000000000..9f55ccd16
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
@@ -0,0 +1,127 @@
+/*
+ * mmio_warning_test
+ *
+ * Copyright (C) 2019, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that we don't get a kernel warning when we call KVM_RUN after a
+ * triple fault occurs.  To get the triple fault to occur we call KVM_RUN
+ * on a VCPU that hasn't been properly set up.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <kvm_util.h>
+#include <linux/kvm.h>
+#include <processor.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <test_util.h>
+#include <unistd.h>
+
+#define NTHREAD 4
+#define NPROCESS 5
+
+struct thread_context {
+	int kvmcpu;		/* vCPU file descriptor every thread calls KVM_RUN on */
+	struct kvm_run *run;	/* mmap()ed kvm_run area of that vCPU */
+};
+
+/*
+ * Thread body: issue KVM_RUN on the shared vCPU fd and log how the call ended.
+ */
+void *thr(void *arg)
+{
+	struct thread_context *tc = arg;
+	int res = ioctl(tc->kvmcpu, KVM_RUN, 0);
+
+	pr_info("ret1=%d exit_reason=%d suberror=%d\n",
+		res, tc->run->exit_reason, tc->run->internal.suberror);
+
+	return NULL;
+}
+
+/* Create a bare VM and vCPU, then call KVM_RUN on that vCPU from NTHREAD threads. */
+void test(void)
+{
+	int i, kvm, kvmvm, kvmcpu;
+	pthread_t th[NTHREAD];
+	struct kvm_run *run;
+	struct thread_context tc;
+
+	kvm = open("/dev/kvm", O_RDWR);
+	TEST_ASSERT(kvm != -1, "failed to open /dev/kvm");
+	kvmvm = ioctl(kvm, KVM_CREATE_VM, 0);
+	TEST_ASSERT(kvmvm != -1, "KVM_CREATE_VM failed");
+	kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0);
+	TEST_ASSERT(kvmcpu != -1, "KVM_CREATE_VCPU failed");
+	run = mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, kvmcpu, 0);
+	TEST_ASSERT(run != MAP_FAILED, "mmap of kvm_run failed");
+	tc = (struct thread_context){ .kvmcpu = kvmcpu, .run = run };
+	srand(getpid());
+	for (i = 0; i < NTHREAD; i++) {
+		pthread_create(&th[i], NULL, thr, &tc);
+		usleep(rand() % 10000);	/* stagger the thread start times */
+	}
+	for (i = 0; i < NTHREAD; i++)
+		pthread_join(th[i], NULL);
+}
+
+/* Count the "WARNING:" lines currently in dmesg (0 if the count is unreadable). */
+int get_warnings_count(void)
+{
+	FILE *f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
+	int warnings;
+
+	TEST_ASSERT(f, "popen(dmesg) failed");	/* fscanf(NULL, ...) would crash */
+	if (fscanf(f, "%d", &warnings) < 1)
+		warnings = 0;
+	pclose(f);
+	return warnings;
+}
+
+/* Run test() in NPROCESS children; fail if the dmesg WARNING count changed. */
+int main(void)
+{
+	int warnings_before, warnings_after;
+
+	if (!is_intel_cpu()) {
+		print_skip("Must be run on an Intel CPU");
+		exit(KSFT_SKIP);
+	}
+
+	if (vm_is_unrestricted_guest(NULL)) {
+		print_skip("Unrestricted guest must be disabled");
+		exit(KSFT_SKIP);
+	}
+
+	warnings_before = get_warnings_count();
+	for (int round = 0; round < NPROCESS; ++round) {
+		int status;
+		int child = fork();
+
+		if (child == 0) {
+			test();
+			exit(0);
+		} else if (child < 0) {
+			exit(1);
+		}
+		while (waitpid(child, &status, __WALL) != child)
+			;
+	}
+
+	warnings_after = get_warnings_count();
+	TEST_ASSERT(warnings_before == warnings_after,
+		   "Warnings found in kernel.  Run 'dmesg' to inspect them.");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
new file mode 100644
index 000000000..1e89688cb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of controlling guest access to
+ * MSR_PLATFORM_INFO.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 0
+#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
+
+static void guest_code(void)
+{
+	uint64_t msr_platform_info;
+
+	for (;;) {	/* no exit path here: the host decides when the test is over */
+		msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+		GUEST_SYNC(msr_platform_info);	/* hand the value read to the host */
+		asm volatile ("inc %r11");	/* NOTE(review): purpose not evident from this file */
+	}
+}
+
+/* Switch the VM's KVM_CAP_MSR_PLATFORM_INFO capability on or off. */
+static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
+{
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_MSR_PLATFORM_INFO, .args[0] = (int)enable,
+	};
+
+	vm_enable_cap(vm, &cap);
+}
+
+static void test_msr_platform_info_enabled(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc;
+
+	set_msr_platform_info_enabled(vm, true);
+	vcpu_run(vm, VCPU_ID);	/* runs until the guest's GUEST_SYNC() */
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			"Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
+			run->exit_reason,
+			exit_reason_str(run->exit_reason));
+	get_ucall(vm, VCPU_ID, &uc);
+	TEST_ASSERT(uc.cmd == UCALL_SYNC,
+			"Received ucall other than UCALL_SYNC: %lu\n", uc.cmd);
+	TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==	/* args[1]: presumably the GUEST_SYNC() value */
+		MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+		"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
+		MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+}
+
+static void test_msr_platform_info_disabled(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+	set_msr_platform_info_enabled(vm, false);
+	vcpu_run(vm, VCPU_ID);	/* the guest loops back to its rdmsr with the cap now off */
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,	/* that run must end in a shutdown exit */
+			"Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
+			run->exit_reason,
+			exit_reason_str(run->exit_reason));
+}
+
+/* Check guest reads of MSR_PLATFORM_INFO with the capability on, then off. */
+int main(int argc, char *argv[])
+{
+	uint64_t saved_info;
+	struct kvm_vm *vm;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	if (!kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO)) {
+		print_skip("KVM_CAP_MSR_PLATFORM_INFO not supported");
+		exit(KSFT_SKIP);
+	}
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	/* Force the max-turbo-ratio bits on for both runs, then restore the MSR */
+	saved_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
+	vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
+		saved_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+	test_msr_platform_info_enabled(vm);
+	test_msr_platform_info_disabled(vm);
+	vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, saved_info);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
new file mode 100644
index 000000000..9f7656184
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM_SET_SREGS tests
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID                  5
+
+int main(int argc, char *argv[])
+{
+	struct kvm_sregs sregs;
+	struct kvm_vm *vm;
+	int rc;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	/* Create VM (no guest code: this test only issues ioctls) */
+	vm = vm_create_default(VCPU_ID, 0, NULL);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	sregs.apic_base = 1 << 10;	/* a value KVM_SET_SREGS is expected to reject */
+	rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+		    sregs.apic_base);
+	sregs.apic_base = 1 << 11;	/* a value KVM_SET_SREGS is expected to accept */
+	rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+		    sregs.apic_base);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
new file mode 100644
index 000000000..ae39a2206
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for SMM.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+#include "svm_util.h"
+
+#define VCPU_ID	      1
+
+#define PAGE_SIZE  4096
+
+#define SMRAM_SIZE 65536
+#define SMRAM_MEMSLOT ((1 << 16) | 1)
+#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
+#define SMRAM_GPA 0x1000000
+#define SMRAM_STAGE 0xfe
+
+#define STR(x) #x
+#define XSTR(s) STR(s)
+
+#define SYNC_PORT 0xe
+#define DONE 0xff
+
+/*
+ * This is compiled as normal 64-bit code; however, the SMI handler is executed
+ * in real-address mode. To stay simple we limit ourselves to a mode-
+ * independent subset of asm here.
+ * The SMI handler always reports back the fixed stage SMRAM_STAGE.
+ */
+uint8_t smi_handler[] = {
+	0xb0, SMRAM_STAGE,    /* mov $SMRAM_STAGE, %al */
+	0xe4, SYNC_PORT,      /* in $SYNC_PORT, %al */
+	0x0f, 0xaa,           /* rsm */
+};
+
+static inline void sync_with_host(uint64_t phase)
+{	/* Port read with @phase in RAX; the host reads the stage from RAX's low byte */
+	asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
+		     : "+a" (phase));
+}
+
+void self_smi(void)
+{	/* Write the ICR through its MSR: SMI delivery mode, destination self */
+	wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
+	      APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+}
+
+void guest_code(void *arg)
+{
+	uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+
+	sync_with_host(1);
+
+	wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE);
+
+	sync_with_host(2);
+
+	self_smi();	/* host stage 3 is reported by smi_handler as SMRAM_STAGE */
+
+	sync_with_host(4);
+
+	if (arg) {	/* non-NULL only when the host allocated nested (SVM/VMX) state */
+		if (cpu_has_svm())
+			generic_svm_setup(arg, NULL, NULL);
+		else
+			GUEST_ASSERT(prepare_for_vmx_operation(arg));
+
+		sync_with_host(5);
+
+		self_smi();	/* host stage 6, again reported as SMRAM_STAGE */
+
+		sync_with_host(7);
+	}
+
+	sync_with_host(DONE);
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t nested_gva = 0;
+
+	struct kvm_regs regs;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	int stage, stage_reported;
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
+				    SMRAM_MEMSLOT, SMRAM_PAGES, 0);
+	TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
+		    == SMRAM_GPA, "could not allocate guest physical addresses?");
+
+	memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
+	memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
+	       sizeof(smi_handler));	/* handler lives at SMRAM_GPA + 0x8000 */
+
+	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);	/* SMBASE = start of SMRAM */
+
+	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+		if (nested_svm_supported())
+			vcpu_alloc_svm(vm, &nested_gva);
+		else if (nested_vmx_supported())
+			vcpu_alloc_vmx(vm, &nested_gva);
+	}
+
+	if (!nested_gva)
+		pr_info("will skip SMM test with VMX enabled\n");
+
+	vcpu_args_set(vm, VCPU_ID, 1, nested_gva);	/* becomes guest_code()'s arg */
+
+	for (stage = 1;; stage++) {
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		memset(&regs, 0, sizeof(regs));
+		vcpu_regs_get(vm, VCPU_ID, &regs);
+
+		stage_reported = regs.rax & 0xff;	/* low byte of RAX carries the stage */
+
+		if (stage_reported == DONE)
+			goto done;
+
+		TEST_ASSERT(stage_reported == stage ||
+			    stage_reported == SMRAM_STAGE,
+			    "Unexpected stage: #%x, got %x",
+			    stage, stage_reported);
+
+		state = vcpu_save_state(vm, VCPU_ID);	/* save, rebuild the VM, reload */
+		kvm_vm_release(vm);
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);	/* refreshed after the vCPU was re-added */
+		free(state);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
new file mode 100644
index 000000000..f6c8b9042
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM_GET/SET_* tests
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for vCPU state save/restore, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define VCPU_ID		5
+#define L2_GUEST_STACK_SIZE 256
+
+void svm_l2_guest_code(void)
+{
+	GUEST_SYNC(4);	/* L2 syncs (4, 6) interleave with L1's 3, 5 and 7 */
+	/* Exit to L1 */
+	vmcall();
+	GUEST_SYNC(6);
+	/* Done, exit to L1 and never come back.  */
+	vmcall();
+}
+
+static void svm_l1_guest_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+
+	GUEST_ASSERT(svm->vmcb_gpa);
+	/* Prepare for L2 execution; L2 is handed the end of the local stack. */
+	generic_svm_setup(svm, svm_l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(3);
+	run_guest(vmcb, svm->vmcb_gpa);	/* returns on L2's first vmcall */
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_SYNC(5);
+	vmcb->save.rip += 3;	/* presumably steps L2 past the 3-byte vmcall */
+	run_guest(vmcb, svm->vmcb_gpa);	/* returns on L2's final vmcall */
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_SYNC(7);
+}
+
+void vmx_l2_guest_code(void)
+{
+	GUEST_SYNC(6);	/* first sync issued from L2; L1 stopped at 5 */
+
+	/* Exit to L1 */
+	vmcall();
+
+	/* L1 has now set up a shadow VMCS for us (GUEST_RIP = 0xc0ffee).  */
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+	GUEST_SYNC(10);	/* the value must still read back after the sync */
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
+	GUEST_SYNC(11);	/* and so must a value written from L2 */
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
+	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
+	GUEST_SYNC(12);	/* L1 checks for 0xc0ffffee once L2 has exited */
+
+	/* Done, exit to L1 and never come back.  */
+	vmcall();
+}
+
+static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_SYNC(3);	/* the host saves and reloads vCPU state at each sync */
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+	GUEST_SYNC(4);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+	prepare_vmcs(vmx_pages, vmx_l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(5);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(!vmlaunch());	/* zero return: L2 ran and exited */
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	/* Check that the launched state is preserved.  */
+	GUEST_ASSERT(vmlaunch());	/* asserts a non-zero return */
+
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	GUEST_SYNC(7);
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);	/* L2 RIP += 3 */
+
+	vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
+	vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
+
+	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+	GUEST_ASSERT(vmlaunch());	/* non-zero return required */
+	GUEST_SYNC(8);
+	GUEST_ASSERT(vmlaunch());	/* same outcome after the sync */
+	GUEST_ASSERT(vmresume());
+
+	vmwrite(GUEST_RIP, 0xc0ffee);	/* value L2 later reads back */
+	GUEST_SYNC(9);
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+
+	GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
+	GUEST_ASSERT(!vmresume());	/* L2 runs its syncs 10 to 12 here */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);	/* written by L2 */
+	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(vmresume());
+	GUEST_SYNC(13);
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(vmresume());
+}
+
+static void __attribute__((__flatten__)) guest_code(void *arg)
+{
+	GUEST_SYNC(1);	/* stages 1 and 2 run with or without nesting */
+	GUEST_SYNC(2);
+
+	if (arg) {	/* arg is main()'s nested_gva; 0 skips the L1 code */
+		if (cpu_has_svm())
+			svm_l1_guest_code(arg);
+		else
+			vmx_l1_guest_code(arg);
+	}
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t nested_gva = 0;	/* stays 0 when nesting is unavailable */
+
+	struct kvm_regs regs1, regs2;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	struct ucall uc;
+	int stage;
+
+	/* Create a VM with one vCPU (VCPU_ID) that runs guest_code. */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+		if (nested_svm_supported())
+			vcpu_alloc_svm(vm, &nested_gva);
+		else if (nested_vmx_supported())
+			vcpu_alloc_vmx(vm, &nested_gva);
+	}
+
+	if (!nested_gva)
+		pr_info("will skip nested state checks\n");
+
+	vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+
+	for (stage = 1;; stage++) {	/* one save/restore cycle per sync */
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+				  __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here: its value must equal the stage. */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage, (ulong)uc.args[1]);
+
+		state = vcpu_save_state(vm, VCPU_ID);
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM.  */
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);	/* refresh: the vCPU was re-added */
+		free(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vm, VCPU_ID, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
new file mode 100644
index 000000000..0e1adb4e3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_vmcall_test
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * Nested SVM testing: VMCALL
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+#define VCPU_ID		5
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+	__asm__ __volatile__("vmcall");	/* L1 then checks SVM_EXIT_VMMCALL */
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+	#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+
+	/* Prepare for L2 execution; L2 is handed the end of the local stack. */
+	generic_svm_setup(svm, l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	run_guest(vmcb, svm->vmcb_gpa);	/* returns once L2 has exited */
+
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t svm_gva;	/* set by vcpu_alloc_svm(), passed to L1 */
+
+	nested_svm_check_supported();
+
+	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	vcpu_alloc_svm(vm, &svm_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+
+	for (;;) {	/* run until the guest reports UCALL_DONE */
+		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		struct ucall uc;
+
+		vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:	/* a GUEST_ASSERT failed in the guest */
+			TEST_FAIL("%s", (const char *)uc.args[0]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+		}
+	}
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
new file mode 100644
index 000000000..d672f0a47
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for x86 KVM_CAP_SYNC_REGS
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
+ * including requesting an invalid register set, updates to/from values
+ * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 5
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+/*
+ * ucall is embedded here to protect against compiler reshuffling registers
+ * before calling a function. In this test we only need to get KVM_EXIT_IO
+ * vmexit and preserve RBX, no additional information is needed.
+ */
+void guest_code(void)
+{
+	asm volatile("1: in %[port], %%al\n"	/* port read: exit to host */
+		     "add $0x1, %%rbx\n"	/* RBX += 1 after every exit */
+		     "jmp 1b"			/* loop forever */
+		     : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
+}
+
+static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
+{
+#define REG_COMPARE(reg) \
+	TEST_ASSERT(left->reg == right->reg, \
+		    "Register " #reg \
+		    " values did not match: 0x%llx, 0x%llx\n", \
+		    left->reg, right->reg)
+	REG_COMPARE(rax);	/* one assertion per compared field */
+	REG_COMPARE(rbx);
+	REG_COMPARE(rcx);
+	REG_COMPARE(rdx);
+	REG_COMPARE(rsi);
+	REG_COMPARE(rdi);
+	REG_COMPARE(rsp);
+	REG_COMPARE(rbp);
+	REG_COMPARE(r8);
+	REG_COMPARE(r9);
+	REG_COMPARE(r10);
+	REG_COMPARE(r11);
+	REG_COMPARE(r12);
+	REG_COMPARE(r13);
+	REG_COMPARE(r14);
+	REG_COMPARE(r15);
+	REG_COMPARE(rip);
+	REG_COMPARE(rflags);
+#undef REG_COMPARE	/* the helper macro is local to this function */
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
+{	/* Stub: no kvm_sregs field is compared at present. */
+}
+
+static void compare_vcpu_events(struct kvm_vcpu_events *left,
+				struct kvm_vcpu_events *right)
+{	/* Stub: no kvm_vcpu_events field is compared at present. */
+}
+
+#define TEST_SYNC_FIELDS   (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+	struct kvm_vcpu_events events;
+	int rv, cap;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+	if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
+		print_skip("KVM_CAP_SYNC_REGS not supported");
+		exit(KSFT_SKIP);
+	}
+	if ((cap & INVALID_SYNC_FIELD) != 0) {
+		print_skip("The \"invalid\" field is not invalid");
+		exit(KSFT_SKIP);
+	}
+
+	/* Create a VM with one vCPU (VCPU_ID) that runs guest_code. */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	/* Request reading invalid register set from VCPU. */
+	run->kvm_valid_regs = INVALID_SYNC_FIELD;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+	run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+	/* Request setting invalid register set into VCPU. */
+	run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+	run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+	/* Request and verify all valid register sets. */
+	/* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	compare_regs(&regs, &run->s.regs.regs);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	compare_sregs(&sregs, &run->s.regs.sregs);
+
+	vcpu_events_get(vm, VCPU_ID, &events);
+	compare_vcpu_events(&events, &run->s.regs.events);
+
+	/* Set and verify register values; the guest adds 1 to RBX per exit. */
+	run->s.regs.regs.rbx = 0xBAD1DEA;
+	run->s.regs.sregs.apic_base = 1 << 11;
+	/* TODO run->s.regs.events.XYZ = ABC; */
+
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
+		    "rbx sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.rbx);
+	TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
+		    "apic_base sync regs value incorrect 0x%llx.",
+		    run->s.regs.sregs.apic_base);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	compare_regs(&regs, &run->s.regs.regs);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	compare_sregs(&sregs, &run->s.regs.sregs);
+
+	vcpu_events_get(vm, VCPU_ID, &events);
+	compare_vcpu_events(&events, &run->s.regs.events);
+
+	/* Clear kvm_dirty_regs bits, verify new s.regs values are
+	 * overwritten with existing guest values.
+	 */
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	run->kvm_dirty_regs = 0;
+	run->s.regs.regs.rbx = 0xDEADBEEF;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
+		    "rbx sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.rbx);
+
+	/* Clear kvm_valid_regs bits and kvm_dirty_regs bits.
+	 * Verify s.regs values are not overwritten with existing guest values
+	 * and that guest values are not overwritten with kvm_sync_regs values.
+	 */
+	run->kvm_valid_regs = 0;
+	run->kvm_dirty_regs = 0;
+	run->s.regs.regs.rbx = 0xAAAA;
+	regs.rbx = 0xBAC0;
+	vcpu_regs_set(vm, VCPU_ID, &regs);
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
+		    "rbx sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.rbx);
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
+		    "rbx guest value incorrect 0x%llx.",
+		    regs.rbx);
+
+	/* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
+	 * with existing guest values but that guest values are overwritten
+	 * with kvm_sync_regs values.
+	 */
+	run->kvm_valid_regs = 0;
+	run->kvm_dirty_regs = TEST_SYNC_FIELDS;
+	run->s.regs.regs.rbx = 0xBBBB;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
+		    "rbx sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.rbx);
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	TEST_ASSERT(regs.rbx == 0xBBBB + 1,
+		    "rbx guest value incorrect 0x%llx.",
+		    regs.rbx);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
new file mode 100644
index 000000000..f8e761149
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 0
+
+#define UNITY                  (1ull << 30)	/* rounding unit for MSR values */
+#define HOST_ADJUST            (UNITY * 64)	/* offset the host adds */
+#define GUEST_STEP             (UNITY * 4)	/* step between guest writes */
+#define ROUND(x)               (((x) + UNITY / 2) & -UNITY) /* nearest UNITY */
+#define rounded_rdmsr(x)       ROUND(rdmsr(x))
+#define rounded_host_rdmsr(x)  ROUND(vcpu_get_msr(vm, VCPU_ID, x)) /* uses vm */
+
+#define GUEST_ASSERT_EQ(a, b) do {				\
+	__typeof(a) _a = (a);					\
+	__typeof(b) _b = (b);					\
+	if (_a != _b)	/* report both operands to the host */	\
+		ucall(UCALL_ABORT, 4,				\
+			"Failed guest assert: "			\
+			#a " == " #b, __LINE__, _a, _b);	\
+} while (0)
+
+static void guest_code(void)
+{
+	u64 val = 0;	/* value both MSRs are expected to round to */
+
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
+	val = 1ull * GUEST_STEP;
+	wrmsr(MSR_IA32_TSC, val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
+	GUEST_SYNC(2);	/* the host's run_vcpu() expects stage + 1 here */
+	val = 2ull * GUEST_STEP;
+	wrmsr(MSR_IA32_TSC_ADJUST, val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/* Host: setting the TSC offset.  */
+	GUEST_SYNC(3);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/*
+	 * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+	 * host-side offset and affect both MSRs.
+	 */
+	GUEST_SYNC(4);
+	val = 3ull * GUEST_STEP;
+	wrmsr(MSR_IA32_TSC_ADJUST, val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/*
+	 * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+	 * offset is now visible in MSR_IA32_TSC_ADJUST.
+	 */
+	GUEST_SYNC(5);
+	val = 4ull * GUEST_STEP;
+	wrmsr(MSR_IA32_TSC, val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+	GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
+{
+	struct ucall uc;
+
+	vcpu_args_set(vm, vcpuid, 1, vcpuid);
+	vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);
+
+	switch (get_ucall(vm, vcpuid, &uc)) {
+	case UCALL_SYNC:	/* the sync value must be stage + 1 */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage + 1, (ulong)uc.args[1]);
+		return;
+	case UCALL_DONE:
+		return;
+	case UCALL_ABORT:	/* args 2 and 3 carry the compared values */
+		TEST_ASSERT(false, "%s at %s:%ld\n"
+			    "\tvalues: %#lx, %#lx", (const char *)uc.args[0],
+			    __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+		/* NOT REACHED */
+	default:
+		TEST_ASSERT(false, "Unexpected exit: %s",
+			    exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+	}
+}
+
+int main(void)
+{
+	struct kvm_vm *vm;
+	uint64_t val;	/* value the MSRs are expected to round to */
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	val = 0;
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
+	run_vcpu(vm, VCPU_ID, 1);
+	val = 1ull * GUEST_STEP;
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
+	run_vcpu(vm, VCPU_ID, 2);
+	val = 2ull * GUEST_STEP;
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/*
+	 * Host: writes to MSR_IA32_TSC set the host-side offset
+	 * and therefore do not change MSR_IA32_TSC_ADJUST.
+	 */
+	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_TSC, HOST_ADJUST + val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+	run_vcpu(vm, VCPU_ID, 3);
+
+	/* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC.  */
+	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_TSC_ADJUST, UNITY * 123456);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+
+	/* Restore previous value.  */
+	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_TSC_ADJUST, val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/*
+	 * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+	 * host-side offset and affect both MSRs.
+	 */
+	run_vcpu(vm, VCPU_ID, 4);
+	val = 3ull * GUEST_STEP;
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/*
+	 * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+	 * offset is now visible in MSR_IA32_TSC_ADJUST.
+	 */
+	run_vcpu(vm, VCPU_ID, 5);
+	val = 4ull * GUEST_STEP;
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c
new file mode 100644
index 000000000..cbe1b0889
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/user_msr_test.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tests for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER
+ *
+ * Copyright (C) 2020, Amazon Inc.
+ *
+ * This is a functional test to verify that we can deflect MSR events
+ * into user space.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID                  5
+
+static u32 msr_reads, msr_writes;	/* exits handled in user space */
+
+static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_deadbeef[1] = { 0x1 };	/* bitmap of the 0xdeadbeef range */
+
+static void deny_msr(uint8_t *bitmap, u32 msr)
+{
+	u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
+	/* NOTE(review): this mask is exact only for a power-of-two size. */
+	bitmap[idx / 8] &= ~(1 << (idx % 8));	/* clear the MSR's bit */
+}
+
+static void prepare_bitmaps(void)
+{
+	/* Set every bit first, then clear the bits of the MSRs to deny. */
+	memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
+	memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
+	memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
+	memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
+	memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
+	deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
+	deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
+	deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
+}
+
+struct kvm_msr_filter filter = {
+	.flags = KVM_MSR_FILTER_DEFAULT_DENY,
+	.ranges = {
+		{	/* reads from base 0x00000000: no bit cleared */
+			.flags = KVM_MSR_FILTER_READ,
+			.base = 0x00000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_00000000,
+		}, {	/* writes: MSR_IA32_POWER_CTL bit cleared */
+			.flags = KVM_MSR_FILTER_WRITE,
+			.base = 0x00000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_00000000_write,
+		}, {	/* reads and writes from base 0x40000000 */
+			.flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
+			.base = 0x40000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_40000000,
+		}, {	/* reads: SYSCALL_MASK and GS_BASE bits cleared */
+			.flags = KVM_MSR_FILTER_READ,
+			.base = 0xc0000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_c0000000_read,
+		}, {	/* writes from base 0xc0000000: no bit cleared */
+			.flags = KVM_MSR_FILTER_WRITE,
+			.base = 0xc0000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_c0000000,
+		}, {	/* single MSR 0xdeadbeef, bit set in its bitmap */
+			.flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
+			.base = 0xdeadbeef,
+			.nmsrs = 1,
+			.bitmap = bitmap_deadbeef,
+		},
+	},
+};
+
+struct kvm_msr_filter no_filter = {	/* installed on the guest's sync */
+	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+};
+
+static void guest_msr_calls(bool trapped)
+{
+	/* This goes into the in-kernel emulation */
+	wrmsr(MSR_SYSCALL_MASK, 0);
+
+	if (trapped) {
+		/* User space emulation returns the MSR index as the value */
+		GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
+		GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
+	} else {
+		GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
+		GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
+	}
+
+	/* If trapped == true, this goes into user space emulation */
+	wrmsr(MSR_IA32_POWER_CTL, 0x1234);
+
+	/* This goes into the in-kernel emulation */
+	rdmsr(MSR_IA32_POWER_CTL);
+
+	/* Invalid MSR, should always be handled by user space exit */
+	GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
+	wrmsr(0xdeadbeef, 0x1234);	/* the host checks for data 0x1234 */
+}
+
+static void guest_code(void)
+{
+	guest_msr_calls(true);	/* first round: filter installed */
+
+	/*
+	 * Disable msr filtering, so that the kernel
+	 * handles everything in the next round
+	 */
+	GUEST_SYNC(0);	/* the host installs no_filter on this sync */
+
+	guest_msr_calls(false);
+
+	GUEST_DONE();
+}
+
+static int handle_ucall(struct kvm_vm *vm)
+{
+	struct ucall uc;
+
+	switch (get_ucall(vm, VCPU_ID, &uc)) {
+	case UCALL_ABORT:	/* guest assertion failed: say which one */
+		TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+			  __FILE__, uc.args[1]);
+		break;
+	case UCALL_SYNC:	/* guest finished the filtered round */
+		vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter);
+		break;
+	case UCALL_DONE:
+		return 1;	/* non-zero tells the caller to stop */
+	default:
+		TEST_FAIL("Unknown ucall %lu", uc.cmd);
+	}
+	return 0;
+}
+
+static void handle_rdmsr(struct kvm_run *run)
+{
+	msr_reads++;	/* main() checks the total at the end */
+	run->msr.data = run->msr.index;	/* value read = MSR index */
+	switch (run->msr.index) {
+	case MSR_SYSCALL_MASK:
+	case MSR_GS_BASE:
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+			    "MSR read trap w/o access fault");
+		break;
+	case 0xdeadbeef:
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+			    "MSR deadbeef read trap w/o inval fault");
+		break;
+	}
+}
+
+static void handle_wrmsr(struct kvm_run *run)
+{
+	/* The written value is dropped; only count and validate the exit. */
+	msr_writes++;
+	switch (run->msr.index) {
+	case MSR_IA32_POWER_CTL:
+		TEST_ASSERT(run->msr.data == 0x1234,
+			    "MSR data for MSR_IA32_POWER_CTL incorrect");
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+			    "MSR_IA32_POWER_CTL trap w/o access fault");
+		break;
+	case 0xdeadbeef:
+		TEST_ASSERT(run->msr.data == 0x1234,
+			    "MSR data for deadbeef incorrect");
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+			    "deadbeef trap w/o inval fault");
+		break;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_X86_USER_SPACE_MSR,
+		.args[0] = KVM_MSR_EXIT_REASON_INVAL |
+			   KVM_MSR_EXIT_REASON_UNKNOWN |
+			   KVM_MSR_EXIT_REASON_FILTER,
+	};
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	int rc;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	/* Create a VM with one vCPU (VCPU_ID) that runs guest_code. */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+	TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+	vm_enable_cap(vm, &cap);
+
+	rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+	TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+	prepare_bitmaps();
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter);
+
+	while (1) {	/* left via "goto done" once the guest is done */
+		rc = _vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+		switch (run->exit_reason) {
+		case KVM_EXIT_X86_RDMSR:
+			handle_rdmsr(run);
+			break;
+		case KVM_EXIT_X86_WRMSR:
+			handle_wrmsr(run);
+			break;
+		case KVM_EXIT_IO:
+			if (handle_ucall(vm))
+				goto done;
+			break;
+		default:	/* do not silently re-run on an unknown exit */
+			TEST_FAIL("Unexpected exit reason: %s",
+				  exit_reason_str(run->exit_reason));
+		}
+	}
+
+done:
+	TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
+	TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
new file mode 100644
index 000000000..1f65342d6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_apic_access_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * The first subtest simply checks to see that an L2 guest can be
+ * launched with a valid APIC-access address that is backed by a
+ * page of L1 physical memory.
+ *
+ * The second subtest sets the APIC-access address to a (valid) L1
+ * physical address that is not backed by memory. KVM can't handle
+ * this situation, so resuming L2 should result in a KVM exit for
+ * internal error (emulation). This is not an architectural
+ * requirement. It is just a shortcoming of KVM. The internal error
+ * is unfortunate, but it's better than what used to happen!
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define VCPU_ID		0
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+	/* Exit to L1, which checks for EXIT_REASON_VMCALL */
+	__asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	uint32_t control;
+
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+
+	/* Prepare the VMCS for L2 execution. */
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+	control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+	control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+	control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+	vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+	vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa);
+
+	/* Try to launch L2 with the memory-backed APIC-access address. */
+	GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));	/* tell the host the address */
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	vmwrite(APIC_ACCESS_ADDR, high_gpa);
+
+	/* Try to resume L2 with the unbacked APIC-access address. */
+	GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));	/* host now sees high_gpa */
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned long apic_access_addr = ~0ul;	/* last address L1 reported */
+	unsigned int paddr_width;
+	unsigned int vaddr_width;
+	vm_vaddr_t vmx_pages_gva;
+	unsigned long high_gpa;	/* last page below 1 << paddr_width */
+	struct vmx_pages *vmx;
+	bool done = false;
+
+	nested_vmx_check_supported();
+
+	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	kvm_get_cpu_address_width(&paddr_width, &vaddr_width);
+	high_gpa = (1ul << paddr_width) - getpagesize();
+	if ((unsigned long)DEFAULT_GUEST_PHY_PAGES * getpagesize() > high_gpa) {
+		print_skip("No unbacked physical page available");
+		exit(KSFT_SKIP);
+	}
+
+	vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	prepare_virtualize_apic_accesses(vmx, vm, 0);
+	vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa);
+
+	while (!done) {
+		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		struct ucall uc;
+
+		vcpu_run(vm, VCPU_ID);
+		if (apic_access_addr == high_gpa) {	/* second subtest */
+			TEST_ASSERT(run->exit_reason ==
+				    KVM_EXIT_INTERNAL_ERROR,
+				    "Got exit reason other than KVM_EXIT_INTERNAL_ERROR: %u (%s)\n",
+				    run->exit_reason,
+				    exit_reason_str(run->exit_reason));
+			TEST_ASSERT(run->internal.suberror ==
+				    KVM_INTERNAL_ERROR_EMULATION,
+				    "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n",
+				    run->internal.suberror);
+			break;	/* the test ends on the expected error exit */
+		}
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+				  __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:	/* L1 reports its APIC-access address */
+			apic_access_addr = uc.args[1];
+			break;
+		case UCALL_DONE:
+			done = true;
+			break;
+		default:
+			TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd);
+		}
+	}
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
new file mode 100644
index 000000000..fe40ade06
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_close_while_nested
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * Verify that nothing bad happens if a KVM user exits with open
+ * file descriptors while executing a nested guest.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define VCPU_ID		5	/* vCPU ID passed to every vm_/vcpu_ helper call in main() */
+
+enum {
+	PORT_L0_EXIT = 0x2000,	/* I/O port L2 reads from; main() leaves its run loop when it sees it */
+};
+
+/* The virtual machine object; main() creates it and never calls kvm_vm_free() on it. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+	/* Exit to L0: read from PORT_L0_EXIT, the port main() checks for after each KVM_EXIT_IO. */
+        asm volatile("inb %%dx, %%al"
+                     : : [port] "d" (PORT_L0_EXIT) : "rax");	/* port number is passed in DX; RAX is declared clobbered */
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];	/* lives on L1's own stack */
+
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+
+	/* Prepare the VMCS for L2 execution. */
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);	/* one past the last element; presumably L2's initial stack pointer */
+
+	GUEST_ASSERT(!vmlaunch());	/* vmlaunch() must report success (0) */
+	GUEST_ASSERT(0);	/* unconditional failure: main() stops at L2's port read, so L1 is not expected to get here */
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t vmx_pages_gva;	/* filled in by vcpu_alloc_vmx() below */
+
+	nested_vmx_check_supported();
+
+	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);	/* l1_guest_code is handed over as the guest code */
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	/* Allocate VMX pages and shared descriptors (vmx_pages). */
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);	/* one argument: l1_guest_code()'s vmx_pages */
+
+	for (;;) {
+		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		struct ucall uc;
+
+		vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		if (run->io.port == PORT_L0_EXIT)	/* L2 executed its port read: stop here, VM still open */
+			break;
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {	/* any other I/O exit is decoded as a ucall; every case fails the test */
+		case UCALL_ABORT:
+			TEST_FAIL("%s", (const char *)uc.args[0]);
+			/* NOT REACHED */
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+}	/* returns without kvm_vm_free(): the test is about exiting with the file descriptors still open */
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
new file mode 100644
index 000000000..e894a638a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VCPU_ID				1
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX		1
+#define TEST_MEM_PAGES			3
+
+/* L1 guest test virtual memory offset */
+#define GUEST_TEST_MEM			0xc0000000
+
+/* L2 guest test virtual memory offset */
+#define NESTED_TEST_MEM1		0xc0001000
+#define NESTED_TEST_MEM2		0xc0002000
+
+static void l2_guest_code(void)
+{
+	*(volatile uint64_t *)NESTED_TEST_MEM1;	/* read access */
+	*(volatile uint64_t *)NESTED_TEST_MEM1 = 1;	/* write access through the first L2 address */
+	GUEST_SYNC(true);	/* true: main() then requires page 0 to be dirty and to hold 1 */
+	GUEST_SYNC(false);	/* false: no write since the last sync, main() requires page 0 clean and untouched */
+
+	*(volatile uint64_t *)NESTED_TEST_MEM2 = 1;	/* same checks through the second L2 address */
+	GUEST_SYNC(true);
+	*(volatile uint64_t *)NESTED_TEST_MEM2 = 1;	/* main() refills the memory before every run, so this write is visible again */
+	GUEST_SYNC(true);
+	GUEST_SYNC(false);
+
+	/* Exit to L1 and never come back.  */
+	vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];	/* lives on L1's own stack */
+
+	GUEST_ASSERT(vmx->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+	GUEST_ASSERT(load_vmcs(vmx));
+
+	prepare_vmcs(vmx, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);	/* one past the last element; presumably L2's initial stack pointer */
+
+	GUEST_SYNC(false);	/* before L2 runs: main() requires all three pages clean */
+	GUEST_ASSERT(!vmlaunch());	/* runs L2 until its final vmcall() */
+	GUEST_SYNC(false);	/* after L2 is done: still nothing newly dirtied */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t vmx_pages_gva = 0;
+	struct vmx_pages *vmx;
+	unsigned long *bmap;	/* dirty bitmap fetched for the test memslot */
+	uint64_t *host_test_mem;	/* host mapping of GUEST_TEST_MEM */
+
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct ucall uc;
+	bool done = false;	/* set once the guest issues UCALL_DONE */
+
+	nested_vmx_check_supported();
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);	/* one argument: l1_guest_code()'s vmx */
+	run = vcpu_state(vm, VCPU_ID);
+
+	/* Add an extra memory slot for testing dirty logging */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    GUEST_TEST_MEM,
+				    TEST_MEM_SLOT_INDEX,
+				    TEST_MEM_PAGES,
+				    KVM_MEM_LOG_DIRTY_PAGES);
+
+	/*
+	 * Add an identity map for GVA range [0xc0000000, 0xc0003000).  This
+	 * affects both L1 and L2.  However...
+	 */
+	virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0);
+
+	/*
+	 * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
+	 * 0xc0000000.
+	 *
+	 * Note that prepare_eptp should be called only after L1's GPA map is done,
+	 * meaning after the last call to virt_map.
+	 */
+	prepare_eptp(vmx, vm, 0);
+	nested_map_memslot(vmx, vm, 0, 0);
+	nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);	/* both L2 test pages are backed by the first test page */
+	nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
+
+	bmap = bitmap_alloc(TEST_MEM_PAGES);
+	host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
+
+	while (!done) {
+		memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);	/* refill with a known pattern so guest writes are detectable */
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Unexpected exit reason: %u (%s),\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+			       	  __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			/*
+			 * The nested guest wrote at offset 0x1000 in the memslot, but the
+			 * dirty bitmap must be filled in according to L1 GPA, not L2.
+			 */
+			kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+			if (uc.args[1]) {	/* guest's GUEST_SYNC() flag: true means it wrote since the last sync */
+				TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n");
+				TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n");
+			} else {
+				TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n");
+				TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n");
+			}
+
+			TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n");	/* pages 1 and 2 must never be touched */
+			TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n");
+			TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n");
+			TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n");
+			break;
+		case UCALL_DONE:
+			done = true;
+			break;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
new file mode 100644
index 000000000..a7737af12
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX-preemption timer test
+ *
+ * Copyright (C) 2020, Google, LLC.
+ *
+ * Test to ensure the VM-Enter after migration doesn't
+ * incorrectly restart the timer with the full timer
+ * value instead of the partially decayed timer value
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VCPU_ID		5
+#define PREEMPTION_TIMER_VALUE			100000000ull	/* value L1 writes to VMX_PREEMPTION_TIMER_VALUE */
+#define PREEMPTION_TIMER_VALUE_THRESHOLD1	 80000000ull	/* L2 spins until (TSC delta >> vmx_pt_rate) reaches this */
+
+u32 vmx_pt_rate;	/* low 5 bits of MSR_IA32_VMX_MISC, used as a TSC shift count */
+bool l2_save_restore_done;	/* cleared by L1, set by L2 once it continues past GUEST_SYNC(1) */
+static u64 l2_vmx_pt_start;	/* TSC sampled by L2 after its vmcall(), low vmx_pt_rate bits cleared */
+volatile u64 l2_vmx_pt_finish;	/* most recent TSC value sampled by L2 */
+
+union vmx_basic basic;	/* MSR_IA32_VMX_BASIC as read by L1 */
+union vmx_ctrl_msr ctrl_pin_rev;	/* (TRUE_)PINBASED_CTLS capability MSR as read by L1 */
+union vmx_ctrl_msr ctrl_exit_rev;	/* (TRUE_)EXIT_CTLS capability MSR as read by L1 */
+
+void l2_guest_code(void)
+{
+	u64 vmx_pt_delta;	/* elapsed TSC since l2_vmx_pt_start, shifted right by vmx_pt_rate */
+
+	vmcall();	/* first exit to L1, which enables the preemption timer and resumes L2 */
+	l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;	/* TSC with the low vmx_pt_rate bits cleared */
+
+	/*
+	 * Wait until the 1st threshold has passed
+	 */
+	do {
+		l2_vmx_pt_finish = rdtsc();
+		vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
+				vmx_pt_rate;
+	} while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
+
+	/*
+	 * Force L2 through Save and Restore cycle
+	 */
+	GUEST_SYNC(1);	/* stage 1: main() saves the vCPU state and restores it in a fresh VM */
+
+	l2_save_restore_done = 1;	/* L1 asserts this after L2 exits */
+
+	/*
+	 * Now wait for the preemption timer to fire and
+	 * exit to L1
+	 */
+	while ((l2_vmx_pt_finish = rdtsc()))	/* keeps recording the last TSC seen; loops while rdtsc() is non-zero */
+		;
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];	/* lives on L1's own stack */
+	u64 l1_vmx_pt_start;	/* TSC taken just before resuming L2 with the timer armed */
+	u64 l1_vmx_pt_finish;	/* TSC taken right after L2 exits back to L1 */
+	u64 l1_tsc_deadline, l2_tsc_deadline;	/* start + (PREEMPTION_TIMER_VALUE << vmx_pt_rate) for L1 and L2 */
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);	/* one past the last element; presumably L2's initial stack pointer */
+
+	/*
+	 * Check for Preemption timer support
+	 */
+	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
+			: MSR_IA32_VMX_PINBASED_CTLS);
+	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
+			: MSR_IA32_VMX_EXIT_CTLS);
+
+	if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+	    !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+		return;	/* either control missing: back to guest_code(), which issues GUEST_DONE() */
+
+	GUEST_ASSERT(!vmlaunch());	/* runs L2 up to its first vmcall() */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));	/* advance L2's RIP past the vmcall */
+
+	/*
+	 * Turn on PIN control and resume the guest
+	 */
+	GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+			      vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+			      PIN_BASED_VMX_PREEMPTION_TIMER));
+
+	GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
+			      PREEMPTION_TIMER_VALUE));
+
+	vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;	/* keep only bits 4:0 */
+
+	l2_save_restore_done = 0;
+
+	l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;	/* TSC with the low vmx_pt_rate bits cleared */
+
+	GUEST_ASSERT(!vmresume());
+
+	l1_vmx_pt_finish = rdtsc();
+
+	/*
+	 * Ensure exit from L2 happens after L2 goes through
+	 * save and restore
+	 */
+	GUEST_ASSERT(l2_save_restore_done);
+
+	/*
+	 * Ensure the exit from L2 is due to preemption timer expiry
+	 */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
+
+	l1_tsc_deadline = l1_vmx_pt_start +
+		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+	l2_tsc_deadline = l2_vmx_pt_start +
+		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+	/*
+	 * Sync with the host and pass the l1|l2 pt_expiry_finish times and
+	 * tsc deadlines so that host can verify they are as expected
+	 */
+	GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
+		l2_vmx_pt_finish, l2_tsc_deadline);	/* stage 2; main() reads these four values as uc.args[2..5] */
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+	if (vmx_pages)	/* run the nested test only when a vmx_pages argument was supplied */
+		l1_guest_code(vmx_pages);
+
+	GUEST_DONE();	/* always tell the host the guest is finished */
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t vmx_pages_gva = 0;
+
+	struct kvm_regs regs1, regs2;	/* registers before save (regs1) and after restore (regs2) */
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;	/* vCPU state carried from the old VM to the restarted one */
+	struct ucall uc;
+	int stage;	/* expected GUEST_SYNC stage number, starting at 1 */
+
+	/*
+	 * AMD currently does not implement any VMX features, so for now we
+	 * just early out.
+	 */
+	nested_vmx_check_supported();
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs1);	/* regs1 is read again after every sync before it is compared */
+
+	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+		vcpu_alloc_vmx(vm, &vmx_pages_gva);
+		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);	/* one argument: guest_code()'s vmx_pages */
+	} else {
+		pr_info("will skip vmx preemption timer checks\n");
+		goto done;	/* the guest is never run in this case */
+	}
+
+	for (stage = 1;; stage++) {
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+				  __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage, (ulong)uc.args[1]);
+		/*
+		 * If this is stage 2, then we should verify the vmx pt expiry
+		 * is as expected.
+		 * From L1's perspective verify Preemption timer hasn't
+		 * expired too early.
+		 * From L2's perspective verify Preemption timer hasn't
+		 * expired too late.
+		 */
+		if (stage == 2) {
+
+			pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
+				stage, uc.args[2], uc.args[3]);
+
+			pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
+				stage, uc.args[4], uc.args[5]);
+
+			TEST_ASSERT(uc.args[2] >= uc.args[3],
+				"Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
+				stage, uc.args[2], uc.args[3]);
+
+			TEST_ASSERT(uc.args[4] < uc.args[5],
+				"Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
+				stage, uc.args[4], uc.args[5]);
+		}
+
+		state = vcpu_save_state(vm, VCPU_ID);	/* done after every sync, not only the stage L2 asked for */
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM.  */
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);	/* re-fetch: the vCPU was re-created */
+		free(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vm, VCPU_ID, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
new file mode 100644
index 000000000..d59f3eb67
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_set_nested_state_test
+ *
+ * Copyright (C) 2019, Google LLC.
+ *
+ * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <errno.h>
+#include <linux/kvm.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+/*
+ * Mirror of VMCS12_REVISION in arch/x86/kvm/vmx/vmcs12.h. If that value
+ * changes this should be updated.
+ */
+#define VMCS12_REVISION 0x11e57ed0
+#define VCPU_ID 5
+
+bool have_evmcs;	/* result of kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS), set in main() */
+
+void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state)
+{
+	vcpu_nested_state_set(vm, VCPU_ID, state, false);	/* result not checked here; NOTE(review): 'false' presumably makes the helper itself assert success - confirm */
+}
+
+void test_nested_state_expect_errno(struct kvm_vm *vm,
+				    struct kvm_nested_state *state,
+				    int expected_errno)
+{
+	int rv;	/* return value of the set call; must be -1 */
+
+	rv = vcpu_nested_state_set(vm, VCPU_ID, state, true);	/* NOTE(review): 'true' presumably lets the failure be returned instead of asserted - confirm */
+	TEST_ASSERT(rv == -1 && errno == expected_errno,
+		"Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
+		strerror(expected_errno), expected_errno, rv, strerror(errno),
+		errno);
+}
+
+void test_nested_state_expect_einval(struct kvm_vm *vm,
+				     struct kvm_nested_state *state)
+{
+	test_nested_state_expect_errno(vm, state, EINVAL);	/* setting 'state' must fail with EINVAL */
+}
+
+void test_nested_state_expect_efault(struct kvm_vm *vm,
+				     struct kvm_nested_state *state)
+{
+	test_nested_state_expect_errno(vm, state, EFAULT);	/* setting 'state' must fail with EFAULT */
+}
+
+void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
+				u32 vmcs12_revision)
+{
+	/* Set revision_id in vmcs12 to vmcs12_revision: it is the first u32 of state->data. */
+	memcpy(&state->data, &vmcs12_revision, sizeof(u32));
+}
+
+void set_default_state(struct kvm_nested_state *state)
+{
+	memset(state, 0, sizeof(*state));	/* header only: no trailing data is cleared */
+	state->flags = KVM_STATE_NESTED_RUN_PENDING |
+		       KVM_STATE_NESTED_GUEST_MODE;
+	state->format = 0;
+	state->size = sizeof(*state);	/* callers in main() override single fields afterwards */
+}
+
+void set_default_vmx_state(struct kvm_nested_state *state, int size)
+{
+	memset(state, 0, size);	/* 'size' bytes: the caller's buffer is the header plus one page */
+	if (have_evmcs)
+		state->flags = KVM_STATE_NESTED_EVMCS;	/* otherwise flags stay 0 from the memset */
+	state->format = 0;
+	state->size = size;
+	state->hdr.vmx.vmxon_pa = 0x1000;
+	state->hdr.vmx.vmcs12_pa = 0x2000;
+	state->hdr.vmx.smm.flags = 0;
+	set_revision_id_for_vmcs12(state, VMCS12_REVISION);	/* stamp the start of state->data with the revision */
+}
+
+void test_vmx_nested_state(struct kvm_vm *vm)
+{
+	/* Add a page for VMCS12. */
+	const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
+	struct kvm_nested_state *state = malloc(state_sz);
+	TEST_ASSERT(state != NULL, "Failed to allocate %d bytes of nested state", state_sz);
+
+	/* The format must be set to 0. 0 for VMX, 1 for SVM. */
+	set_default_vmx_state(state, state_sz);
+	state->format = 1;
+	test_nested_state_expect_einval(vm, state);
+
+	/*
+	 * We cannot virtualize anything if the guest does not have VMX
+	 * enabled.
+	 */
+	set_default_vmx_state(state, state_sz);
+	test_nested_state_expect_einval(vm, state);
+
+	/*
+	 * We cannot virtualize anything if the guest does not have VMX
+	 * enabled.  We expect KVM_SET_NESTED_STATE to return 0 if vmxon_pa
+	 * is set to -1ull, but the flags must be zero.
+	 */
+	set_default_vmx_state(state, state_sz);
+	state->hdr.vmx.vmxon_pa = -1ull;
+	test_nested_state_expect_einval(vm, state);
+
+	state->hdr.vmx.vmcs12_pa = -1ull;
+	state->flags = KVM_STATE_NESTED_EVMCS;
+	test_nested_state_expect_einval(vm, state);
+
+	state->flags = 0;
+	test_nested_state(vm, state);
+
+	/* Enable VMX in the guest CPUID. */
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	/*
+	 * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
+	 * setting the nested state but flags other than eVMCS must be clear.
+	 * The eVMCS flag can be set if the enlightened VMCS capability has
+	 * been enabled.
+	 */
+	set_default_vmx_state(state, state_sz);
+	state->hdr.vmx.vmxon_pa = -1ull;
+	state->hdr.vmx.vmcs12_pa = -1ull;
+	test_nested_state_expect_einval(vm, state);
+
+	state->flags &= KVM_STATE_NESTED_EVMCS;	/* drop every flag except eVMCS */
+	if (have_evmcs) {
+		test_nested_state_expect_einval(vm, state);	/* eVMCS flag set before the capability is enabled */
+		vcpu_enable_evmcs(vm, VCPU_ID);
+	}
+	test_nested_state(vm, state);
+
+	/* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
+	state->hdr.vmx.smm.flags = 1;
+	test_nested_state_expect_einval(vm, state);
+
+	/* Invalid flags are rejected. */
+	set_default_vmx_state(state, state_sz);
+	state->hdr.vmx.flags = ~0;
+	test_nested_state_expect_einval(vm, state);
+
+	/* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
+	set_default_vmx_state(state, state_sz);
+	state->hdr.vmx.vmxon_pa = -1ull;
+	state->flags = 0;
+	test_nested_state_expect_einval(vm, state);
+
+	/* It is invalid to have vmxon_pa set to a non-page aligned address. */
+	set_default_vmx_state(state, state_sz);
+	state->hdr.vmx.vmxon_pa = 1;
+	test_nested_state_expect_einval(vm, state);
+
+	/*
+	 * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
+	 * KVM_STATE_NESTED_GUEST_MODE set together.
+	 */
+	set_default_vmx_state(state, state_sz);
+	state->flags = KVM_STATE_NESTED_GUEST_MODE  |
+		      KVM_STATE_NESTED_RUN_PENDING;
+	state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+	test_nested_state_expect_einval(vm, state);
+
+	/*
+	 * It is invalid to have any of the SMM flags set besides:
+	 *	KVM_STATE_NESTED_SMM_GUEST_MODE
+	 *	KVM_STATE_NESTED_SMM_VMXON
+	 */
+	set_default_vmx_state(state, state_sz);
+	state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
+				KVM_STATE_NESTED_SMM_VMXON);
+	test_nested_state_expect_einval(vm, state);
+
+	/* Outside SMM, SMM flags must be zero. */
+	set_default_vmx_state(state, state_sz);
+	state->flags = 0;
+	state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+	test_nested_state_expect_einval(vm, state);
+
+	/*
+	 * Size must be large enough to fit kvm_nested_state and vmcs12
+	 * if VMCS12 physical address is set
+	 */
+	set_default_vmx_state(state, state_sz);
+	state->size = sizeof(*state);
+	state->flags = 0;
+	test_nested_state_expect_einval(vm, state);
+
+	set_default_vmx_state(state, state_sz);
+	state->size = sizeof(*state);
+	state->flags = 0;
+	state->hdr.vmx.vmcs12_pa = -1;	/* no VMCS12: the header-only size is accepted */
+	test_nested_state(vm, state);
+
+	/*
+	 * KVM_SET_NESTED_STATE succeeds with invalid VMCS
+	 * contents but L2 not running.
+	 */
+	set_default_vmx_state(state, state_sz);
+	state->flags = 0;
+	test_nested_state(vm, state);
+
+	/* Invalid flags are rejected, even if no VMCS loaded. */
+	set_default_vmx_state(state, state_sz);
+	state->size = sizeof(*state);
+	state->flags = 0;
+	state->hdr.vmx.vmcs12_pa = -1;
+	state->hdr.vmx.flags = ~0;
+	test_nested_state_expect_einval(vm, state);
+
+	/* vmxon_pa cannot be the same address as vmcs_pa. */
+	set_default_vmx_state(state, state_sz);
+	state->hdr.vmx.vmxon_pa = 0;
+	state->hdr.vmx.vmcs12_pa = 0;
+	test_nested_state_expect_einval(vm, state);
+
+	/*
+	 * Test that if we leave nesting the state reflects that when we get
+	 * it again.
+	 */
+	set_default_vmx_state(state, state_sz);
+	state->hdr.vmx.vmxon_pa = -1ull;
+	state->hdr.vmx.vmcs12_pa = -1ull;
+	state->flags = 0;
+	test_nested_state(vm, state);
+	vcpu_nested_state_get(vm, VCPU_ID, state);
+	TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
+		    "Size must be between %zu and %d.  The size returned was %u.",
+		    sizeof(*state), state_sz, state->size);
+	TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
+	TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
+
+	free(state);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_nested_state state;	/* header-only state used for the generic checks below */
+
+	have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
+
+	if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+		print_skip("KVM_CAP_NESTED_STATE not available");
+		exit(KSFT_SKIP);
+	}
+
+	/*
+	 * AMD currently does not implement set_nested_state, so for now we
+	 * just early out.
+	 */
+	nested_vmx_check_supported();
+
+	vm = vm_create_default(VCPU_ID, 0, 0);	/* no guest code: the guest is never run */
+
+	/* Passing a NULL kvm_nested_state causes an EFAULT. */
+	test_nested_state_expect_efault(vm, NULL);
+
+	/* 'size' cannot be smaller than sizeof(kvm_nested_state). */
+	set_default_state(&state);
+	state.size = 0;
+	test_nested_state_expect_einval(vm, &state);
+
+	/*
+	 * Setting the flags 0xf fails the flags check.  The only flags that
+	 * can be used are:
+	 *     KVM_STATE_NESTED_GUEST_MODE
+	 *     KVM_STATE_NESTED_RUN_PENDING
+	 *     KVM_STATE_NESTED_EVMCS
+	 */
+	set_default_state(&state);
+	state.flags = 0xf;
+	test_nested_state_expect_einval(vm, &state);
+
+	/*
+	 * If KVM_STATE_NESTED_RUN_PENDING is set then
+	 * KVM_STATE_NESTED_GUEST_MODE has to be set as well.
+	 */
+	set_default_state(&state);
+	state.flags = KVM_STATE_NESTED_RUN_PENDING;
+	test_nested_state_expect_einval(vm, &state);
+
+	test_vmx_nested_state(vm);	/* VMX-specific checks, using a larger heap-allocated state */
+
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
new file mode 100644
index 000000000..fbe8417cb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_tsc_adjust_test
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * IA32_TSC_ADJUST test
+ *
+ * According to the SDM, "if an execution of WRMSR to the
+ * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
+ * the logical processor also adds (or subtracts) value X from the
+ * IA32_TSC_ADJUST MSR."
+ *
+ * Note that when L1 doesn't intercept writes to IA32_TSC, a
+ * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
+ * value.
+ *
+ * This test verifies that this unusual case is handled correctly.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#ifndef MSR_IA32_TSC_ADJUST
+#define MSR_IA32_TSC_ADJUST 0x3b
+#endif
+
+#define PAGE_SIZE	4096
+#define VCPU_ID		5
+
+#define TSC_ADJUST_VALUE (1ll << 32)	/* amount each guest WRMSR(IA32_TSC) in this test moves the TSC back */
+#define TSC_OFFSET_VALUE -(1ll << 48)	/* value L1 writes to the TSC_OFFSET VMCS field before running L2 */
+
+enum {
+	PORT_ABORT = 0x1000,	/* NOTE(review): none of these three ports is referenced elsewhere in this file */
+	PORT_REPORT,
+	PORT_DONE,
+};
+
+enum {
+	VMXON_PAGE = 0,	/* NOTE(review): these page indices are not referenced elsewhere in this file */
+	VMCS_PAGE,
+	MSR_BITMAP_PAGE,
+
+	NUM_VMX_PAGES,
+};
+
+struct kvm_single_msr {
+	struct kvm_msrs header;
+	struct kvm_msr_entry entry;
+} __attribute__((packed));	/* NOTE(review): type is not used elsewhere in this file */
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void check_ia32_tsc_adjust(int64_t max)
+{
+	int64_t adjust;	/* IA32_TSC_ADJUST reinterpreted as a signed value */
+
+	adjust = rdmsr(MSR_IA32_TSC_ADJUST);
+	GUEST_SYNC(adjust);	/* hand the value to the host, which prints it via report() */
+	GUEST_ASSERT(adjust <= max);	/* 'max' is an upper bound, not an exact expected value */
+}
+
+static void l2_guest_code(void)
+{
+	uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;	/* undo the TSC offset L1 programmed for L2 */
+
+	wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);	/* second backwards step, this time written from L2 */
+	check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);	/* L1's step plus this one */
+
+	/* Exit to L1 */
+	__asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];	/* lives on L1's own stack */
+	uint32_t control;	/* CPU-based VM-execution controls being edited */
+	uintptr_t save_cr3;	/* original GUEST_CR3, restored after the failed-entry check */
+
+	GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
+	wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);	/* first backwards step, written from L1 */
+	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+
+	/* Prepare the VMCS for L2 execution. */
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+	control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+	vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+	/* Jump into L2.  First, test failure to load guest CR3.  */
+	save_cr3 = vmreadz(GUEST_CR3);
+	vmwrite(GUEST_CR3, -1ull);
+	GUEST_ASSERT(!vmlaunch());	/* the launch itself reports success; the failure shows up in the exit reason */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+		     (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);	/* same bound as before the failed entry */
+	vmwrite(GUEST_CR3, save_cr3);
+
+	GUEST_ASSERT(!vmlaunch());	/* real entry: runs L2 until its vmcall */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);	/* L2's write must be reflected in the value L1 reads */
+
+	GUEST_DONE();
+}
+
+static void report(int64_t val)
+{
+	pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+		val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);	/* raw value, then quotient and remainder by TSC_ADJUST_VALUE */
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t vmx_pages_gva;	/* filled in by vcpu_alloc_vmx() below */
+
+	nested_vmx_check_supported();
+
+	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	/* Allocate VMX pages and shared descriptors (vmx_pages). */
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);	/* one argument: l1_guest_code()'s vmx_pages */
+
+	for (;;) {
+		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		struct ucall uc;
+
+		vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s", (const char *)uc.args[0]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			report(uc.args[1]);	/* IA32_TSC_ADJUST value sent by check_ia32_tsc_adjust() */
+			break;
+		case UCALL_DONE:
+			goto done;	/* the only way out of the loop */
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
new file mode 100644
index 000000000..352937674
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019, Google LLC.
+ *
+ * Tests for the IA32_XSS MSR.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define VCPU_ID	      1
+#define MSR_BITS      64	/* number of single-bit values main() tries to write to IA32_XSS */
+
+#define X86_FEATURE_XSAVES	(1<<3)	/* bit main() tests in EAX of CPUID leaf 0xd, index 1 */
+
+bool is_supported_msr(u32 msr_index)
+{
+	struct kvm_msr_list *list;	/* list returned by kvm_get_msr_index_list(); freed before returning */
+	bool found = false;
+	int i;
+
+	list = kvm_get_msr_index_list();
+	for (i = 0; i < list->nmsrs; ++i) {	/* linear search for msr_index */
+		if (list->indices[i] == msr_index) {
+			found = true;
+			break;
+		}
+	}
+
+	free(list);
+	return found;	/* true if msr_index appears in the list */
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_cpuid_entry2 *entry;
+	bool xss_supported = false;	/* stays false unless CPUID leaf 0xd, index 1 has the XSAVES bit in EAX */
+	struct kvm_vm *vm;
+	uint64_t xss_val;
+	int i, r;
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, 0);	/* no guest code: the guest is never run */
+
+	if (kvm_get_cpuid_max_basic() >= 0xd) {
+		entry = kvm_get_supported_cpuid_index(0xd, 1);
+		xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES);
+	}
+	if (!xss_supported) {
+		print_skip("IA32_XSS is not supported by the vCPU");
+		exit(KSFT_SKIP);
+	}
+
+	xss_val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS);
+	TEST_ASSERT(xss_val == 0,
+		    "MSR_IA32_XSS should be initialized to zero\n");
+
+	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, xss_val);	/* write back the value just read, i.e. 0 */
+	/*
+	 * At present, KVM only supports a guest IA32_XSS value of 0. Verify
+	 * that trying to set the guest IA32_XSS to an unsupported value fails.
+	 * Also, in the future when a non-zero value succeeds check that
+	 * IA32_XSS is in the KVM_GET_MSR_INDEX_LIST.
+	 */
+	for (i = 0; i < MSR_BITS; ++i) {
+		r = _vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, 1ull << i);	/* try each bit on its own */
+		TEST_ASSERT(r == 0 || is_supported_msr(MSR_IA32_XSS),	/* r == 0 is treated as "write rejected" */
+			    "IA32_XSS was able to be set, but was not found in KVM_GET_MSR_INDEX_LIST.\n");
+	}
+
+	kvm_vm_free(vm);
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 10:05:51 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 10:05:51 +0000
commit	5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
tree	a94efe259b9009378be6d90eb30d2b019d95c194 /tools/testing/selftests/kvm
parent	Initial commit. (diff)
download	linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip