summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/Kconfig66
-rw-r--r--arch/arm64/kvm/Makefile39
-rw-r--r--arch/arm64/kvm/debug.c297
-rw-r--r--arch/arm64/kvm/fpsimd.c128
-rw-r--r--arch/arm64/kvm/guest.c553
-rw-r--r--arch/arm64/kvm/handle_exit.c315
-rw-r--r--arch/arm64/kvm/hyp-init.S176
-rw-r--r--arch/arm64/kvm/hyp.S48
-rw-r--r--arch/arm64/kvm/hyp/Makefile31
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c211
-rw-r--r--arch/arm64/kvm/hyp/entry.S187
-rw-r--r--arch/arm64/kvm/hyp/fpsimd.S33
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S401
-rw-r--r--arch/arm64/kvm/hyp/s2-setup.c90
-rw-r--r--arch/arm64/kvm/hyp/switch.c702
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c306
-rw-r--r--arch/arm64/kvm/hyp/tlb.c173
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c92
-rw-r--r--arch/arm64/kvm/inject_fault.c248
-rw-r--r--arch/arm64/kvm/irq.h19
-rw-r--r--arch/arm64/kvm/regmap.c206
-rw-r--r--arch/arm64/kvm/reset.c181
-rw-r--r--arch/arm64/kvm/sys_regs.c2606
-rw-r--r--arch/arm64/kvm/sys_regs.h137
-rw-r--r--arch/arm64/kvm/sys_regs_generic_v8.c98
-rw-r--r--arch/arm64/kvm/trace.h181
-rw-r--r--arch/arm64/kvm/va_layout.c227
-rw-r--r--arch/arm64/kvm/vgic-sys-reg-v3.c314
28 files changed, 8065 insertions, 0 deletions
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
new file mode 100644
index 000000000..47b23bf61
--- /dev/null
+++ b/arch/arm64/kvm/Kconfig
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ ---help---
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ bool "Kernel-based Virtual Machine (KVM) support"
+ depends on OF
+ select MMU_NOTIFIER
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select HAVE_KVM_ARCH_TLB_FLUSH_ALL
+ select KVM_MMIO
+ select KVM_ARM_HOST
+ select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ select SRCU
+ select KVM_VFIO
+ select HAVE_KVM_EVENTFD
+ select HAVE_KVM_IRQFD
+ select KVM_ARM_PMU if HW_PERF_EVENTS
+ select HAVE_KVM_MSI
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQ_ROUTING
+ select IRQ_BYPASS_MANAGER
+ select HAVE_KVM_IRQ_BYPASS
+ select HAVE_KVM_VCPU_RUN_PID_CHANGE
+ ---help---
+ Support hosting virtualized guest machines.
+ We don't support KVM with 16K page tables yet, due to the multiple
+ levels of fake page tables.
+
+ If unsure, say N.
+
+config KVM_ARM_HOST
+ bool
+ ---help---
+ Provides host support for ARM processors.
+
+config KVM_ARM_PMU
+ bool
+ ---help---
+ Adds support for a virtual Performance Monitoring Unit (PMU) in
+ virtual machines.
+
+config KVM_INDIRECT_VECTORS
+ def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS)
+
+source drivers/vhost/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
new file mode 100644
index 000000000..0f2a135ba
--- /dev/null
+++ b/arch/arm64/kvm/Makefile
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y += -Iarch/arm64/kvm -Ivirt/kvm/arm/vgic
+CFLAGS_arm.o := -I.
+CFLAGS_mmu.o := -I.
+
+KVM=../../../virt/kvm
+
+obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
+obj-$(CONFIG_KVM_ARM_HOST) += hyp/
+
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
+kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
+kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
new file mode 100644
index 000000000..7fe195ef7
--- /dev/null
+++ b/arch/arm64/kvm/debug.c
@@ -0,0 +1,297 @@
+/*
+ * Debug and Guest Debug support
+ *
+ * Copyright (C) 2015 - Linaro Ltd
+ * Author: Alex Bennée <alex.bennee@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/hw_breakpoint.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_emulate.h>
+
+#include "trace.h"
+
+/* These are the bits of MDSCR_EL1 we may manipulate */
+#define MDSCR_EL1_DEBUG_MASK (DBG_MDSCR_SS | \
+ DBG_MDSCR_KDE | \
+ DBG_MDSCR_MDE)
+
+static DEFINE_PER_CPU(u32, mdcr_el2);
+
+/**
+ * save/restore_guest_debug_regs
+ *
+ * For some debug operations we need to tweak some guest registers. As
+ * a result we need to save the state of those registers before we
+ * make those modifications.
+ *
+ * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled
+ * after we have restored the preserved value to the main context.
+ */
+static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
+{
+ u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+
+ vcpu->arch.guest_debug_preserved.mdscr_el1 = val;
+
+ trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
+ vcpu->arch.guest_debug_preserved.mdscr_el1);
+}
+
+static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
+{
+ u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1;
+
+ vcpu_write_sys_reg(vcpu, val, MDSCR_EL1);
+
+ trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
+ vcpu_read_sys_reg(vcpu, MDSCR_EL1));
+}
+
+/**
+ * kvm_arm_init_debug - grab what we need for debug
+ *
+ * Currently the sole task of this function is to retrieve the initial
+ * value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has
+ * presumably been set-up by some knowledgeable bootcode.
+ *
+ * It is called once per-cpu during CPU hyp initialisation.
+ */
+
+void kvm_arm_init_debug(void)
+{
+ __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2));
+}
+
+/**
+ * kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value
+ *
+ * @vcpu: the vcpu pointer
+ *
+ * This ensures we will trap access to:
+ * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
+ * - Debug ROM Address (MDCR_EL2_TDRA)
+ * - OS related registers (MDCR_EL2_TDOSA)
+ * - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB)
+ * - Self-hosted Trace Filter controls (MDCR_EL2_TTRF)
+ */
+static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
+{
+ /*
+ * This also clears MDCR_EL2_E2PB_MASK to disable guest access
+ * to the profiling buffer.
+ */
+ vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
+ vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
+ MDCR_EL2_TPMS |
+ MDCR_EL2_TTRF |
+ MDCR_EL2_TPMCR |
+ MDCR_EL2_TDRA |
+ MDCR_EL2_TDOSA);
+
+ /* Is the VM being debugged by userspace? */
+ if (vcpu->guest_debug)
+ /* Route all software debug exceptions to EL2 */
+ vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
+
+ /*
+ * Trap debug register access when one of the following is true:
+ * - Userspace is using the hardware to debug the guest
+ * (KVM_GUESTDBG_USE_HW is set).
+ * - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear).
+ */
+ if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
+ !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
+ vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
+
+ trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
+}
+
+/**
+ * kvm_arm_vcpu_init_debug - setup vcpu debug traps
+ *
+ * @vcpu: the vcpu pointer
+ *
+ * Set vcpu initial mdcr_el2 value.
+ */
+void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ kvm_arm_setup_mdcr_el2(vcpu);
+ preempt_enable();
+}
+
+/**
+ * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state
+ */
+
+void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state;
+}
+
+/**
+ * kvm_arm_setup_debug - set up debug related stuff
+ *
+ * @vcpu: the vcpu pointer
+ *
+ * This is called before each entry into the hypervisor to setup any
+ * debug related registers.
+ *
+ * Additionally, KVM only traps guest accesses to the debug registers if
+ * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
+ * flag on vcpu->arch.flags). Since the guest must not interfere
+ * with the hardware state when debugging the guest, we must ensure that
+ * trapping is enabled whenever we are debugging the guest using the
+ * debug registers.
+ */
+
+void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
+{
+ unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2;
+
+ trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
+
+ kvm_arm_setup_mdcr_el2(vcpu);
+
+ /* Is Guest debugging in effect? */
+ if (vcpu->guest_debug) {
+ /* Save guest debug state */
+ save_guest_debug_regs(vcpu);
+
+ /*
+ * Single Step (ARM ARM D2.12.3 The software step state
+ * machine)
+ *
+ * If we are doing Single Step we need to manipulate
+ * the guest's MDSCR_EL1.SS and PSTATE.SS. Once the
+ * step has occurred the hypervisor will trap the
+ * debug exception and we return to userspace.
+ *
+ * If the guest attempts to single step its userspace
+ * we would have to deal with a trapped exception
+ * while in the guest kernel. Because this would be
+ * hard to unwind we suppress the guest's ability to
+ * do so by masking MDSCR_EL.SS.
+ *
+ * This confuses guest debuggers which use
+ * single-step behind the scenes but everything
+ * returns to normal once the host is no longer
+ * debugging the system.
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr |= DBG_MDSCR_SS;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
+ } else {
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr &= ~DBG_MDSCR_SS;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
+ }
+
+ trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
+
+ /*
+ * HW Breakpoints and watchpoints
+ *
+ * We simply switch the debug_ptr to point to our new
+ * external_debug_state which has been populated by the
+ * debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY
+ * mechanism ensures the registers are updated on the
+ * world switch.
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
+ /* Enable breakpoints/watchpoints */
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr |= DBG_MDSCR_MDE;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
+
+ vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+
+ trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
+ &vcpu->arch.debug_ptr->dbg_bcr[0],
+ &vcpu->arch.debug_ptr->dbg_bvr[0]);
+
+ trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
+ &vcpu->arch.debug_ptr->dbg_wcr[0],
+ &vcpu->arch.debug_ptr->dbg_wvr[0]);
+ }
+ }
+
+ BUG_ON(!vcpu->guest_debug &&
+ vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state);
+
+ /* If KDE or MDE are set, perform a full save/restore cycle. */
+ if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+
+ /* Write mdcr_el2 changes since vcpu_load on VHE systems */
+ if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+
+ trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
+}
+
+void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
+{
+ trace_kvm_arm_clear_debug(vcpu->guest_debug);
+
+ if (vcpu->guest_debug) {
+ restore_guest_debug_regs(vcpu);
+
+ /*
+ * If we were using HW debug we need to restore the
+ * debug_ptr to the guest debug state.
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
+ kvm_arm_reset_debug_ptr(vcpu);
+
+ trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
+ &vcpu->arch.debug_ptr->dbg_bcr[0],
+ &vcpu->arch.debug_ptr->dbg_bvr[0]);
+
+ trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
+ &vcpu->arch.debug_ptr->dbg_wcr[0],
+ &vcpu->arch.debug_ptr->dbg_wvr[0]);
+ }
+ }
+}
+
+
+/*
+ * After successfully emulating an instruction, we might want to
+ * return to user space with a KVM_EXIT_DEBUG. We can only do this
+ * once the emulation is complete, though, so for userspace emulations
+ * we have to wait until we have re-entered KVM before calling this
+ * helper.
+ *
+ * Return true (and set exit_reason) to return to userspace or false
+ * if no further action is required.
+ */
+bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT;
+ return true;
+ }
+ return false;
+}
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
new file mode 100644
index 000000000..aac7808ce
--- /dev/null
+++ b/arch/arm64/kvm/fpsimd.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch/arm64/kvm/fpsimd.c: Guest/host FPSIMD context coordination helpers
+ *
+ * Copyright 2018 Arm Limited
+ * Author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <linux/irqflags.h>
+#include <linux/sched.h>
+#include <linux/thread_info.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_mmu.h>
+#include <asm/sysreg.h>
+
+/*
+ * Called on entry to KVM_RUN unless this vcpu previously ran at least
+ * once and the most recent prior KVM_RUN for this vcpu was called from
+ * the same task as current (highly likely).
+ *
+ * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu),
+ * such that on entering hyp the relevant parts of current are already
+ * mapped.
+ */
+int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ struct thread_info *ti = &current->thread_info;
+ struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
+
+ /*
+ * Make sure the host task thread flags and fpsimd state are
+ * visible to hyp:
+ */
+ ret = create_hyp_mappings(ti, ti + 1, PAGE_HYP);
+ if (ret)
+ goto error;
+
+ ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
+ if (ret)
+ goto error;
+
+ vcpu->arch.host_thread_info = kern_hyp_va(ti);
+ vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
+error:
+ return ret;
+}
+
+/*
+ * Prepare vcpu for saving the host's FPSIMD state and loading the guest's.
+ * The actual loading is done by the FPSIMD access trap taken to hyp.
+ *
+ * Here, we just set the correct metadata to indicate that the FPSIMD
+ * state in the cpu regs (if any) belongs to current on the host.
+ *
+ * TIF_SVE is backed up here, since it may get clobbered with guest state.
+ * This flag is restored by kvm_arch_vcpu_put_fp(vcpu).
+ */
+void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
+{
+ BUG_ON(!current->mm);
+
+ vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+ KVM_ARM64_HOST_SVE_IN_USE |
+ KVM_ARM64_HOST_SVE_ENABLED);
+ vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+
+ if (test_thread_flag(TIF_SVE))
+ vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
+
+ if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
+ vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
+}
+
+/*
+ * If the guest FPSIMD state was loaded, update the host's context
+ * tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu
+ * so that they will be written back if the kernel clobbers them due to
+ * kernel-mode NEON before re-entry into the guest.
+ */
+void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(!irqs_disabled());
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+ fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs);
+ clear_thread_flag(TIF_FOREIGN_FPSTATE);
+ clear_thread_flag(TIF_SVE);
+ }
+}
+
+/*
+ * Write back the vcpu FPSIMD regs if they are dirty, and invalidate the
+ * cpu FPSIMD regs so that they can't be spuriously reused if this vcpu
+ * disappears and another task or vcpu appears that recycles the same
+ * struct fpsimd_state.
+ */
+void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+ /* Clean guest FP state to memory and invalidate cpu view */
+ fpsimd_save();
+ fpsimd_flush_cpu_state();
+ } else if (system_supports_sve()) {
+ /*
+ * The FPSIMD/SVE state in the CPU has not been touched, and we
+ * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
+ * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
+ * for EL0. To avoid spurious traps, restore the trap state
+ * seen by kvm_arch_vcpu_load_fp():
+ */
+ if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
+ else
+ sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
+ }
+
+ update_thread_flag(TIF_SVE,
+ vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+
+ local_irq_restore(flags);
+}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
new file mode 100644
index 000000000..870e594f9
--- /dev/null
+++ b/arch/arm64/kvm/guest.c
@@ -0,0 +1,553 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/guest.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <kvm/arm_psci.h>
+#include <asm/cputype.h>
+#include <linux/uaccess.h>
+#include <asm/kvm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+
+#include "trace.h"
+
+#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM }
+#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ VCPU_STAT(hvc_exit_stat),
+ VCPU_STAT(wfe_exit_stat),
+ VCPU_STAT(wfi_exit_stat),
+ VCPU_STAT(mmio_exit_user),
+ VCPU_STAT(mmio_exit_kernel),
+ VCPU_STAT(exits),
+ { NULL }
+};
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+static u64 core_reg_offset_from_id(u64 id)
+{
+ return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
+}
+
+static int validate_core_offset(const struct kvm_one_reg *reg)
+{
+ u64 off = core_reg_offset_from_id(reg->id);
+ int size;
+
+ switch (off) {
+ case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+ KVM_REG_ARM_CORE_REG(regs.regs[30]):
+ case KVM_REG_ARM_CORE_REG(regs.sp):
+ case KVM_REG_ARM_CORE_REG(regs.pc):
+ case KVM_REG_ARM_CORE_REG(regs.pstate):
+ case KVM_REG_ARM_CORE_REG(sp_el1):
+ case KVM_REG_ARM_CORE_REG(elr_el1):
+ case KVM_REG_ARM_CORE_REG(spsr[0]) ...
+ KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
+ size = sizeof(__u64);
+ break;
+
+ case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+ KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+ size = sizeof(__uint128_t);
+ break;
+
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+ size = sizeof(__u32);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (KVM_REG_SIZE(reg->id) == size &&
+ IS_ALIGNED(off, size / sizeof(__u32)))
+ return 0;
+
+ return -EINVAL;
+}
+
+static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ /*
+ * Because the kvm_regs structure is a mix of 32, 64 and
+ * 128bit fields, we index it as if it was a 32bit
+ * array. Hence below, nr_regs is the number of entries, and
+ * off the index in the "array".
+ */
+ __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+ struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+ int nr_regs = sizeof(*regs) / sizeof(__u32);
+ u32 off;
+
+ /* Our ID is an index into the kvm_regs struct. */
+ off = core_reg_offset_from_id(reg->id);
+ if (off >= nr_regs ||
+ (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+ return -ENOENT;
+
+ if (validate_core_offset(reg))
+ return -EINVAL;
+
+ if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+ struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+ int nr_regs = sizeof(*regs) / sizeof(__u32);
+ __uint128_t tmp;
+ void *valp = &tmp;
+ u64 off;
+ int err = 0;
+
+ /* Our ID is an index into the kvm_regs struct. */
+ off = core_reg_offset_from_id(reg->id);
+ if (off >= nr_regs ||
+ (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+ return -ENOENT;
+
+ if (validate_core_offset(reg))
+ return -EINVAL;
+
+ if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
+ return -EINVAL;
+
+ if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
+ u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK;
+ switch (mode) {
+ case PSR_AA32_MODE_USR:
+ if (!system_supports_32bit_el0())
+ return -EINVAL;
+ break;
+ case PSR_AA32_MODE_FIQ:
+ case PSR_AA32_MODE_IRQ:
+ case PSR_AA32_MODE_SVC:
+ case PSR_AA32_MODE_ABT:
+ case PSR_AA32_MODE_UND:
+ if (!vcpu_el1_is_32bit(vcpu))
+ return -EINVAL;
+ break;
+ case PSR_MODE_EL0t:
+ case PSR_MODE_EL1t:
+ case PSR_MODE_EL1h:
+ if (vcpu_el1_is_32bit(vcpu))
+ return -EINVAL;
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+
+ if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
+ int i;
+
+ for (i = 0; i < 16; i++)
+ *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i);
+ }
+out:
+ return err;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ return -EINVAL;
+}
+
+static unsigned long num_core_regs(void)
+{
+ return sizeof(struct kvm_regs) / sizeof(__u32);
+}
+
+/**
+ * ARM64 versions of the TIMER registers, always available on arm64
+ */
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+ switch (index) {
+ case KVM_REG_ARM_TIMER_CTL:
+ case KVM_REG_ARM_TIMER_CNT:
+ case KVM_REG_ARM_TIMER_CVAL:
+ return true;
+ }
+ return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+ int ret;
+
+ ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+ if (ret != 0)
+ return -EFAULT;
+
+ return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+
+ val = kvm_arm_timer_get_reg(vcpu, reg->id);
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
+}
+
+/**
+ * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
+ *
+ * This is for all registers.
+ */
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
+{
+ return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu)
+ + kvm_arm_get_fw_num_regs(vcpu) + NUM_TIMER_REGS;
+}
+
+/**
+ * kvm_arm_copy_reg_indices - get indices of all registers.
+ *
+ * We do core registers right here, then we append system regs.
+ */
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ unsigned int i;
+ const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+ int ret;
+
+ for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
+ if (put_user(core_reg | i, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+
+ ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices);
+ if (ret)
+ return ret;
+ uindices += kvm_arm_get_fw_num_regs(vcpu);
+
+ ret = copy_timer_indices(vcpu, uindices);
+ if (ret)
+ return ret;
+ uindices += NUM_TIMER_REGS;
+
+ return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
+}
+
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ /* We currently use nothing arch-specific in upper 32 bits */
+ if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+ return -EINVAL;
+
+ /* Register group 16 means we want a core register. */
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+ return get_core_reg(vcpu, reg);
+
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
+ return kvm_arm_get_fw_reg(vcpu, reg);
+
+ if (is_timer_reg(reg->id))
+ return get_timer_reg(vcpu, reg);
+
+ return kvm_arm_sys_reg_get_reg(vcpu, reg);
+}
+
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ /* We currently use nothing arch-specific in upper 32 bits */
+ if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+ return -EINVAL;
+
+ /* Register group 16 means we set a core register. */
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+ return set_core_reg(vcpu, reg);
+
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
+ return kvm_arm_set_fw_reg(vcpu, reg);
+
+ if (is_timer_reg(reg->id))
+ return set_timer_reg(vcpu, reg);
+
+ return kvm_arm_sys_reg_set_reg(vcpu, reg);
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
+ events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+
+ if (events->exception.serror_pending && events->exception.serror_has_esr)
+ events->exception.serror_esr = vcpu_get_vsesr(vcpu);
+
+ return 0;
+}
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ bool serror_pending = events->exception.serror_pending;
+ bool has_esr = events->exception.serror_has_esr;
+
+ if (serror_pending && has_esr) {
+ if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+ return -EINVAL;
+
+ if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
+ kvm_set_sei_esr(vcpu, events->exception.serror_esr);
+ else
+ return -EINVAL;
+ } else if (serror_pending) {
+ kvm_inject_vabt(vcpu);
+ }
+
+ return 0;
+}
+
+int __attribute_const__ kvm_target_cpu(void)
+{
+ unsigned long implementor = read_cpuid_implementor();
+ unsigned long part_number = read_cpuid_part_number();
+
+ switch (implementor) {
+ case ARM_CPU_IMP_ARM:
+ switch (part_number) {
+ case ARM_CPU_PART_AEM_V8:
+ return KVM_ARM_TARGET_AEM_V8;
+ case ARM_CPU_PART_FOUNDATION:
+ return KVM_ARM_TARGET_FOUNDATION_V8;
+ case ARM_CPU_PART_CORTEX_A53:
+ return KVM_ARM_TARGET_CORTEX_A53;
+ case ARM_CPU_PART_CORTEX_A57:
+ return KVM_ARM_TARGET_CORTEX_A57;
+ };
+ break;
+ case ARM_CPU_IMP_APM:
+ switch (part_number) {
+ case APM_CPU_PART_POTENZA:
+ return KVM_ARM_TARGET_XGENE_POTENZA;
+ };
+ break;
+ };
+
+ /* Return a default generic target */
+ return KVM_ARM_TARGET_GENERIC_V8;
+}
+
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+ int target = kvm_target_cpu();
+
+ if (target < 0)
+ return -ENODEV;
+
+ memset(init, 0, sizeof(*init));
+
+ /*
+ * For now, we don't return any features.
+ * In future, we might use features to return target
+ * specific features available for the preferred
+ * target type.
+ */
+ init->target = (__u32)target;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL;
+}
+
+#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
+ KVM_GUESTDBG_USE_SW_BP | \
+ KVM_GUESTDBG_USE_HW | \
+ KVM_GUESTDBG_SINGLESTEP)
+
+/**
+ * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
+ * @kvm: pointer to the KVM struct
+ * @kvm_guest_debug: the ioctl data buffer
+ *
+ * This sets up and enables the VM for guest debugging. Userspace
+ * passes in a control flag to enable different debug types and
+ * potentially other architecture specific information in the rest of
+ * the structure.
+ */
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ int ret = 0;
+
+ trace_kvm_set_guest_debug(vcpu, dbg->control);
+
+ if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (dbg->control & KVM_GUESTDBG_ENABLE) {
+ vcpu->guest_debug = dbg->control;
+
+ /* Hardware assisted Break and Watch points */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
+ vcpu->arch.external_debug_state = dbg->arch;
+ }
+
+ } else {
+ /* If not enabled clear all flags */
+ vcpu->guest_debug = 0;
+ }
+
+out:
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_PMU_V3_CTRL:
+ ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
+ break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_set_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_PMU_V3_CTRL:
+ ret = kvm_arm_pmu_v3_get_attr(vcpu, attr);
+ break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_get_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_PMU_V3_CTRL:
+ ret = kvm_arm_pmu_v3_has_attr(vcpu, attr);
+ break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_has_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
new file mode 100644
index 000000000..e5e741bff
--- /dev/null
+++ b/arch/arm64/kvm/handle_exit.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/handle_exit.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/esr.h>
+#include <asm/exception.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_mmu.h>
+#include <asm/debug-monitors.h>
+#include <asm/traps.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+
+static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
+{
+ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
+ kvm_inject_vabt(vcpu);
+}
+
+static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int ret;
+
+ trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
+ kvm_vcpu_hvc_get_imm(vcpu));
+ vcpu->stat.hvc_exit_stat++;
+
+ ret = kvm_hvc_call_handler(vcpu);
+ if (ret < 0) {
+ vcpu_set_reg(vcpu, 0, ~0UL);
+ return 1;
+ }
+
+ return ret;
+}
+
+static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /*
+ * "If an SMC instruction executed at Non-secure EL1 is
+ * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
+ * Trap exception, not a Secure Monitor Call exception [...]"
+ *
+ * We need to advance the PC after the trap, as it would
+ * otherwise return to the same address...
+ */
+ vcpu_set_reg(vcpu, 0, ~0UL);
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ return 1;
+}
+
+/*
+ * Guest access to FP/ASIMD registers are routed to this handler only
+ * when the system doesn't support FP/ASIMD.
+ */
+static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+/**
+ * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
+ * instruction executed by a guest
+ *
+ * @vcpu: the vcpu pointer
+ *
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
+ */
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
+ trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
+ vcpu->stat.wfe_exit_stat++;
+ kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
+ } else {
+ trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
+ vcpu->stat.wfi_exit_stat++;
+ kvm_vcpu_block(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ }
+
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+
+ return 1;
+}
+
+/**
+ * kvm_handle_guest_debug - handle a debug exception instruction
+ *
+ * @vcpu: the vcpu pointer
+ * @run: access to the kvm_run structure for results
+ *
+ * We route all debug exceptions through the same handler. If both the
+ * guest and host are using the same debug facilities it will be up to
+ * userspace to re-inject the correct exception for guest delivery.
+ *
+ * @return: 0 (while setting run->exit_reason), -1 for error
+ */
+static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ int ret = 0;
+
+ run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.hsr = hsr;
+
+ switch (ESR_ELx_EC(hsr)) {
+ case ESR_ELx_EC_WATCHPT_LOW:
+ run->debug.arch.far = vcpu->arch.fault.far_el2;
+ /* fall through */
+ case ESR_ELx_EC_SOFTSTP_LOW:
+ case ESR_ELx_EC_BREAKPT_LOW:
+ case ESR_ELx_EC_BKPT32:
+ case ESR_ELx_EC_BRK64:
+ break;
+ default:
+ kvm_err("%s: un-handled case hsr: %#08x\n",
+ __func__, (unsigned int) hsr);
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+ kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
+ hsr, esr_get_class_string(hsr));
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /* Until SVE is supported for guests: */
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+static exit_handle_fn arm_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec,
+ [ESR_ELx_EC_WFx] = kvm_handle_wfx,
+ [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32,
+ [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64,
+ [ESR_ELx_EC_CP14_MR] = kvm_handle_cp14_32,
+ [ESR_ELx_EC_CP14_LS] = kvm_handle_cp14_load_store,
+ [ESR_ELx_EC_CP14_64] = kvm_handle_cp14_64,
+ [ESR_ELx_EC_HVC32] = handle_hvc,
+ [ESR_ELx_EC_SMC32] = handle_smc,
+ [ESR_ELx_EC_HVC64] = handle_hvc,
+ [ESR_ELx_EC_SMC64] = handle_smc,
+ [ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
+ [ESR_ELx_EC_SVE] = handle_sve,
+ [ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
+ [ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
+ [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
+ [ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
+ [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
+ [ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug,
+ [ESR_ELx_EC_BRK64] = kvm_handle_guest_debug,
+ [ESR_ELx_EC_FP_ASIMD] = handle_no_fpsimd,
+};
+
+static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ u8 hsr_ec = ESR_ELx_EC(hsr);
+
+ return arm_exit_handlers[hsr_ec];
+}
+
+/*
+ * We may be single-stepping an emulated instruction. If the emulation
+ * has been completed in the kernel, we can return to userspace with a
+ * KVM_EXIT_DEBUG, otherwise userspace needs to complete its
+ * emulation first.
+ */
+static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int handled;
+
+ /*
+ * See ARM ARM B1.14.1: "Hyp traps on instructions
+ * that fail their condition code check"
+ */
+ if (!kvm_condition_valid(vcpu)) {
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ handled = 1;
+ } else {
+ exit_handle_fn exit_handler;
+
+ exit_handler = kvm_get_exit_handler(vcpu);
+ handled = exit_handler(vcpu, run);
+ }
+
+ /*
+ * kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run
+ * structure if we need to return to userspace.
+ */
+ if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run))
+ handled = 0;
+
+ return handled;
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index)
+{
+ if (ARM_SERROR_PENDING(exception_index)) {
+ u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
+
+ /*
+ * HVC/SMC already have an adjusted PC, which we need
+ * to correct in order to return to after having
+ * injected the SError.
+ */
+ if (hsr_ec == ESR_ELx_EC_HVC32 || hsr_ec == ESR_ELx_EC_HVC64 ||
+ hsr_ec == ESR_ELx_EC_SMC32 || hsr_ec == ESR_ELx_EC_SMC64) {
+ u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2;
+ *vcpu_pc(vcpu) -= adj;
+ }
+
+ return 1;
+ }
+
+ exception_index = ARM_EXCEPTION_CODE(exception_index);
+
+ switch (exception_index) {
+ case ARM_EXCEPTION_IRQ:
+ return 1;
+ case ARM_EXCEPTION_EL1_SERROR:
+ /* We may still need to return for single-step */
+ if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
+ && kvm_arm_handle_step_debug(vcpu, run))
+ return 0;
+ else
+ return 1;
+ case ARM_EXCEPTION_TRAP:
+ return handle_trap_exceptions(vcpu, run);
+ case ARM_EXCEPTION_HYP_GONE:
+ /*
+ * EL2 has been reset to the hyp-stub. This happens when a guest
+ * is pre-empted by kvm_reboot()'s shutdown call.
+ */
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ return 0;
+ default:
+ kvm_pr_unimpl("Unsupported exception type: %d",
+ exception_index);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return 0;
+ }
+}
+
+/* For exit types that need handling before we can be preempted */
+void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index)
+{
+ if (ARM_SERROR_PENDING(exception_index)) {
+ if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
+ u64 disr = kvm_vcpu_get_disr(vcpu);
+
+ kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
+ } else {
+ kvm_inject_vabt(vcpu);
+ }
+
+ return;
+ }
+
+ exception_index = ARM_EXCEPTION_CODE(exception_index);
+
+ if (exception_index == ARM_EXCEPTION_EL1_SERROR)
+ kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu));
+}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
new file mode 100644
index 000000000..6ef670f18
--- /dev/null
+++ b/arch/arm64/kvm/hyp-init.S
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
+
+ .text
+ .pushsection .hyp.idmap.text, "ax"
+
+ .align 11
+
+ENTRY(__kvm_hyp_init)
+ ventry __invalid // Synchronous EL2t
+ ventry __invalid // IRQ EL2t
+ ventry __invalid // FIQ EL2t
+ ventry __invalid // Error EL2t
+
+ ventry __invalid // Synchronous EL2h
+ ventry __invalid // IRQ EL2h
+ ventry __invalid // FIQ EL2h
+ ventry __invalid // Error EL2h
+
+ ventry __do_hyp_init // Synchronous 64-bit EL1
+ ventry __invalid // IRQ 64-bit EL1
+ ventry __invalid // FIQ 64-bit EL1
+ ventry __invalid // Error 64-bit EL1
+
+ ventry __invalid // Synchronous 32-bit EL1
+ ventry __invalid // IRQ 32-bit EL1
+ ventry __invalid // FIQ 32-bit EL1
+ ventry __invalid // Error 32-bit EL1
+
+__invalid:
+ b .
+
+ /*
+ * x0: HYP pgd
+ * x1: HYP stack
+ * x2: HYP vectors
+ * x3: per-CPU offset
+ */
+__do_hyp_init:
+ /* Check for a stub HVC call */
+ cmp x0, #HVC_STUB_HCALL_NR
+ b.lo __kvm_handle_stub_hvc
+
+ phys_to_ttbr x4, x0
+ msr ttbr0_el2, x4
+
+ mrs x4, tcr_el1
+ ldr x5, =TCR_EL2_MASK
+ and x4, x4, x5
+ mov x5, #TCR_EL2_RES1
+ orr x4, x4, x5
+
+ /*
+ * The ID map may be configured to use an extended virtual address
+ * range. This is only the case if system RAM is out of range for the
+ * currently configured page size and VA_BITS, in which case we will
+ * also need the extended virtual range for the HYP ID map, or we won't
+ * be able to enable the EL2 MMU.
+ *
+ * However, at EL2, there is only one TTBR register, and we can't switch
+ * between translation tables *and* update TCR_EL2.T0SZ at the same
+ * time. Bottom line: we need to use the extended range with *both* our
+ * translation tables.
+ *
+ * So use the same T0SZ value we use for the ID map.
+ */
+ ldr_l x5, idmap_t0sz
+ bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+
+ /*
+ * Set the PS bits in TCR_EL2.
+ */
+ tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6
+
+ msr tcr_el2, x4
+
+ mrs x4, mair_el1
+ msr mair_el2, x4
+ isb
+
+ /* Invalidate the stale TLBs from Bootloader */
+ tlbi alle2
+ dsb sy
+
+ /*
+ * Preserve all the RES1 bits while setting the default flags,
+ * as well as the EE bit on BE. Drop the A flag since the compiler
+ * is allowed to generate unaligned accesses.
+ */
+ ldr x4, =(SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
+CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
+ msr sctlr_el2, x4
+ isb
+
+ /* Set the stack and new vectors */
+ kern_hyp_va x1
+ mov sp, x1
+ msr vbar_el2, x2
+
+ /* Set tpidr_el2 for use by HYP */
+ msr tpidr_el2, x3
+
+ /* Hello, World! */
+ eret
+ENDPROC(__kvm_hyp_init)
+
+ENTRY(__kvm_handle_stub_hvc)
+ cmp x0, #HVC_SOFT_RESTART
+ b.ne 1f
+
+ /* This is where we're about to jump, staying at EL2 */
+ msr elr_el2, x1
+ mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h)
+ msr spsr_el2, x0
+
+ /* Shuffle the arguments, and don't come back */
+ mov x0, x2
+ mov x1, x3
+ mov x2, x4
+ b reset
+
+1: cmp x0, #HVC_RESET_VECTORS
+ b.ne 1f
+
+ /*
+ * Set the HVC_RESET_VECTORS return code before entering the common
+ * path so that we do not clobber x0-x2 in case we are coming via
+ * HVC_SOFT_RESTART.
+ */
+ mov x0, xzr
+reset:
+ /* Reset kvm back to the hyp stub. */
+ mrs x5, sctlr_el2
+ ldr x6, =SCTLR_ELx_FLAGS
+ bic x5, x5, x6 // Clear SCTL_M and etc
+ pre_disable_mmu_workaround
+ msr sctlr_el2, x5
+ isb
+
+ /* Install stub vectors */
+ adr_l x5, __hyp_stub_vectors
+ msr vbar_el2, x5
+ eret
+
+1: /* Bad stub call */
+ ldr x0, =HVC_STUB_ERR
+ eret
+
+ENDPROC(__kvm_handle_stub_hvc)
+
+ .ltorg
+
+ .popsection
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
new file mode 100644
index 000000000..952f6cb9c
--- /dev/null
+++ b/arch/arm64/kvm/hyp.S
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/alternative.h>
+#include <asm/assembler.h>
+#include <asm/cpufeature.h>
+
+/*
+ * u64 __kvm_call_hyp(void *hypfn, ...);
+ *
+ * This is not really a variadic function in the classic C-way and care must
+ * be taken when calling this to ensure parameters are passed in registers
+ * only, since the stack will change between the caller and the callee.
+ *
+ * Call the function with the first argument containing a pointer to the
+ * function you wish to call in Hyp mode, and subsequent arguments will be
+ * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the
+ * function pointer can be passed). The function being called must be mapped
+ * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are
+ * passed in x0.
+ *
+ * A function pointer with a value less than 0xfff has a special meaning,
+ * and is used to implement hyp stubs in the same way as in
+ * arch/arm64/kernel/hyp_stub.S.
+ */
+ENTRY(__kvm_call_hyp)
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+ hvc #0
+ ret
+alternative_else_nop_endif
+ b __vhe_hyp_call
+ENDPROC(__kvm_call_hyp)
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
new file mode 100644
index 000000000..feef06fc7
--- /dev/null
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module, HYP part
+#
+
+ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \
+ $(DISABLE_STACKLEAK_PLUGIN)
+
+KVM=../../../../virt/kvm
+
+obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o
+
+obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o
+obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += entry.o
+obj-$(CONFIG_KVM_ARM_HOST) += switch.o
+obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
+obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
+obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
+obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
+
+# KVM code is run at a different exception code with a different map, so
+# compiler instrumentation that inserts callbacks or checks into the code may
+# cause crashes. Just disable it.
+GCOV_PROFILE := n
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
new file mode 100644
index 000000000..3c5414633
--- /dev/null
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+#define read_debug(r,n) read_sysreg(r##n##_el1)
+#define write_debug(v,r,n) write_sysreg(v, r##n##_el1)
+
+#define save_debug(ptr,reg,nr) \
+ switch (nr) { \
+ case 15: ptr[15] = read_debug(reg, 15); \
+ case 14: ptr[14] = read_debug(reg, 14); \
+ case 13: ptr[13] = read_debug(reg, 13); \
+ case 12: ptr[12] = read_debug(reg, 12); \
+ case 11: ptr[11] = read_debug(reg, 11); \
+ case 10: ptr[10] = read_debug(reg, 10); \
+ case 9: ptr[9] = read_debug(reg, 9); \
+ case 8: ptr[8] = read_debug(reg, 8); \
+ case 7: ptr[7] = read_debug(reg, 7); \
+ case 6: ptr[6] = read_debug(reg, 6); \
+ case 5: ptr[5] = read_debug(reg, 5); \
+ case 4: ptr[4] = read_debug(reg, 4); \
+ case 3: ptr[3] = read_debug(reg, 3); \
+ case 2: ptr[2] = read_debug(reg, 2); \
+ case 1: ptr[1] = read_debug(reg, 1); \
+ default: ptr[0] = read_debug(reg, 0); \
+ }
+
+#define restore_debug(ptr,reg,nr) \
+ switch (nr) { \
+ case 15: write_debug(ptr[15], reg, 15); \
+ case 14: write_debug(ptr[14], reg, 14); \
+ case 13: write_debug(ptr[13], reg, 13); \
+ case 12: write_debug(ptr[12], reg, 12); \
+ case 11: write_debug(ptr[11], reg, 11); \
+ case 10: write_debug(ptr[10], reg, 10); \
+ case 9: write_debug(ptr[9], reg, 9); \
+ case 8: write_debug(ptr[8], reg, 8); \
+ case 7: write_debug(ptr[7], reg, 7); \
+ case 6: write_debug(ptr[6], reg, 6); \
+ case 5: write_debug(ptr[5], reg, 5); \
+ case 4: write_debug(ptr[4], reg, 4); \
+ case 3: write_debug(ptr[3], reg, 3); \
+ case 2: write_debug(ptr[2], reg, 2); \
+ case 1: write_debug(ptr[1], reg, 1); \
+ default: write_debug(ptr[0], reg, 0); \
+ }
+
+static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
+{
+ u64 reg;
+
+ /* Clear pmscr in case of early return */
+ *pmscr_el1 = 0;
+
+ /* SPE present on this CPU? */
+ if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
+ ID_AA64DFR0_PMSVER_SHIFT))
+ return;
+
+ /* Yes; is it owned by EL3? */
+ reg = read_sysreg_s(SYS_PMBIDR_EL1);
+ if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT))
+ return;
+
+ /* No; is the host actually using the thing? */
+ reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
+ if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
+ return;
+
+ /* Yes; save the control register and disable data generation */
+ *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
+ write_sysreg_s(0, SYS_PMSCR_EL1);
+ isb();
+
+ /* Now drain all buffered data to memory */
+ psb_csync();
+ dsb(nsh);
+}
+
+static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1)
+{
+ if (!pmscr_el1)
+ return;
+
+ /* The host page table is installed, but not yet synchronised */
+ isb();
+
+ /* Re-enable data generation */
+ write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
+}
+
+static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
+{
+ u64 aa64dfr0;
+ int brps, wrps;
+
+ aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
+ brps = (aa64dfr0 >> 12) & 0xf;
+ wrps = (aa64dfr0 >> 20) & 0xf;
+
+ save_debug(dbg->dbg_bcr, dbgbcr, brps);
+ save_debug(dbg->dbg_bvr, dbgbvr, brps);
+ save_debug(dbg->dbg_wcr, dbgwcr, wrps);
+ save_debug(dbg->dbg_wvr, dbgwvr, wrps);
+
+ ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
+}
+
+static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
+{
+ u64 aa64dfr0;
+ int brps, wrps;
+
+ aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
+
+ brps = (aa64dfr0 >> 12) & 0xf;
+ wrps = (aa64dfr0 >> 20) & 0xf;
+
+ restore_debug(dbg->dbg_bcr, dbgbcr, brps);
+ restore_debug(dbg->dbg_bvr, dbgbvr, brps);
+ restore_debug(dbg->dbg_wcr, dbgwcr, wrps);
+ restore_debug(dbg->dbg_wvr, dbgwvr, wrps);
+
+ write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
+}
+
+void __hyp_text __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Non-VHE: Disable and flush SPE data generation
+ * VHE: The vcpu can run, but it can't hide.
+ */
+ __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
+
+}
+
+void __hyp_text __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
+{
+ __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
+}
+
+void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ struct kvm_guest_debug_arch *host_dbg;
+ struct kvm_guest_debug_arch *guest_dbg;
+
+ if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
+ return;
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ guest_ctxt = &vcpu->arch.ctxt;
+ host_dbg = &vcpu->arch.host_debug_state.regs;
+ guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+
+ __debug_save_state(vcpu, host_dbg, host_ctxt);
+ __debug_restore_state(vcpu, guest_dbg, guest_ctxt);
+}
+
+void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ struct kvm_guest_debug_arch *host_dbg;
+ struct kvm_guest_debug_arch *guest_dbg;
+
+
+ if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
+ return;
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ guest_ctxt = &vcpu->arch.ctxt;
+ host_dbg = &vcpu->arch.host_debug_state.regs;
+ guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+
+ __debug_save_state(vcpu, guest_dbg, guest_ctxt);
+ __debug_restore_state(vcpu, host_dbg, host_ctxt);
+
+ vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY;
+}
+
+u32 __hyp_text __kvm_get_mdcr_el2(void)
+{
+ return read_sysreg(mdcr_el2);
+}
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
new file mode 100644
index 000000000..fc83e932a
--- /dev/null
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/alternative.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/fpsimdmacros.h>
+#include <asm/kvm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x)
+#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+.macro save_callee_saved_regs ctxt
+ stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
+ stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
+ stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
+ stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
+ stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
+ stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
+.endm
+
+.macro restore_callee_saved_regs ctxt
+ ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
+ ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
+ ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
+ ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
+ ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
+ ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
+.endm
+
+/*
+ * u64 __guest_enter(struct kvm_vcpu *vcpu,
+ * struct kvm_cpu_context *host_ctxt);
+ */
+ENTRY(__guest_enter)
+ // x0: vcpu
+ // x1: host context
+ // x2-x17: clobbered by macros
+ // x18: guest context
+
+ // Store the host regs
+ save_callee_saved_regs x1
+
+ // Now the host state is stored if we have a pending RAS SError it must
+ // affect the host. If any asynchronous exception is pending we defer
+ // the guest entry. The DSB isn't necessary before v8.2 as any SError
+ // would be fatal.
+alternative_if ARM64_HAS_RAS_EXTN
+ dsb nshst
+ isb
+alternative_else_nop_endif
+ mrs x1, isr_el1
+ cbz x1, 1f
+ mov x0, #ARM_EXCEPTION_IRQ
+ ret
+
+1:
+ add x18, x0, #VCPU_CONTEXT
+
+ // Restore guest regs x0-x17
+ ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
+ ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
+ ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
+ ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
+ ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
+ ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
+ ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
+ ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
+ ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
+
+ // Restore guest regs x19-x29, lr
+ restore_callee_saved_regs x18
+
+ // Restore guest reg x18
+ ldr x18, [x18, #CPU_XREG_OFFSET(18)]
+
+ // Do not touch any register after this!
+ eret
+ENDPROC(__guest_enter)
+
+ENTRY(__guest_exit)
+ // x0: return code
+ // x1: vcpu
+ // x2-x29,lr: vcpu regs
+ // vcpu x0-x1 on the stack
+
+ add x1, x1, #VCPU_CONTEXT
+
+ ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
+
+ // Store the guest regs x2 and x3
+ stp x2, x3, [x1, #CPU_XREG_OFFSET(2)]
+
+ // Retrieve the guest regs x0-x1 from the stack
+ ldp x2, x3, [sp], #16 // x0, x1
+
+ // Store the guest regs x0-x1 and x4-x18
+ stp x2, x3, [x1, #CPU_XREG_OFFSET(0)]
+ stp x4, x5, [x1, #CPU_XREG_OFFSET(4)]
+ stp x6, x7, [x1, #CPU_XREG_OFFSET(6)]
+ stp x8, x9, [x1, #CPU_XREG_OFFSET(8)]
+ stp x10, x11, [x1, #CPU_XREG_OFFSET(10)]
+ stp x12, x13, [x1, #CPU_XREG_OFFSET(12)]
+ stp x14, x15, [x1, #CPU_XREG_OFFSET(14)]
+ stp x16, x17, [x1, #CPU_XREG_OFFSET(16)]
+ str x18, [x1, #CPU_XREG_OFFSET(18)]
+
+ // Store the guest regs x19-x29, lr
+ save_callee_saved_regs x1
+
+ get_host_ctxt x2, x3
+
+ // Now restore the host regs
+ restore_callee_saved_regs x2
+
+alternative_if ARM64_HAS_RAS_EXTN
+ // If we have the RAS extensions we can consume a pending error
+ // without an unmask-SError and isb.
+ esb
+ mrs_s x2, SYS_DISR_EL1
+ str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)]
+ cbz x2, 1f
+ msr_s SYS_DISR_EL1, xzr
+ orr x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT)
+1: ret
+alternative_else
+ // If we have a pending asynchronous abort, now is the
+ // time to find out. From your VAXorcist book, page 666:
+ // "Threaten me not, oh Evil one! For I speak with
+ // the power of DEC, and I command thee to show thyself!"
+ mrs x2, elr_el2
+ mrs x3, esr_el2
+ mrs x4, spsr_el2
+ mov x5, x0
+
+ dsb sy // Synchronize against in-flight ld/st
+ nop
+ msr daifclr, #4 // Unmask aborts
+alternative_endif
+
+ // This is our single instruction exception window. A pending
+ // SError is guaranteed to occur at the earliest when we unmask
+ // it, and at the latest just after the ISB.
+abort_guest_exit_start:
+
+ isb
+
+abort_guest_exit_end:
+ msr daifset, #4 // Mask aborts
+ ret
+
+ _kvm_extable abort_guest_exit_start, 9997f
+ _kvm_extable abort_guest_exit_end, 9997f
+9997:
+ msr daifset, #4 // Mask aborts
+ mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT)
+
+ // restore the EL1 exception context so that we can report some
+ // information. Merge the exception code with the SError pending bit.
+ msr elr_el2, x2
+ msr esr_el2, x3
+ msr spsr_el2, x4
+ orr x0, x0, x5
+1: ret
+ENDPROC(__guest_exit)
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
new file mode 100644
index 000000000..da3f22c7f
--- /dev/null
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/fpsimdmacros.h>
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+ENTRY(__fpsimd_save_state)
+ fpsimd_save x0, 1
+ ret
+ENDPROC(__fpsimd_save_state)
+
+ENTRY(__fpsimd_restore_state)
+ fpsimd_restore x0, 1
+ ret
+ENDPROC(__fpsimd_restore_state)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
new file mode 100644
index 000000000..01e518b82
--- /dev/null
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -0,0 +1,401 @@
+/*
+ * Copyright (C) 2015-2018 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/linkage.h>
+
+#include <asm/alternative.h>
+#include <asm/assembler.h>
+#include <asm/cpufeature.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+#include <asm/mmu.h>
+
+.macro save_caller_saved_regs_vect
+ /* x0 and x1 were saved in the vector entry */
+ stp x2, x3, [sp, #-16]!
+ stp x4, x5, [sp, #-16]!
+ stp x6, x7, [sp, #-16]!
+ stp x8, x9, [sp, #-16]!
+ stp x10, x11, [sp, #-16]!
+ stp x12, x13, [sp, #-16]!
+ stp x14, x15, [sp, #-16]!
+ stp x16, x17, [sp, #-16]!
+.endm
+
+.macro restore_caller_saved_regs_vect
+ ldp x16, x17, [sp], #16
+ ldp x14, x15, [sp], #16
+ ldp x12, x13, [sp], #16
+ ldp x10, x11, [sp], #16
+ ldp x8, x9, [sp], #16
+ ldp x6, x7, [sp], #16
+ ldp x4, x5, [sp], #16
+ ldp x2, x3, [sp], #16
+ ldp x0, x1, [sp], #16
+.endm
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+.macro do_el2_call
+ /*
+ * Shuffle the parameters before calling the function
+ * pointed to in x0. Assumes parameters in x[1,2,3].
+ */
+ str lr, [sp, #-16]!
+ mov lr, x0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ blr lr
+ ldr lr, [sp], #16
+.endm
+
+ENTRY(__vhe_hyp_call)
+ do_el2_call
+ /*
+ * We used to rely on having an exception return to get
+ * an implicit isb. In the E2H case, we don't have it anymore.
+ * rather than changing all the leaf functions, just do it here
+ * before returning to the rest of the kernel.
+ */
+ isb
+ ret
+ENDPROC(__vhe_hyp_call)
+
+el1_sync: // Guest trapped into EL2
+
+ mrs x0, esr_el2
+ lsr x0, x0, #ESR_ELx_EC_SHIFT
+ cmp x0, #ESR_ELx_EC_HVC64
+ ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
+ b.ne el1_trap
+
+ mrs x1, vttbr_el2 // If vttbr is valid, the guest
+ cbnz x1, el1_hvc_guest // called HVC
+
+ /* Here, we're pretty sure the host called HVC. */
+ ldp x0, x1, [sp], #16
+
+ /* Check for a stub HVC call */
+ cmp x0, #HVC_STUB_HCALL_NR
+ b.hs 1f
+
+ /*
+ * Compute the idmap address of __kvm_handle_stub_hvc and
+ * jump there. Since we use kimage_voffset, do not use the
+ * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead
+ * (by loading it from the constant pool).
+ *
+ * Preserve x0-x4, which may contain stub parameters.
+ */
+ ldr x5, =__kvm_handle_stub_hvc
+ ldr_l x6, kimage_voffset
+
+ /* x5 = __pa(x5) */
+ sub x5, x5, x6
+ br x5
+
+1:
+ /*
+ * Perform the EL2 call
+ */
+ kern_hyp_va x0
+ do_el2_call
+
+ eret
+
+el1_hvc_guest:
+ /*
+ * Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1.
+ * The workaround has already been applied on the host,
+ * so let's quickly get back to the guest. We don't bother
+ * restoring x1, as it can be clobbered anyway.
+ */
+ ldr x1, [sp] // Guest's x0
+ eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1
+ cbz w1, wa_epilogue
+
+ /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
+ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \
+ ARM_SMCCC_ARCH_WORKAROUND_2)
+ cbz w1, wa_epilogue
+
+ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_2 ^ \
+ ARM_SMCCC_ARCH_WORKAROUND_3)
+ cbnz w1, el1_trap
+
+#ifdef CONFIG_ARM64_SSBD
+alternative_cb arm64_enable_wa2_handling
+ b wa2_end
+alternative_cb_end
+ get_vcpu_ptr x2, x0
+ ldr x0, [x2, #VCPU_WORKAROUND_FLAGS]
+
+ // Sanitize the argument and update the guest flags
+ ldr x1, [sp, #8] // Guest's x1
+ clz w1, w1 // Murphy's device:
+ lsr w1, w1, #5 // w1 = !!w1 without using
+ eor w1, w1, #1 // the flags...
+ bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1
+ str x0, [x2, #VCPU_WORKAROUND_FLAGS]
+
+ /* Check that we actually need to perform the call */
+ hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2
+ cbz x0, wa2_end
+
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2
+ smc #0
+
+ /* Don't leak data from the SMC call */
+ mov x3, xzr
+wa2_end:
+ mov x2, xzr
+ mov x1, xzr
+#endif
+
+wa_epilogue:
+ mov x0, xzr
+ add sp, sp, #16
+ eret
+
+el1_trap:
+ get_vcpu_ptr x1, x0
+ mov x0, #ARM_EXCEPTION_TRAP
+ b __guest_exit
+
+el1_irq:
+ get_vcpu_ptr x1, x0
+ mov x0, #ARM_EXCEPTION_IRQ
+ b __guest_exit
+
+el1_error:
+ get_vcpu_ptr x1, x0
+ mov x0, #ARM_EXCEPTION_EL1_SERROR
+ b __guest_exit
+
+el2_sync:
+ save_caller_saved_regs_vect
+ stp x29, x30, [sp, #-16]!
+ bl kvm_unexpected_el2_exception
+ ldp x29, x30, [sp], #16
+ restore_caller_saved_regs_vect
+
+ eret
+
+el2_error:
+ save_caller_saved_regs_vect
+ stp x29, x30, [sp, #-16]!
+
+ bl kvm_unexpected_el2_exception
+
+ ldp x29, x30, [sp], #16
+ restore_caller_saved_regs_vect
+
+ eret
+
+ENTRY(__hyp_do_panic)
+ mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
+ PSR_MODE_EL1h)
+ msr spsr_el2, lr
+ ldr lr, =panic
+ msr elr_el2, lr
+ eret
+ENDPROC(__hyp_do_panic)
+
+ENTRY(__hyp_panic)
+ get_host_ctxt x0, x1
+ b hyp_panic
+ENDPROC(__hyp_panic)
+
+.macro invalid_vector label, target = __hyp_panic
+ .align 2
+\label:
+ b \target
+ENDPROC(\label)
+.endm
+
+ /* None of these should ever happen */
+ invalid_vector el2t_sync_invalid
+ invalid_vector el2t_irq_invalid
+ invalid_vector el2t_fiq_invalid
+ invalid_vector el2t_error_invalid
+ invalid_vector el2h_irq_invalid
+ invalid_vector el2h_fiq_invalid
+ invalid_vector el1_fiq_invalid
+
+ .ltorg
+
+ .align 11
+
+.macro valid_vect target
+ .align 7
+ stp x0, x1, [sp, #-16]!
+ b \target
+.endm
+
+.macro invalid_vect target
+ .align 7
+ b \target
+ ldp x0, x1, [sp], #16
+ b \target
+.endm
+
+ENTRY(__kvm_hyp_vector)
+ invalid_vect el2t_sync_invalid // Synchronous EL2t
+ invalid_vect el2t_irq_invalid // IRQ EL2t
+ invalid_vect el2t_fiq_invalid // FIQ EL2t
+ invalid_vect el2t_error_invalid // Error EL2t
+
+ valid_vect el2_sync // Synchronous EL2h
+ invalid_vect el2h_irq_invalid // IRQ EL2h
+ invalid_vect el2h_fiq_invalid // FIQ EL2h
+ valid_vect el2_error // Error EL2h
+
+ valid_vect el1_sync // Synchronous 64-bit EL1
+ valid_vect el1_irq // IRQ 64-bit EL1
+ invalid_vect el1_fiq_invalid // FIQ 64-bit EL1
+ valid_vect el1_error // Error 64-bit EL1
+
+ valid_vect el1_sync // Synchronous 32-bit EL1
+ valid_vect el1_irq // IRQ 32-bit EL1
+ invalid_vect el1_fiq_invalid // FIQ 32-bit EL1
+ valid_vect el1_error // Error 32-bit EL1
+ENDPROC(__kvm_hyp_vector)
+
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
+.macro hyp_ventry
+ .align 7
+1: .rept 27
+ nop
+ .endr
+/*
+ * The default sequence is to directly branch to the KVM vectors,
+ * using the computed offset. This applies for VHE as well as
+ * !ARM64_HARDEN_EL2_VECTORS.
+ *
+ * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
+ * with:
+ *
+ * stp x0, x1, [sp, #-16]!
+ * movz x0, #(addr & 0xffff)
+ * movk x0, #((addr >> 16) & 0xffff), lsl #16
+ * movk x0, #((addr >> 32) & 0xffff), lsl #32
+ * br x0
+ *
+ * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4.
+ * See kvm_patch_vector_branch for details.
+ */
+alternative_cb kvm_patch_vector_branch
+ b __kvm_hyp_vector + (1b - 0b)
+ nop
+ nop
+ nop
+ nop
+alternative_cb_end
+.endm
+
+.macro generate_vectors
+0:
+ .rept 16
+ hyp_ventry
+ .endr
+ .org 0b + SZ_2K // Safety measure
+.endm
+
+ .align 11
+ENTRY(__bp_harden_hyp_vecs_start)
+ .rept BP_HARDEN_EL2_SLOTS
+ generate_vectors
+ .endr
+ENTRY(__bp_harden_hyp_vecs_end)
+
+ .popsection
+
+ENTRY(__smccc_workaround_1_smc_start)
+ sub sp, sp, #(8 * 4)
+ stp x2, x3, [sp, #(8 * 0)]
+ stp x0, x1, [sp, #(8 * 2)]
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+ smc #0
+ ldp x2, x3, [sp, #(8 * 0)]
+ ldp x0, x1, [sp, #(8 * 2)]
+ add sp, sp, #(8 * 4)
+ENTRY(__smccc_workaround_1_smc_end)
+
+ENTRY(__smccc_workaround_3_smc_start)
+ esb
+ sub sp, sp, #(8 * 4)
+ stp x2, x3, [sp, #(8 * 0)]
+ stp x0, x1, [sp, #(8 * 2)]
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3
+ smc #0
+ ldp x2, x3, [sp, #(8 * 0)]
+ ldp x0, x1, [sp, #(8 * 2)]
+ add sp, sp, #(8 * 4)
+ENTRY(__smccc_workaround_3_smc_end)
+
+ENTRY(__spectre_bhb_loop_k8_start)
+ esb
+ sub sp, sp, #(8 * 2)
+ stp x0, x1, [sp, #(8 * 0)]
+ mov x0, #8
+2: b . + 4
+ subs x0, x0, #1
+ b.ne 2b
+ dsb nsh
+ isb
+ ldp x0, x1, [sp, #(8 * 0)]
+ add sp, sp, #(8 * 2)
+ENTRY(__spectre_bhb_loop_k8_end)
+
+ENTRY(__spectre_bhb_loop_k24_start)
+ esb
+ sub sp, sp, #(8 * 2)
+ stp x0, x1, [sp, #(8 * 0)]
+ mov x0, #24
+2: b . + 4
+ subs x0, x0, #1
+ b.ne 2b
+ dsb nsh
+ isb
+ ldp x0, x1, [sp, #(8 * 0)]
+ add sp, sp, #(8 * 2)
+ENTRY(__spectre_bhb_loop_k24_end)
+
+ENTRY(__spectre_bhb_loop_k32_start)
+ esb
+ sub sp, sp, #(8 * 2)
+ stp x0, x1, [sp, #(8 * 0)]
+ mov x0, #32
+2: b . + 4
+ subs x0, x0, #1
+ b.ne 2b
+ dsb nsh
+ isb
+ ldp x0, x1, [sp, #(8 * 0)]
+ add sp, sp, #(8 * 2)
+ENTRY(__spectre_bhb_loop_k32_end)
+
+ENTRY(__spectre_bhb_clearbhb_start)
+ esb
+ clearbhb
+ isb
+ENTRY(__spectre_bhb_clearbhb_end)
+#endif
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
new file mode 100644
index 000000000..603e1ee83
--- /dev/null
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/types.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+
+u32 __hyp_text __init_stage2_translation(void)
+{
+ u64 val = VTCR_EL2_FLAGS;
+ u64 parange;
+ u64 tmp;
+
+ /*
+ * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS
+ * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
+ * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
+ */
+ parange = read_sysreg(id_aa64mmfr0_el1) & 7;
+ if (parange > ID_AA64MMFR0_PARANGE_MAX)
+ parange = ID_AA64MMFR0_PARANGE_MAX;
+ val |= parange << 16;
+
+ /* Compute the actual PARange... */
+ switch (parange) {
+ case 0:
+ parange = 32;
+ break;
+ case 1:
+ parange = 36;
+ break;
+ case 2:
+ parange = 40;
+ break;
+ case 3:
+ parange = 42;
+ break;
+ case 4:
+ parange = 44;
+ break;
+ case 5:
+ default:
+ parange = 48;
+ break;
+ }
+
+ /*
+ * ... and clamp it to 40 bits, unless we have some braindead
+ * HW that implements less than that. In all cases, we'll
+ * return that value for the rest of the kernel to decide what
+ * to do.
+ */
+ val |= 64 - (parange > 40 ? 40 : parange);
+
+ /*
+ * Check the availability of Hardware Access Flag / Dirty Bit
+ * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
+ */
+ tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
+ if (tmp)
+ val |= VTCR_EL2_HA;
+
+ /*
+ * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
+ * bit in VTCR_EL2.
+ */
+ tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_VMIDBITS_SHIFT) & 0xf;
+ val |= (tmp == ID_AA64MMFR1_VMIDBITS_16) ?
+ VTCR_EL2_VS_16BIT :
+ VTCR_EL2_VS_8BIT;
+
+ write_sysreg(val, vtcr_el2);
+
+ return parange;
+}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
new file mode 100644
index 000000000..1c248c12a
--- /dev/null
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -0,0 +1,702 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/cpufeature.h>
+#include <asm/extable.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/vectors.h>
+
+extern struct exception_table_entry __start___kvm_ex_table;
+extern struct exception_table_entry __stop___kvm_ex_table;
+
+/* Check whether the FP regs were dirtied while in the host-side run loop: */
+static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
+{
+ /*
+ * When the system doesn't support FP/SIMD, we cannot rely on
+ * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
+ * abort on the very first access to FP and thus we should never
+ * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
+ * trap the accesses.
+ */
+ if (!system_supports_fpsimd() ||
+ vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+ vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+ KVM_ARM64_FP_HOST);
+
+ return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
+}
+
+/* Save the 32-bit only FPSIMD system register state */
+static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
+}
+
+static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
+{
+ /*
+ * We are about to set CPTR_EL2.TFP to trap all floating point
+ * register accesses to EL2, however, the ARM ARM clearly states that
+ * traps are only taken to EL2 if the operation would not otherwise
+ * trap to EL1. Therefore, always make sure that for 32-bit guests,
+ * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+ * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
+ * it will cause an exception.
+ */
+ if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
+ write_sysreg(1 << 30, fpexc32_el2);
+ isb();
+ }
+}
+
+static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
+{
+ /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
+ write_sysreg(1 << 15, hstr_el2);
+
+ /*
+ * Make sure we trap PMU access from EL0 to EL2. Also sanitize
+ * PMSELR_EL0 to make sure it never contains the cycle
+ * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
+ * EL1 instead of being trapped to EL2.
+ */
+ write_sysreg(0, pmselr_el0);
+ write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+}
+
+static void __hyp_text __deactivate_traps_common(void)
+{
+ write_sysreg(0, hstr_el2);
+ write_sysreg(0, pmuserenr_el0);
+}
+
+static void activate_traps_vhe(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ val = read_sysreg(cpacr_el1);
+ val |= CPACR_EL1_TTA;
+ val &= ~CPACR_EL1_ZEN;
+ if (!update_fp_enabled(vcpu)) {
+ val &= ~CPACR_EL1_FPEN;
+ __activate_traps_fpsimd32(vcpu);
+ }
+
+ write_sysreg(val, cpacr_el1);
+
+ write_sysreg(kvm_get_hyp_vector(), vbar_el1);
+}
+NOKPROBE_SYMBOL(activate_traps_vhe);
+
+static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ __activate_traps_common(vcpu);
+
+ val = CPTR_EL2_DEFAULT;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
+ if (!update_fp_enabled(vcpu)) {
+ val |= CPTR_EL2_TFP;
+ __activate_traps_fpsimd32(vcpu);
+ }
+
+ write_sysreg(val, cptr_el2);
+}
+
+static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ write_sysreg(hcr, hcr_el2);
+
+ if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
+ write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+
+ if (has_vhe())
+ activate_traps_vhe(vcpu);
+ else
+ __activate_traps_nvhe(vcpu);
+}
+
+static void deactivate_traps_vhe(void)
+{
+ const char *host_vectors = vectors;
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
+
+ if (!arm64_kernel_unmapped_at_el0())
+ host_vectors = __this_cpu_read(this_cpu_vector);
+ write_sysreg(host_vectors, vbar_el1);
+}
+NOKPROBE_SYMBOL(deactivate_traps_vhe);
+
+static void __hyp_text __deactivate_traps_nvhe(void)
+{
+ u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+ __deactivate_traps_common();
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK;
+ mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
+ write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
+ write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
+}
+
+static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ /*
+ * If we pended a virtual abort, preserve it until it gets
+ * cleared. See D1.14.3 (Virtual Interrupts) for details, but
+ * the crucial bit is "On taking a vSError interrupt,
+ * HCR_EL2.VSE is cleared to 0."
+ */
+ if (vcpu->arch.hcr_el2 & HCR_VSE)
+ vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
+
+ if (has_vhe())
+ deactivate_traps_vhe();
+ else
+ __deactivate_traps_nvhe();
+}
+
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+{
+ __activate_traps_common(vcpu);
+}
+
+void deactivate_traps_vhe_put(void)
+{
+ u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+ MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+ MDCR_EL2_TPMS;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
+
+ __deactivate_traps_common();
+}
+
+static void __hyp_text __activate_vm(struct kvm *kvm)
+{
+ write_sysreg(kvm->arch.vttbr, vttbr_el2);
+}
+
+static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
+{
+ write_sysreg(0, vttbr_el2);
+}
+
+/* Save VGICv3 state on non-VHE systems */
+static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
+{
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_save_state(vcpu);
+ __vgic_v3_deactivate_traps(vcpu);
+ }
+}
+
+/* Restore VGICv3 state on non_VEH systems */
+static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
+{
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_activate_traps(vcpu);
+ __vgic_v3_restore_state(vcpu);
+ }
+}
+
+static bool __hyp_text __true_value(void)
+{
+ return true;
+}
+
+static bool __hyp_text __false_value(void)
+{
+ return false;
+}
+
+static hyp_alternate_select(__check_arm_834220,
+ __false_value, __true_value,
+ ARM64_WORKAROUND_834220);
+
+static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
+{
+ u64 par, tmp;
+
+ /*
+ * Resolve the IPA the hard way using the guest VA.
+ *
+ * Stage-1 translation already validated the memory access
+ * rights. As such, we can use the EL1 translation regime, and
+ * don't have to distinguish between EL0 and EL1 access.
+ *
+ * We do need to save/restore PAR_EL1 though, as we haven't
+ * saved the guest context yet, and we may return early...
+ */
+ par = read_sysreg(par_el1);
+ if (!__kvm_at("s1e1r", far))
+ tmp = read_sysreg(par_el1);
+ else
+ tmp = 1; /* back to the guest */
+ write_sysreg(par, par_el1);
+
+ if (unlikely(tmp & 1))
+ return false; /* Translation failed, back to guest */
+
+ /* Convert PAR to HPFAR format */
+ *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
+ return true;
+}
+
+static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
+{
+ u8 ec;
+ u64 esr;
+ u64 hpfar, far;
+
+ esr = vcpu->arch.fault.esr_el2;
+ ec = ESR_ELx_EC(esr);
+
+ if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
+ return true;
+
+ far = read_sysreg_el2(far);
+
+ /*
+ * The HPFAR can be invalid if the stage 2 fault did not
+ * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
+ * bit is clear) and one of the two following cases are true:
+ * 1. The fault was due to a permission fault
+ * 2. The processor carries errata 834220
+ *
+ * Therefore, for all non S1PTW faults where we either have a
+ * permission fault or the errata workaround is enabled, we
+ * resolve the IPA using the AT instruction.
+ */
+ if (!(esr & ESR_ELx_S1PTW) &&
+ (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ if (!__translate_far_to_hpfar(far, &hpfar))
+ return false;
+ } else {
+ hpfar = read_sysreg(hpfar_el2);
+ }
+
+ vcpu->arch.fault.far_el2 = far;
+ vcpu->arch.fault.hpfar_el2 = hpfar;
+ return true;
+}
+
+/* Skip an instruction which has been emulated. Returns true if
+ * execution can continue or false if we need to exit hyp mode because
+ * single-step was in effect.
+ */
+static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
+{
+ *vcpu_pc(vcpu) = read_sysreg_el2(elr);
+
+ if (vcpu_mode_is_32bit(vcpu)) {
+ vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr);
+ kvm_skip_instr32(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr);
+ } else {
+ *vcpu_pc(vcpu) += 4;
+ }
+
+ write_sysreg_el2(*vcpu_pc(vcpu), elr);
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ vcpu->arch.fault.esr_el2 =
+ (ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22;
+ return false;
+ } else {
+ return true;
+ }
+}
+
+static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
+{
+ struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
+
+ if (has_vhe())
+ write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
+ cpacr_el1);
+ else
+ write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
+ cptr_el2);
+
+ isb();
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
+ /*
+ * In the SVE case, VHE is assumed: it is enforced by
+ * Kconfig and kvm_arch_init().
+ */
+ if (system_supports_sve() &&
+ (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) {
+ struct thread_struct *thread = container_of(
+ host_fpsimd,
+ struct thread_struct, uw.fpsimd_state);
+
+ sve_save_state(sve_pffr(thread), &host_fpsimd->fpsr);
+ } else {
+ __fpsimd_save_state(host_fpsimd);
+ }
+
+ vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+ }
+
+ __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+
+ /* Skip restoring fpexc32 for AArch64 guests */
+ if (!(read_sysreg(hcr_el2) & HCR_RW))
+ write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
+ fpexc32_el2);
+
+ vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+
+ return true;
+}
+
+/*
+ * Return true when we were able to fixup the guest exit and should return to
+ * the guest, false when we should restore the host state and return to the
+ * main run loop.
+ */
+static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
+ vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
+
+ /*
+ * We're using the raw exception code in order to only process
+ * the trap if no SError is pending. We will come back to the
+ * same PC once the SError has been injected, and replay the
+ * trapping instruction.
+ */
+ if (*exit_code != ARM_EXCEPTION_TRAP)
+ goto exit;
+
+ /*
+ * We trap the first access to the FP/SIMD to save the host context
+ * and restore the guest context lazily.
+ * If FP/SIMD is not implemented, handle the trap and inject an
+ * undefined instruction exception to the guest.
+ */
+ if (system_supports_fpsimd() &&
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
+ return __hyp_switch_fpsimd(vcpu);
+
+ if (!__populate_fault_info(vcpu))
+ return true;
+
+ if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
+ bool valid;
+
+ valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
+ kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+ kvm_vcpu_dabt_isvalid(vcpu) &&
+ !kvm_vcpu_dabt_isextabt(vcpu) &&
+ !kvm_vcpu_abt_iss1tw(vcpu);
+
+ if (valid) {
+ int ret = __vgic_v2_perform_cpuif_access(vcpu);
+
+ if (ret == 1 && __skip_instr(vcpu))
+ return true;
+
+ if (ret == -1) {
+ /* Promote an illegal access to an
+ * SError. If we would be returning
+ * due to single-step clear the SS
+ * bit so handle_exit knows what to
+ * do after dealing with the error.
+ */
+ if (!__skip_instr(vcpu))
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+ *exit_code = ARM_EXCEPTION_EL1_SERROR;
+ }
+
+ goto exit;
+ }
+ }
+
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
+ int ret = __vgic_v3_perform_cpuif_access(vcpu);
+
+ if (ret == 1 && __skip_instr(vcpu))
+ return true;
+ }
+
+exit:
+ /* Return to the host kernel and handle the exit */
+ return false;
+}
+
+static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
+{
+ if (!cpus_have_const_cap(ARM64_SSBD))
+ return false;
+
+ return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
+}
+
+static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * The host runs with the workaround always present. If the
+ * guest wants it disabled, so be it...
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
+#endif
+}
+
+static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * If the guest has disabled the workaround, bring it back on.
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
+#endif
+}
+
+/* Switch to the guest for VHE systems running in EL2 */
+int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ u64 exit_code;
+
+ host_ctxt = vcpu->arch.host_cpu_context;
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ sysreg_save_host_state_vhe(host_ctxt);
+
+ __activate_traps(vcpu);
+ __activate_vm(vcpu->kvm);
+
+ sysreg_restore_guest_state_vhe(guest_ctxt);
+ __debug_switch_to_guest(vcpu);
+
+ __set_guest_arch_workaround_state(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
+ __set_host_arch_workaround_state(vcpu);
+
+ sysreg_save_guest_state_vhe(guest_ctxt);
+
+ __deactivate_traps(vcpu);
+
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+ __fpsimd_save_fpexc32(vcpu);
+
+ __debug_switch_to_host(vcpu);
+
+ return exit_code;
+}
+NOKPROBE_SYMBOL(kvm_vcpu_run_vhe);
+
+/* Switch to the guest for legacy non-VHE systems */
+int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ u64 exit_code;
+
+ vcpu = kern_hyp_va(vcpu);
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ __sysreg_save_state_nvhe(host_ctxt);
+ __debug_save_host_buffers_nvhe(vcpu);
+
+ __activate_traps(vcpu);
+ __activate_vm(kern_hyp_va(vcpu->kvm));
+
+ __hyp_vgic_restore_state(vcpu);
+ __timer_enable_traps(vcpu);
+
+ /*
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_state_nvhe(guest_ctxt);
+ __debug_switch_to_guest(vcpu);
+
+ __set_guest_arch_workaround_state(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
+ __set_host_arch_workaround_state(vcpu);
+
+ __sysreg_save_state_nvhe(guest_ctxt);
+ __sysreg32_save_state(vcpu);
+ __timer_disable_traps(vcpu);
+ __hyp_vgic_save_state(vcpu);
+
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+
+ __sysreg_restore_state_nvhe(host_ctxt);
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+ __fpsimd_save_fpexc32(vcpu);
+
+ __debug_switch_to_host(vcpu);
+ /*
+ * This must come after restoring the host sysregs, since a non-VHE
+ * system may enable SPE here and make use of the TTBRs.
+ */
+ __debug_restore_host_buffers_nvhe(vcpu);
+
+ return exit_code;
+}
+
+static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
+
+static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_cpu_context *__host_ctxt)
+{
+ struct kvm_vcpu *vcpu;
+ unsigned long str_va;
+
+ vcpu = __host_ctxt->__hyp_running_vcpu;
+
+ if (read_sysreg(vttbr_el2)) {
+ __timer_disable_traps(vcpu);
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+ __sysreg_restore_state_nvhe(__host_ctxt);
+ }
+
+ /*
+ * Force the panic string to be loaded from the literal pool,
+ * making sure it is a kernel address and not a PC-relative
+ * reference.
+ */
+ asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string));
+
+ __hyp_do_panic(str_va,
+ spsr, elr,
+ read_sysreg(esr_el2), read_sysreg_el2(far),
+ read_sysreg(hpfar_el2), par, vcpu);
+}
+
+static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_vcpu *vcpu;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ __deactivate_traps(vcpu);
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ panic(__hyp_panic_string,
+ spsr, elr,
+ read_sysreg_el2(esr), read_sysreg_el2(far),
+ read_sysreg(hpfar_el2), par, vcpu);
+}
+NOKPROBE_SYMBOL(__hyp_call_panic_vhe);
+
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
+{
+ u64 spsr = read_sysreg_el2(spsr);
+ u64 elr = read_sysreg_el2(elr);
+ u64 par = read_sysreg(par_el1);
+
+ if (!has_vhe())
+ __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt);
+ else
+ __hyp_call_panic_vhe(spsr, elr, par, host_ctxt);
+
+ unreachable();
+}
+
+asmlinkage void __hyp_text kvm_unexpected_el2_exception(void)
+{
+ unsigned long addr, fixup;
+ struct kvm_cpu_context *host_ctxt;
+ struct exception_table_entry *entry, *end;
+ unsigned long elr_el2 = read_sysreg(elr_el2);
+
+ entry = hyp_symbol_addr(__start___kvm_ex_table);
+ end = hyp_symbol_addr(__stop___kvm_ex_table);
+ host_ctxt = __hyp_this_cpu_ptr(kvm_host_cpu_state);
+
+ while (entry < end) {
+ addr = (unsigned long)&entry->insn + entry->insn;
+ fixup = (unsigned long)&entry->fixup + entry->fixup;
+
+ if (addr != elr_el2) {
+ entry++;
+ continue;
+ }
+
+ write_sysreg(fixup, elr_el2);
+ return;
+ }
+
+ hyp_panic(host_ctxt);
+}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
new file mode 100644
index 000000000..7414b7619
--- /dev/null
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+/*
+ * Non-VHE: Both host and guest must save everything.
+ *
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate,
+ * which are handled as part of the el2 return state) on every switch.
+ * tpidr_el0 and tpidrro_el0 only need to be switched when going
+ * to host userspace or a different VCPU. EL1 registers only need to be
+ * switched when potentially going to run a different VCPU. The latter two
+ * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
+ */
+
+static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
+
+ /*
+ * The host arm64 Linux uses sp_el0 to point to 'current' and it must
+ * therefore be saved/restored on every entry/exit to/from the guest.
+ */
+ ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
+}
+
+static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
+ ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
+ ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
+ ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr);
+ ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
+ ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr);
+ ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0);
+ ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1);
+ ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(tcr);
+ ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(esr);
+ ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(afsr0);
+ ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(afsr1);
+ ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(far);
+ ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(mair);
+ ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(vbar);
+ ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(contextidr);
+ ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
+ ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
+ ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
+ ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
+
+ ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
+ ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
+ ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+}
+
+static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
+ ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
+
+ if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+ ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
+}
+
+void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_el1_state(ctxt);
+ __sysreg_save_common_state(ctxt);
+ __sysreg_save_user_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
+}
+
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_common_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_save_host_state_vhe);
+
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_common_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
+
+static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
+
+ /*
+ * The host arm64 Linux uses sp_el0 to point to 'current' and it must
+ * therefore be saved/restored on every entry/exit to/from the guest.
+ */
+ write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
+}
+
+static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
+ write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
+ write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr);
+ write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
+ write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr);
+ write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0);
+ write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1);
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], tcr);
+ write_sysreg_el1(ctxt->sys_regs[ESR_EL1], esr);
+ write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], afsr0);
+ write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], afsr1);
+ write_sysreg_el1(ctxt->sys_regs[FAR_EL1], far);
+ write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], mair);
+ write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], vbar);
+ write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr);
+ write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
+ write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
+ write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
+ write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
+
+ write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
+ write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
+ write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+}
+
+static void __hyp_text
+__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
+ write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
+
+ if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+ write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
+}
+
+void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_el1_state(ctxt);
+ __sysreg_restore_common_state(ctxt);
+ __sysreg_restore_user_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
+}
+
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_common_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe);
+
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_common_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
+
+void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
+{
+ u64 *spsr, *sysreg;
+
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ spsr = vcpu->arch.ctxt.gp_regs.spsr;
+ sysreg = vcpu->arch.ctxt.sys_regs;
+
+ spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt);
+ spsr[KVM_SPSR_UND] = read_sysreg(spsr_und);
+ spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq);
+ spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq);
+
+ sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
+ sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
+
+ if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+ sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
+}
+
+void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
+{
+ u64 *spsr, *sysreg;
+
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ spsr = vcpu->arch.ctxt.gp_regs.spsr;
+ sysreg = vcpu->arch.ctxt.sys_regs;
+
+ write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt);
+ write_sysreg(spsr[KVM_SPSR_UND], spsr_und);
+ write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq);
+ write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq);
+
+ write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
+ write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
+
+ if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+ write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
+}
+
+/**
+ * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Load system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_load() function
+ * and loading system register state early avoids having to load them on
+ * every entry to the VM.
+ */
+void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+ if (!has_vhe())
+ return;
+
+ __sysreg_save_user_state(host_ctxt);
+
+ /*
+ * Load guest EL1 and user state
+ *
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_user_state(guest_ctxt);
+ __sysreg_restore_el1_state(guest_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = true;
+
+ activate_traps_vhe_load(vcpu);
+}
+
+/**
+ * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Save guest system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_put() function
+ * and deferring saving system register state until we're no longer running the
+ * VCPU avoids having to save them on every exit from the VM.
+ */
+void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+ if (!has_vhe())
+ return;
+
+ deactivate_traps_vhe_put();
+
+ __sysreg_save_el1_state(guest_ctxt);
+ __sysreg_save_user_state(guest_ctxt);
+ __sysreg32_save_state(vcpu);
+
+ /* Restore host user state */
+ __sysreg_restore_user_state(host_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = false;
+}
+
+void __hyp_text __kvm_enable_ssbs(void)
+{
+ u64 tmp;
+
+ asm volatile(
+ "mrs %0, sctlr_el2\n"
+ "orr %0, %0, %1\n"
+ "msr sctlr_el2, %0"
+ : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
+}
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
new file mode 100644
index 000000000..c041eab3d
--- /dev/null
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/irqflags.h>
+
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/tlbflush.h>
+
+static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
+ unsigned long *flags)
+{
+ u64 val;
+
+ local_irq_save(*flags);
+
+ /*
+ * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+ * most TLB operations target EL2/EL0. In order to affect the
+ * guest TLBs (EL1/EL0), we need to change one of these two
+ * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
+ * let's flip TGE before executing the TLB operation.
+ */
+ write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ val = read_sysreg(hcr_el2);
+ val &= ~HCR_TGE;
+ write_sysreg(val, hcr_el2);
+ isb();
+}
+
+static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
+ unsigned long *flags)
+{
+ write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ isb();
+}
+
+static hyp_alternate_select(__tlb_switch_to_guest,
+ __tlb_switch_to_guest_nvhe,
+ __tlb_switch_to_guest_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
+ unsigned long flags)
+{
+ /*
+ * We're done with the TLB operation, let's restore the host's
+ * view of HCR_EL2.
+ */
+ write_sysreg(0, vttbr_el2);
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ isb();
+ local_irq_restore(flags);
+}
+
+static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
+ unsigned long flags)
+{
+ write_sysreg(0, vttbr_el2);
+}
+
+static hyp_alternate_select(__tlb_switch_to_host,
+ __tlb_switch_to_host_nvhe,
+ __tlb_switch_to_host_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+{
+ unsigned long flags;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ kvm = kern_hyp_va(kvm);
+ __tlb_switch_to_guest()(kvm, &flags);
+
+ /*
+ * We could do so much better if we had the VA as well.
+ * Instead, we invalidate Stage-2 for this IPA, and the
+ * whole of Stage-1. Weep...
+ */
+ ipa >>= 12;
+ __tlbi(ipas2e1is, ipa);
+
+ /*
+ * We have to ensure completion of the invalidation at Stage-2,
+ * since a table walk on another CPU could refill a TLB with a
+ * complete (S1 + S2) walk based on the old Stage-2 mapping if
+ * the Stage-1 invalidation happened first.
+ */
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ /*
+ * If the host is running at EL1 and we have a VPIPT I-cache,
+ * then we must perform I-cache maintenance at EL2 in order for
+ * it to have an effect on the guest. Since the guest cannot hit
+ * I-cache lines allocated with a different VMID, we don't need
+ * to worry about junk out of guest reset (we nuke the I-cache on
+ * VMID rollover), but we do need to be careful when remapping
+ * executable pages for the same guest. This can happen when KSM
+ * takes a CoW fault on an executable page, copies the page into
+ * a page that was previously mapped in the guest and then needs
+ * to invalidate the guest view of the I-cache for that page
+ * from EL1. To solve this, we invalidate the entire I-cache when
+ * unmapping a page from a guest if we have a VPIPT I-cache but
+ * the host is running at EL1. As above, we could do better if
+ * we had the VA.
+ *
+ * The moral of this story is: if you have a VPIPT I-cache, then
+ * you should be running with VHE enabled.
+ */
+ if (!has_vhe() && icache_is_vpipt())
+ __flush_icache_all();
+
+ __tlb_switch_to_host()(kvm, flags);
+}
+
+void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
+{
+ unsigned long flags;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ kvm = kern_hyp_va(kvm);
+ __tlb_switch_to_guest()(kvm, &flags);
+
+ __tlbi(vmalls12e1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host()(kvm, flags);
+}
+
+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
+ unsigned long flags;
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest()(kvm, &flags);
+
+ __tlbi(vmalle1);
+ dsb(nsh);
+ isb();
+
+ __tlb_switch_to_host()(kvm, flags);
+}
+
+void __hyp_text __kvm_flush_vm_context(void)
+{
+ dsb(ishst);
+ __tlbi(alle1is);
+ asm volatile("ic ialluis" : : );
+ dsb(ish);
+}
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
new file mode 100644
index 000000000..215c7c0eb
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/kvm_host.h>
+#include <linux/swab.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ return !!(read_sysreg_el2(spsr) & PSR_AA32_E_BIT);
+
+ return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE);
+}
+
+/*
+ * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
+ * guest.
+ *
+ * @vcpu: the offending vcpu
+ *
+ * Returns:
+ * 1: GICV access successfully performed
+ * 0: Not a GICV access
+ * -1: Illegal GICV access
+ */
+int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+ struct vgic_dist *vgic = &kvm->arch.vgic;
+ phys_addr_t fault_ipa;
+ void __iomem *addr;
+ int rd;
+
+ /* Build the full address */
+ fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+ fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+
+ /* If not for GICV, move on */
+ if (fault_ipa < vgic->vgic_cpu_base ||
+ fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE))
+ return 0;
+
+ /* Reject anything but a 32bit access */
+ if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32))
+ return -1;
+
+ /* Not aligned? Don't bother */
+ if (fault_ipa & 3)
+ return -1;
+
+ rd = kvm_vcpu_dabt_get_rd(vcpu);
+ addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
+ addr += fault_ipa - vgic->vgic_cpu_base;
+
+ if (kvm_vcpu_dabt_iswrite(vcpu)) {
+ u32 data = vcpu_get_reg(vcpu, rd);
+ if (__is_be(vcpu)) {
+ /* guest pre-swabbed data, undo this for writel() */
+ data = swab32(data);
+ }
+ writel_relaxed(data, addr);
+ } else {
+ u32 data = readl_relaxed(addr);
+ if (__is_be(vcpu)) {
+ /* guest expects swabbed data */
+ data = swab32(data);
+ }
+ vcpu_set_reg(vcpu, rd, data);
+ }
+
+ return 1;
+}
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
new file mode 100644
index 000000000..41c80c311
--- /dev/null
+++ b/arch/arm64/kvm/inject_fault.c
@@ -0,0 +1,248 @@
+/*
+ * Fault injection for both 32 and 64bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/esr.h>
+
+#define CURRENT_EL_SP_EL0_VECTOR 0x0
+#define CURRENT_EL_SP_ELx_VECTOR 0x200
+#define LOWER_EL_AArch64_VECTOR 0x400
+#define LOWER_EL_AArch32_VECTOR 0x600
+
+enum exception_type {
+ except_type_sync = 0,
+ except_type_irq = 0x80,
+ except_type_fiq = 0x100,
+ except_type_serror = 0x180,
+};
+
+static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
+{
+ u64 exc_offset;
+
+ switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) {
+ case PSR_MODE_EL1t:
+ exc_offset = CURRENT_EL_SP_EL0_VECTOR;
+ break;
+ case PSR_MODE_EL1h:
+ exc_offset = CURRENT_EL_SP_ELx_VECTOR;
+ break;
+ case PSR_MODE_EL0t:
+ exc_offset = LOWER_EL_AArch64_VECTOR;
+ break;
+ default:
+ exc_offset = LOWER_EL_AArch32_VECTOR;
+ }
+
+ return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
+}
+
+/*
+ * When an exception is taken, most PSTATE fields are left unchanged in the
+ * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
+ * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
+ * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0.
+ *
+ * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429.
+ * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426.
+ *
+ * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
+ * MSB to LSB.
+ */
+static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu)
+{
+ unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ unsigned long old, new;
+
+ old = *vcpu_cpsr(vcpu);
+ new = 0;
+
+ new |= (old & PSR_N_BIT);
+ new |= (old & PSR_Z_BIT);
+ new |= (old & PSR_C_BIT);
+ new |= (old & PSR_V_BIT);
+
+ // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+
+ new |= (old & PSR_DIT_BIT);
+
+ // PSTATE.UAO is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D5-2579.
+
+ // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0
+ // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented
+ // See ARM DDI 0487E.a, page D5-2578.
+ new |= (old & PSR_PAN_BIT);
+ if (!(sctlr & SCTLR_EL1_SPAN))
+ new |= PSR_PAN_BIT;
+
+ // PSTATE.SS is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D2-2452.
+
+ // PSTATE.IL is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D1-2306.
+
+ // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D13-3258
+ if (sctlr & SCTLR_ELx_DSSBS)
+ new |= PSR_SSBS_BIT;
+
+ // PSTATE.BTYPE is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, pages D1-2293 to D1-2294.
+
+ new |= PSR_D_BIT;
+ new |= PSR_A_BIT;
+ new |= PSR_I_BIT;
+ new |= PSR_F_BIT;
+
+ new |= PSR_MODE_EL1h;
+
+ return new;
+}
+
+static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
+{
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+ bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
+ u32 esr = 0;
+
+ vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
+ *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
+
+ *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
+ vcpu_write_spsr(vcpu, cpsr);
+
+ vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
+
+ /*
+ * Build an {i,d}abort, depending on the level and the
+ * instruction set. Report an external synchronous abort.
+ */
+ if (kvm_vcpu_trap_il_is32bit(vcpu))
+ esr |= ESR_ELx_IL;
+
+ /*
+ * Here, the guest runs in AArch64 mode when in EL1. If we get
+ * an AArch32 fault, it means we managed to trap an EL0 fault.
+ */
+ if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
+ esr |= (ESR_ELx_EC_IABT_LOW << ESR_ELx_EC_SHIFT);
+ else
+ esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT);
+
+ if (!is_iabt)
+ esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
+
+ vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1);
+}
+
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+ u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+
+ vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
+ *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
+
+ *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
+ vcpu_write_spsr(vcpu, cpsr);
+
+ /*
+ * Build an unknown exception, depending on the instruction
+ * set.
+ */
+ if (kvm_vcpu_trap_il_is32bit(vcpu))
+ esr |= ESR_ELx_IL;
+
+ vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+}
+
+/**
+ * kvm_inject_dabt - inject a data abort into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ if (vcpu_el1_is_32bit(vcpu))
+ kvm_inject_dabt32(vcpu, addr);
+ else
+ inject_abt64(vcpu, false, addr);
+}
+
+/**
+ * kvm_inject_pabt - inject a prefetch abort into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ if (vcpu_el1_is_32bit(vcpu))
+ kvm_inject_pabt32(vcpu, addr);
+ else
+ inject_abt64(vcpu, true, addr);
+}
+
+/**
+ * kvm_inject_undefined - inject an undefined instruction into the guest
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_undefined(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_el1_is_32bit(vcpu))
+ kvm_inject_undef32(vcpu);
+ else
+ inject_undef64(vcpu);
+}
+
+void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr)
+{
+ vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
+ *vcpu_hcr(vcpu) |= HCR_VSE;
+}
+
+/**
+ * kvm_inject_vabt - inject an async abort / SError into the guest
+ * @vcpu: The VCPU to receive the exception
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ *
+ * Systems with the RAS Extensions specify an imp-def ESR (ISV/IDS = 1) with
+ * the remaining ISS all-zeros so that this error is not interpreted as an
+ * uncategorized RAS error. Without the RAS Extensions we can't specify an ESR
+ * value, so the CPU generates an imp-def value.
+ */
+void kvm_inject_vabt(struct kvm_vcpu *vcpu)
+{
+ kvm_set_sei_esr(vcpu, ESR_ELx_ISV);
+}
diff --git a/arch/arm64/kvm/irq.h b/arch/arm64/kvm/irq.h
new file mode 100644
index 000000000..b74099b90
--- /dev/null
+++ b/arch/arm64/kvm/irq.h
@@ -0,0 +1,19 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This header is included by irqchip.c. However, on ARM, interrupt
+ * controller declarations are located in include/kvm/arm_vgic.h since
+ * they are mostly shared between arm and arm64.
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <kvm/arm_vgic.h>
+
+#endif
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
new file mode 100644
index 000000000..4c2e96ef3
--- /dev/null
+++ b/arch/arm64/kvm/regmap.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/emulate.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/ptrace.h>
+
+#define VCPU_NR_MODES 6
+#define REG_OFFSET(_reg) \
+ (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long))
+
+#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R))
+
+static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
+ /* USR Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14),
+ REG_OFFSET(pc)
+ },
+
+ /* FIQ Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7),
+ REG_OFFSET(compat_r8_fiq), /* r8 */
+ REG_OFFSET(compat_r9_fiq), /* r9 */
+ REG_OFFSET(compat_r10_fiq), /* r10 */
+ REG_OFFSET(compat_r11_fiq), /* r11 */
+ REG_OFFSET(compat_r12_fiq), /* r12 */
+ REG_OFFSET(compat_sp_fiq), /* r13 */
+ REG_OFFSET(compat_lr_fiq), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* IRQ Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_irq), /* r13 */
+ REG_OFFSET(compat_lr_irq), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* SVC Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_svc), /* r13 */
+ REG_OFFSET(compat_lr_svc), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* ABT Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_abt), /* r13 */
+ REG_OFFSET(compat_lr_abt), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* UND Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_und), /* r13 */
+ REG_OFFSET(compat_lr_und), /* r14 */
+ REG_OFFSET(pc)
+ },
+};
+
+/*
+ * Return a pointer to the register number valid in the current mode of
+ * the virtual CPU.
+ */
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+ unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs;
+ unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK;
+
+ switch (mode) {
+ case PSR_AA32_MODE_USR ... PSR_AA32_MODE_SVC:
+ mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */
+ break;
+
+ case PSR_AA32_MODE_ABT:
+ mode = 4;
+ break;
+
+ case PSR_AA32_MODE_UND:
+ mode = 5;
+ break;
+
+ case PSR_AA32_MODE_SYS:
+ mode = 0; /* SYS maps to USR */
+ break;
+
+ default:
+ BUG();
+ }
+
+ return reg_array + vcpu_reg_offsets[mode][reg_num];
+}
+
+/*
+ * Return the SPSR for the current mode of the virtual CPU.
+ */
+static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu)
+{
+ unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK;
+ switch (mode) {
+ case PSR_AA32_MODE_SVC: return KVM_SPSR_SVC;
+ case PSR_AA32_MODE_ABT: return KVM_SPSR_ABT;
+ case PSR_AA32_MODE_UND: return KVM_SPSR_UND;
+ case PSR_AA32_MODE_IRQ: return KVM_SPSR_IRQ;
+ case PSR_AA32_MODE_FIQ: return KVM_SPSR_FIQ;
+ default: BUG();
+ }
+}
+
+unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
+{
+ int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ return vcpu_gp_regs(vcpu)->spsr[spsr_idx];
+
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ return read_sysreg_el1(spsr);
+ case KVM_SPSR_ABT:
+ return read_sysreg(spsr_abt);
+ case KVM_SPSR_UND:
+ return read_sysreg(spsr_und);
+ case KVM_SPSR_IRQ:
+ return read_sysreg(spsr_irq);
+ case KVM_SPSR_FIQ:
+ return read_sysreg(spsr_fiq);
+ default:
+ BUG();
+ }
+}
+
+void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
+{
+ int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+ if (!vcpu->arch.sysregs_loaded_on_cpu) {
+ vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v;
+ return;
+ }
+
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ write_sysreg_el1(v, spsr);
+ break;
+ case KVM_SPSR_ABT:
+ write_sysreg(v, spsr_abt);
+ break;
+ case KVM_SPSR_UND:
+ write_sysreg(v, spsr_und);
+ break;
+ case KVM_SPSR_IRQ:
+ write_sysreg(v, spsr_irq);
+ break;
+ case KVM_SPSR_FIQ:
+ write_sysreg(v, spsr_fiq);
+ break;
+ }
+}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
new file mode 100644
index 000000000..0688816f1
--- /dev/null
+++ b/arch/arm64/kvm/reset.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/reset.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/hw_breakpoint.h>
+
+#include <kvm/arm_arch_timer.h>
+
+#include <asm/cputype.h>
+#include <asm/ptrace.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * ARMv8 Reset Values
+ */
+static const struct kvm_regs default_regs_reset = {
+ .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
+ PSR_F_BIT | PSR_D_BIT),
+};
+
+static const struct kvm_regs default_regs_reset32 = {
+ .regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT |
+ PSR_AA32_I_BIT | PSR_AA32_F_BIT),
+};
+
+static bool cpu_has_32bit_el1(void)
+{
+ u64 pfr0;
+
+ pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+ return !!(pfr0 & 0x20);
+}
+
+/**
+ * kvm_arch_dev_ioctl_check_extension
+ *
+ * We currently assume that the number of HW registers is uniform
+ * across all CPUs (see cpuinfo_sanity_check).
+ */
+int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_ARM_EL1_32BIT:
+ r = cpu_has_32bit_el1();
+ break;
+ case KVM_CAP_GUEST_DEBUG_HW_BPS:
+ r = get_num_brps();
+ break;
+ case KVM_CAP_GUEST_DEBUG_HW_WPS:
+ r = get_num_wrps();
+ break;
+ case KVM_CAP_ARM_PMU_V3:
+ r = kvm_arm_support_pmu_v3();
+ break;
+ case KVM_CAP_ARM_INJECT_SERROR_ESR:
+ r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+ break;
+ case KVM_CAP_SET_GUEST_DEBUG:
+ case KVM_CAP_VCPU_ATTRIBUTES:
+ case KVM_CAP_VCPU_EVENTS:
+ r = 1;
+ break;
+ default:
+ r = 0;
+ }
+
+ return r;
+}
+
+/**
+ * kvm_reset_vcpu - sets core registers and sys_regs to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on
+ * the virtual CPU struct to their architecturally defined reset
+ * values.
+ *
+ * Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
+ * ioctl or as part of handling a request issued by another VCPU in the PSCI
+ * handling code. In the first case, the VCPU will not be loaded, and in the
+ * second case the VCPU will be loaded. Because this function operates purely
+ * on the memory-backed valus of system registers, we want to do a full put if
+ * we were loaded (handling a request) and load the values back at the end of
+ * the function. Otherwise we leave the state alone. In both cases, we
+ * disable preemption around the vcpu reset as we would otherwise race with
+ * preempt notifiers which also call put/load.
+ */
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+ const struct kvm_regs *cpu_reset;
+ int ret = -EINVAL;
+ bool loaded;
+
+ /* Reset PMU outside of the non-preemptible section */
+ kvm_pmu_vcpu_reset(vcpu);
+
+ preempt_disable();
+ loaded = (vcpu->cpu != -1);
+ if (loaded)
+ kvm_arch_vcpu_put(vcpu);
+
+ switch (vcpu->arch.target) {
+ default:
+ if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
+ if (!cpu_has_32bit_el1())
+ goto out;
+ cpu_reset = &default_regs_reset32;
+ } else {
+ cpu_reset = &default_regs_reset;
+ }
+
+ break;
+ }
+
+ /* Reset core registers */
+ memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
+
+ /* Reset system registers */
+ kvm_reset_sys_regs(vcpu);
+
+ /*
+ * Additional reset state handling that PSCI may have imposed on us.
+ * Must be done after all the sys_reg reset.
+ */
+ if (vcpu->arch.reset_state.reset) {
+ unsigned long target_pc = vcpu->arch.reset_state.pc;
+
+ /* Gracefully handle Thumb2 entry point */
+ if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
+ target_pc &= ~1UL;
+ vcpu_set_thumb(vcpu);
+ }
+
+ /* Propagate caller endianness */
+ if (vcpu->arch.reset_state.be)
+ kvm_vcpu_set_be(vcpu);
+
+ *vcpu_pc(vcpu) = target_pc;
+ vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0);
+
+ vcpu->arch.reset_state.reset = false;
+ }
+
+ /* Default workaround setup is enabled (if supported) */
+ if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL)
+ vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
+
+ /* Reset timer */
+ ret = kvm_timer_vcpu_reset(vcpu);
+out:
+ if (loaded)
+ kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ preempt_enable();
+ return ret;
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
new file mode 100644
index 000000000..f06629bf2
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.c
@@ -0,0 +1,2606 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.com.au>
+ * Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/bsearch.h>
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/printk.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/debug-monitors.h>
+#include <asm/esr.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/perf_event.h>
+#include <asm/sysreg.h>
+
+#include <trace/events/kvm.h>
+
+#include "sys_regs.h"
+
+#include "trace.h"
+
+/*
+ * All of this file is extremly similar to the ARM coproc.c, but the
+ * types are different. My gut feeling is that it should be pretty
+ * easy to merge, but that would be an ABI breakage -- again. VFP
+ * would also need to be abstracted.
+ *
+ * For AArch32, we only take care of what is being trapped. Anything
+ * that has to do with init and userspace access has to go via the
+ * 64bit interface.
+ */
+
+static bool read_from_write_only(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *r)
+{
+ WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n");
+ print_sys_reg_instr(params);
+ kvm_inject_undefined(vcpu);
+ return false;
+}
+
+static bool write_to_read_only(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *r)
+{
+ WARN_ONCE(1, "Unexpected sys_reg write to read-only register\n");
+ print_sys_reg_instr(params);
+ kvm_inject_undefined(vcpu);
+ return false;
+}
+
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
+{
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ goto immediate_read;
+
+ /*
+ * System registers listed in the switch are not saved on every
+ * exit from the guest but are only saved on vcpu_put.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the guest cannot modify its
+ * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
+ * thread when emulating cross-VCPU communication.
+ */
+ switch (reg) {
+ case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1);
+ case SCTLR_EL1: return read_sysreg_s(sctlr_EL12);
+ case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1);
+ case CPACR_EL1: return read_sysreg_s(cpacr_EL12);
+ case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12);
+ case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12);
+ case TCR_EL1: return read_sysreg_s(tcr_EL12);
+ case ESR_EL1: return read_sysreg_s(esr_EL12);
+ case AFSR0_EL1: return read_sysreg_s(afsr0_EL12);
+ case AFSR1_EL1: return read_sysreg_s(afsr1_EL12);
+ case FAR_EL1: return read_sysreg_s(far_EL12);
+ case MAIR_EL1: return read_sysreg_s(mair_EL12);
+ case VBAR_EL1: return read_sysreg_s(vbar_EL12);
+ case CONTEXTIDR_EL1: return read_sysreg_s(contextidr_EL12);
+ case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0);
+ case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0);
+ case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1);
+ case AMAIR_EL1: return read_sysreg_s(amair_EL12);
+ case CNTKCTL_EL1: return read_sysreg_s(cntkctl_EL12);
+ case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1);
+ case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2);
+ case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2);
+ case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2);
+ }
+
+immediate_read:
+ return __vcpu_sys_reg(vcpu, reg);
+}
+
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+{
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ goto immediate_write;
+
+ /*
+ * System registers listed in the switch are not restored on every
+ * entry to the guest but are only restored on vcpu_load.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the the MPIDR should only be
+ * set once, before running the VCPU, and never changed later.
+ */
+ switch (reg) {
+ case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return;
+ case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12); return;
+ case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return;
+ case CPACR_EL1: write_sysreg_s(val, cpacr_EL12); return;
+ case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12); return;
+ case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12); return;
+ case TCR_EL1: write_sysreg_s(val, tcr_EL12); return;
+ case ESR_EL1: write_sysreg_s(val, esr_EL12); return;
+ case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12); return;
+ case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12); return;
+ case FAR_EL1: write_sysreg_s(val, far_EL12); return;
+ case MAIR_EL1: write_sysreg_s(val, mair_EL12); return;
+ case VBAR_EL1: write_sysreg_s(val, vbar_EL12); return;
+ case CONTEXTIDR_EL1: write_sysreg_s(val, contextidr_EL12); return;
+ case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return;
+ case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return;
+ case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return;
+ case AMAIR_EL1: write_sysreg_s(val, amair_EL12); return;
+ case CNTKCTL_EL1: write_sysreg_s(val, cntkctl_EL12); return;
+ case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return;
+ case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return;
+ case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return;
+ case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return;
+ }
+
+immediate_write:
+ __vcpu_sys_reg(vcpu, reg) = val;
+}
+
+/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
+static u32 cache_levels;
+
+/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
+#define CSSELR_MAX 12
+
+/* Which cache CCSIDR represents depends on CSSELR value. */
+static u32 get_ccsidr(u32 csselr)
+{
+ u32 ccsidr;
+
+ /* Make sure noone else changes CSSELR during this! */
+ local_irq_disable();
+ write_sysreg(csselr, csselr_el1);
+ isb();
+ ccsidr = read_sysreg(ccsidr_el1);
+ local_irq_enable();
+
+ return ccsidr;
+}
+
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ */
+static bool access_dcsw(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!p->is_write)
+ return read_from_write_only(vcpu, p, r);
+
+ /*
+ * Only track S/W ops if we don't have FWB. It still indicates
+ * that the guest is a bit broken (S/W operations should only
+ * be done by firmware, knowing that there is only a single
+ * CPU left in the system, and certainly not from non-secure
+ * software).
+ */
+ if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+ kvm_set_way_flush(vcpu);
+
+ return true;
+}
+
+/*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set. If the guest enables the MMU, we stop trapping the VM
+ * sys_regs and leave it in complete control of the caches.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ bool was_enabled = vcpu_has_cache_enabled(vcpu);
+ u64 val;
+ int reg = r->reg;
+
+ BUG_ON(!p->is_write);
+
+ /* See the 32bit mapping in kvm_host.h */
+ if (p->is_aarch32)
+ reg = r->reg / 2;
+
+ if (!p->is_aarch32 || !p->is_32bit) {
+ val = p->regval;
+ } else {
+ val = vcpu_read_sys_reg(vcpu, reg);
+ if (r->reg % 2)
+ val = (p->regval << 32) | (u64)lower_32_bits(val);
+ else
+ val = ((u64)upper_32_bits(val) << 32) |
+ lower_32_bits(p->regval);
+ }
+ vcpu_write_sys_reg(vcpu, val, reg);
+
+ kvm_toggle_cache(vcpu, was_enabled);
+ return true;
+}
+
+/*
+ * Trap handler for the GICv3 SGI generation system register.
+ * Forward the request to the VGIC emulation.
+ * The cp15_64 code makes sure this automatically works
+ * for both AArch64 and AArch32 accesses.
+ */
+static bool access_gic_sgi(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ bool g1;
+
+ if (!p->is_write)
+ return read_from_write_only(vcpu, p, r);
+
+ /*
+ * In a system where GICD_CTLR.DS=1, a ICC_SGI0R_EL1 access generates
+ * Group0 SGIs only, while ICC_SGI1R_EL1 can generate either group,
+ * depending on the SGI configuration. ICC_ASGI1R_EL1 is effectively
+ * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure
+ * group.
+ */
+ if (p->is_aarch32) {
+ switch (p->Op1) {
+ default: /* Keep GCC quiet */
+ case 0: /* ICC_SGI1R */
+ g1 = true;
+ break;
+ case 1: /* ICC_ASGI1R */
+ case 2: /* ICC_SGI0R */
+ g1 = false;
+ break;
+ }
+ } else {
+ switch (p->Op2) {
+ default: /* Keep GCC quiet */
+ case 5: /* ICC_SGI1R_EL1 */
+ g1 = true;
+ break;
+ case 6: /* ICC_ASGI1R_EL1 */
+ case 7: /* ICC_SGI0R_EL1 */
+ g1 = false;
+ break;
+ }
+ }
+
+ vgic_v3_dispatch_sgi(vcpu, p->regval, g1);
+
+ return true;
+}
+
+static bool access_gic_sre(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
+ return true;
+}
+
+static bool trap_raz_wi(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+ else
+ return read_zero(vcpu, p);
+}
+
+static bool trap_undef(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ kvm_inject_undefined(vcpu);
+ return false;
+}
+
+static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ p->regval = (1 << 3);
+ return true;
+ }
+}
+
+static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ p->regval = read_sysreg(dbgauthstatus_el1);
+ return true;
+ }
+}
+
+/*
+ * We want to avoid world-switching all the DBG registers all the
+ * time:
+ *
+ * - If we've touched any debug register, it is likely that we're
+ * going to touch more of them. It then makes sense to disable the
+ * traps and start doing the save/restore dance
+ * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
+ * then mandatory to save/restore the registers, as the guest
+ * depends on them.
+ *
+ * For this, we use a DIRTY bit, indicating the guest has modified the
+ * debug registers, used as follow:
+ *
+ * On guest entry:
+ * - If the dirty bit is set (because we're coming back from trapping),
+ * disable the traps, save host registers, restore guest registers.
+ * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
+ * set the dirty bit, disable the traps, save host registers,
+ * restore guest registers.
+ * - Otherwise, enable the traps
+ *
+ * On guest exit:
+ * - If the dirty bit is set, save guest registers, restore host
+ * registers and clear the dirty bit. This ensure that the host can
+ * now use the debug registers.
+ */
+static bool trap_debug_regs(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ vcpu_write_sys_reg(vcpu, p->regval, r->reg);
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+ } else {
+ p->regval = vcpu_read_sys_reg(vcpu, r->reg);
+ }
+
+ trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
+
+ return true;
+}
+
+/*
+ * reg_to_dbg/dbg_to_reg
+ *
+ * A 32 bit write to a debug register leave top bits alone
+ * A 32 bit read from a debug register only returns the bottom bits
+ *
+ * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the
+ * hyp.S code switches between host and guest values in future.
+ */
+static void reg_to_dbg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ u64 *dbg_reg)
+{
+ u64 val = p->regval;
+
+ if (p->is_32bit) {
+ val &= 0xffffffffUL;
+ val |= ((*dbg_reg >> 32) << 32);
+ }
+
+ *dbg_reg = val;
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+}
+
+static void dbg_to_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ u64 *dbg_reg)
+{
+ p->regval = *dbg_reg;
+ if (p->is_32bit)
+ p->regval &= 0xffffffffUL;
+}
+
+static bool trap_bvr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
+
+ return true;
+}
+
+static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+
+ if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static void reset_bvr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val;
+}
+
+static bool trap_bcr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
+
+ return true;
+}
+
+static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+
+ if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+
+ return 0;
+}
+
+static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static void reset_bcr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val;
+}
+
+static bool trap_wvr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+ trace_trap_reg(__func__, rd->CRm, p->is_write,
+ vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]);
+
+ return true;
+}
+
+static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
+
+ if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static void reset_wvr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val;
+}
+
+static bool trap_wcr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
+
+ return true;
+}
+
+static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
+
+ if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static void reset_wcr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val;
+}
+
+static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 amair = read_sysreg(amair_el1);
+ vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1);
+}
+
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 mpidr;
+
+ /*
+ * Map the vcpu_id into the first three affinity level fields of
+ * the MPIDR. We limit the number of VCPUs in level 0 due to a
+ * limitation to 16 CPUs in that level in the ICC_SGIxR registers
+ * of the GICv3 to be able to address each CPU directly when
+ * sending IPIs.
+ */
+ mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
+ mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
+ mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
+ vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1);
+}
+
+static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 pmcr, val;
+
+ /* No PMU available, PMCR_EL0 may UNDEF... */
+ if (!kvm_arm_support_pmu_v3())
+ return;
+
+ pmcr = read_sysreg(pmcr_el0);
+ /*
+ * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN
+ * except PMCR.E resetting to zero.
+ */
+ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
+ | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
+ __vcpu_sys_reg(vcpu, r->reg) = val;
+}
+
+static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
+{
+ u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+ bool enabled = (reg & flags) || vcpu_mode_priv(vcpu);
+
+ if (!enabled)
+ kvm_inject_undefined(vcpu);
+
+ return !enabled;
+}
+
+static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_EN);
+}
+
+static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN);
+}
+
+static bool pmu_access_cycle_counter_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN);
+}
+
+static bool pmu_access_event_counter_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN);
+}
+
+static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 val;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (p->is_write) {
+ /* Only update writeable bits of PMCR */
+ val = __vcpu_sys_reg(vcpu, PMCR_EL0);
+ val &= ~ARMV8_PMU_PMCR_MASK;
+ val |= p->regval & ARMV8_PMU_PMCR_MASK;
+ __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ kvm_pmu_handle_pmcr(vcpu, val);
+ } else {
+ /* PMCR.P & PMCR.C are RAZ */
+ val = __vcpu_sys_reg(vcpu, PMCR_EL0)
+ & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C);
+ p->regval = val;
+ }
+
+ return true;
+}
+
+static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_event_counter_el0_disabled(vcpu))
+ return false;
+
+ if (p->is_write)
+ __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
+ else
+ /* return PMSELR.SEL field */
+ p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0)
+ & ARMV8_PMU_COUNTER_MASK;
+
+ return true;
+}
+
+static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 pmceid;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ BUG_ON(p->is_write);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (!(p->Op2 & 1))
+ pmceid = read_sysreg(pmceid0_el0);
+ else
+ pmceid = read_sysreg(pmceid1_el0);
+
+ p->regval = pmceid;
+
+ return true;
+}
+
+static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
+{
+ u64 pmcr, val;
+
+ pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);
+ val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+ if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+
+ return true;
+}
+
+static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 idx;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (r->CRn == 9 && r->CRm == 13) {
+ if (r->Op2 == 2) {
+ /* PMXEVCNTR_EL0 */
+ if (pmu_access_event_counter_el0_disabled(vcpu))
+ return false;
+
+ idx = __vcpu_sys_reg(vcpu, PMSELR_EL0)
+ & ARMV8_PMU_COUNTER_MASK;
+ } else if (r->Op2 == 0) {
+ /* PMCCNTR_EL0 */
+ if (pmu_access_cycle_counter_el0_disabled(vcpu))
+ return false;
+
+ idx = ARMV8_PMU_CYCLE_IDX;
+ } else {
+ return false;
+ }
+ } else if (r->CRn == 0 && r->CRm == 9) {
+ /* PMCCNTR */
+ if (pmu_access_event_counter_el0_disabled(vcpu))
+ return false;
+
+ idx = ARMV8_PMU_CYCLE_IDX;
+ } else if (r->CRn == 14 && (r->CRm & 12) == 8) {
+ /* PMEVCNTRn_EL0 */
+ if (pmu_access_event_counter_el0_disabled(vcpu))
+ return false;
+
+ idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
+ } else {
+ return false;
+ }
+
+ if (!pmu_counter_idx_valid(vcpu, idx))
+ return false;
+
+ if (p->is_write) {
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ kvm_pmu_set_counter_value(vcpu, idx, p->regval);
+ } else {
+ p->regval = kvm_pmu_get_counter_value(vcpu, idx);
+ }
+
+ return true;
+}
+
+static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 idx, reg;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) {
+ /* PMXEVTYPER_EL0 */
+ idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
+ reg = PMEVTYPER0_EL0 + idx;
+ } else if (r->CRn == 14 && (r->CRm & 12) == 12) {
+ idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
+ if (idx == ARMV8_PMU_CYCLE_IDX)
+ reg = PMCCFILTR_EL0;
+ else
+ /* PMEVTYPERn_EL0 */
+ reg = PMEVTYPER0_EL0 + idx;
+ } else {
+ BUG();
+ }
+
+ if (!pmu_counter_idx_valid(vcpu, idx))
+ return false;
+
+ if (p->is_write) {
+ kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
+ __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
+ } else {
+ p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
+ }
+
+ return true;
+}
+
+static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 val, mask;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ mask = kvm_pmu_valid_counter_mask(vcpu);
+ if (p->is_write) {
+ val = p->regval & mask;
+ if (r->Op2 & 0x1) {
+ /* accessing PMCNTENSET_EL0 */
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
+ kvm_pmu_enable_counter(vcpu, val);
+ } else {
+ /* accessing PMCNTENCLR_EL0 */
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
+ kvm_pmu_disable_counter(vcpu, val);
+ }
+ } else {
+ p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
+ }
+
+ return true;
+}
+
+static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (!vcpu_mode_priv(vcpu)) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+
+ if (p->is_write) {
+ u64 val = p->regval & mask;
+
+ if (r->Op2 & 0x1)
+ /* accessing PMINTENSET_EL1 */
+ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
+ else
+ /* accessing PMINTENCLR_EL1 */
+ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
+ } else {
+ p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
+ }
+
+ return true;
+}
+
+static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (p->is_write) {
+ if (r->CRm & 0x2)
+ /* accessing PMOVSSET_EL0 */
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
+ else
+ /* accessing PMOVSCLR_EL0 */
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
+ } else {
+ p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
+ }
+
+ return true;
+}
+
+static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 mask;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (!p->is_write)
+ return read_from_write_only(vcpu, p, r);
+
+ if (pmu_write_swinc_el0_disabled(vcpu))
+ return false;
+
+ mask = kvm_pmu_valid_counter_mask(vcpu);
+ kvm_pmu_software_increment(vcpu, p->regval & mask);
+ return true;
+}
+
+static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (p->is_write) {
+ if (!vcpu_mode_priv(vcpu)) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+
+ __vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
+ p->regval & ARMV8_PMU_USERENR_MASK;
+ } else {
+ p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0)
+ & ARMV8_PMU_USERENR_MASK;
+ }
+
+ return true;
+}
+
+/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
+#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
+ { SYS_DESC(SYS_DBGBVRn_EL1(n)), \
+ trap_bvr, reset_bvr, 0, 0, get_bvr, set_bvr }, \
+ { SYS_DESC(SYS_DBGBCRn_EL1(n)), \
+ trap_bcr, reset_bcr, 0, 0, get_bcr, set_bcr }, \
+ { SYS_DESC(SYS_DBGWVRn_EL1(n)), \
+ trap_wvr, reset_wvr, 0, 0, get_wvr, set_wvr }, \
+ { SYS_DESC(SYS_DBGWCRn_EL1(n)), \
+ trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr }
+
+/* Macro to expand the PMEVCNTRn_EL0 register */
+#define PMU_PMEVCNTR_EL0(n) \
+ { SYS_DESC(SYS_PMEVCNTRn_EL0(n)), \
+ access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), }
+
+/* Macro to expand the PMEVTYPERn_EL0 register */
+#define PMU_PMEVTYPER_EL0(n) \
+ { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \
+ access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
+
+static bool access_cntp_tval(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 now = kvm_phys_timer_read();
+ u64 cval;
+
+ if (p->is_write) {
+ kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL,
+ p->regval + now);
+ } else {
+ cval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+ p->regval = cval - now;
+ }
+
+ return true;
+}
+
+static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, p->regval);
+ else
+ p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL);
+
+ return true;
+}
+
+static bool access_cntp_cval(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, p->regval);
+ else
+ p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+
+ return true;
+}
+
+/* Read a sanitised cpufeature ID register by sys_reg_desc */
+static u64 read_id_reg(struct sys_reg_desc const *r, bool raz)
+{
+ u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
+ (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
+ u64 val = raz ? 0 : read_sanitised_ftr_reg(id);
+
+ if (id == SYS_ID_AA64PFR0_EL1) {
+ if (val & (0xfUL << ID_AA64PFR0_SVE_SHIFT))
+ kvm_debug("SVE unsupported for guests, suppressing\n");
+
+ val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
+ } else if (id == SYS_ID_AA64MMFR1_EL1) {
+ if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))
+ kvm_debug("LORegions unsupported for guests, suppressing\n");
+
+ val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT);
+ }
+
+ return val;
+}
+
+/* cpufeature ID register access trap handlers */
+
+static bool __access_id_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r,
+ bool raz)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, r);
+
+ p->regval = read_id_reg(r, raz);
+ return true;
+}
+
+static bool access_id_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ return __access_id_reg(vcpu, p, r, false);
+}
+
+static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ return __access_id_reg(vcpu, p, r, true);
+}
+
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
+
+/*
+ * cpufeature ID register user accessors
+ *
+ * For now, these registers are immutable for userspace, so no values
+ * are stored, and for set_id_reg() we don't allow the effective value
+ * to be changed.
+ */
+static int __get_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
+ bool raz)
+{
+ const u64 id = sys_reg_to_index(rd);
+ const u64 val = read_id_reg(rd, raz);
+
+ return reg_to_user(uaddr, &val, id);
+}
+
+static int __set_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
+ bool raz)
+{
+ const u64 id = sys_reg_to_index(rd);
+ int err;
+ u64 val;
+
+ err = reg_from_user(&val, uaddr, id);
+ if (err)
+ return err;
+
+ /* This is what we mean by invariant: you can't change it. */
+ if (val != read_id_reg(rd, raz))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ return __get_id_reg(rd, uaddr, false);
+}
+
+static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ return __set_id_reg(rd, uaddr, false);
+}
+
+static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ return __get_id_reg(rd, uaddr, true);
+}
+
+static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ return __set_id_reg(rd, uaddr, true);
+}
+
+/* sys_reg_desc initialiser for known cpufeature ID registers */
+#define ID_SANITISED(name) { \
+ SYS_DESC(SYS_##name), \
+ .access = access_id_reg, \
+ .get_user = get_id_reg, \
+ .set_user = set_id_reg, \
+}
+
+/*
+ * sys_reg_desc initialiser for architecturally unallocated cpufeature ID
+ * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
+ * (1 <= crm < 8, 0 <= Op2 < 8).
+ */
+#define ID_UNALLOCATED(crm, op2) { \
+ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
+ .access = access_raz_id_reg, \
+ .get_user = get_raz_id_reg, \
+ .set_user = set_raz_id_reg, \
+}
+
+/*
+ * sys_reg_desc initialiser for known ID registers that we hide from guests.
+ * For now, these are exposed just like unallocated ID regs: they appear
+ * RAZ for the guest.
+ */
+#define ID_HIDDEN(name) { \
+ SYS_DESC(SYS_##name), \
+ .access = access_raz_id_reg, \
+ .get_user = get_raz_id_reg, \
+ .set_user = set_raz_id_reg, \
+}
+
+/*
+ * Architected system registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * Debug handling: We do trap most, if not all debug related system
+ * registers. The implementation is good enough to ensure that a guest
+ * can use these with minimal performance degradation. The drawback is
+ * that we don't implement any of the external debug, none of the
+ * OSlock protocol. This should be revisited if we ever encounter a
+ * more demanding guest...
+ */
+static const struct sys_reg_desc sys_reg_descs[] = {
+ { SYS_DESC(SYS_DC_ISW), access_dcsw },
+ { SYS_DESC(SYS_DC_CSW), access_dcsw },
+ { SYS_DESC(SYS_DC_CISW), access_dcsw },
+
+ DBG_BCR_BVR_WCR_WVR_EL1(0),
+ DBG_BCR_BVR_WCR_WVR_EL1(1),
+ { SYS_DESC(SYS_MDCCINT_EL1), trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
+ { SYS_DESC(SYS_MDSCR_EL1), trap_debug_regs, reset_val, MDSCR_EL1, 0 },
+ DBG_BCR_BVR_WCR_WVR_EL1(2),
+ DBG_BCR_BVR_WCR_WVR_EL1(3),
+ DBG_BCR_BVR_WCR_WVR_EL1(4),
+ DBG_BCR_BVR_WCR_WVR_EL1(5),
+ DBG_BCR_BVR_WCR_WVR_EL1(6),
+ DBG_BCR_BVR_WCR_WVR_EL1(7),
+ DBG_BCR_BVR_WCR_WVR_EL1(8),
+ DBG_BCR_BVR_WCR_WVR_EL1(9),
+ DBG_BCR_BVR_WCR_WVR_EL1(10),
+ DBG_BCR_BVR_WCR_WVR_EL1(11),
+ DBG_BCR_BVR_WCR_WVR_EL1(12),
+ DBG_BCR_BVR_WCR_WVR_EL1(13),
+ DBG_BCR_BVR_WCR_WVR_EL1(14),
+ DBG_BCR_BVR_WCR_WVR_EL1(15),
+
+ { SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_OSLAR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1 },
+ { SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_DBGCLAIMCLR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_DBGAUTHSTATUS_EL1), trap_dbgauthstatus_el1 },
+
+ { SYS_DESC(SYS_MDCCSR_EL0), trap_raz_wi },
+ { SYS_DESC(SYS_DBGDTR_EL0), trap_raz_wi },
+ // DBGDTR[TR]X_EL0 share the same encoding
+ { SYS_DESC(SYS_DBGDTRTX_EL0), trap_raz_wi },
+
+ { SYS_DESC(SYS_DBGVCR32_EL2), NULL, reset_val, DBGVCR32_EL2, 0 },
+
+ { SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
+
+ /*
+ * ID regs: all ID_SANITISED() entries here must have corresponding
+ * entries in arm64_ftr_regs[].
+ */
+
+ /* AArch64 mappings of the AArch32 ID registers */
+ /* CRm=1 */
+ ID_SANITISED(ID_PFR0_EL1),
+ ID_SANITISED(ID_PFR1_EL1),
+ ID_SANITISED(ID_DFR0_EL1),
+ ID_HIDDEN(ID_AFR0_EL1),
+ ID_SANITISED(ID_MMFR0_EL1),
+ ID_SANITISED(ID_MMFR1_EL1),
+ ID_SANITISED(ID_MMFR2_EL1),
+ ID_SANITISED(ID_MMFR3_EL1),
+
+ /* CRm=2 */
+ ID_SANITISED(ID_ISAR0_EL1),
+ ID_SANITISED(ID_ISAR1_EL1),
+ ID_SANITISED(ID_ISAR2_EL1),
+ ID_SANITISED(ID_ISAR3_EL1),
+ ID_SANITISED(ID_ISAR4_EL1),
+ ID_SANITISED(ID_ISAR5_EL1),
+ ID_SANITISED(ID_MMFR4_EL1),
+ ID_UNALLOCATED(2,7),
+
+ /* CRm=3 */
+ ID_SANITISED(MVFR0_EL1),
+ ID_SANITISED(MVFR1_EL1),
+ ID_SANITISED(MVFR2_EL1),
+ ID_UNALLOCATED(3,3),
+ ID_UNALLOCATED(3,4),
+ ID_UNALLOCATED(3,5),
+ ID_UNALLOCATED(3,6),
+ ID_UNALLOCATED(3,7),
+
+ /* AArch64 ID registers */
+ /* CRm=4 */
+ ID_SANITISED(ID_AA64PFR0_EL1),
+ ID_SANITISED(ID_AA64PFR1_EL1),
+ ID_UNALLOCATED(4,2),
+ ID_UNALLOCATED(4,3),
+ ID_UNALLOCATED(4,4),
+ ID_UNALLOCATED(4,5),
+ ID_UNALLOCATED(4,6),
+ ID_UNALLOCATED(4,7),
+
+ /* CRm=5 */
+ ID_SANITISED(ID_AA64DFR0_EL1),
+ ID_SANITISED(ID_AA64DFR1_EL1),
+ ID_UNALLOCATED(5,2),
+ ID_UNALLOCATED(5,3),
+ ID_HIDDEN(ID_AA64AFR0_EL1),
+ ID_HIDDEN(ID_AA64AFR1_EL1),
+ ID_UNALLOCATED(5,6),
+ ID_UNALLOCATED(5,7),
+
+ /* CRm=6 */
+ ID_SANITISED(ID_AA64ISAR0_EL1),
+ ID_SANITISED(ID_AA64ISAR1_EL1),
+ ID_SANITISED(ID_AA64ISAR2_EL1),
+ ID_UNALLOCATED(6,3),
+ ID_UNALLOCATED(6,4),
+ ID_UNALLOCATED(6,5),
+ ID_UNALLOCATED(6,6),
+ ID_UNALLOCATED(6,7),
+
+ /* CRm=7 */
+ ID_SANITISED(ID_AA64MMFR0_EL1),
+ ID_SANITISED(ID_AA64MMFR1_EL1),
+ ID_SANITISED(ID_AA64MMFR2_EL1),
+ ID_UNALLOCATED(7,3),
+ ID_UNALLOCATED(7,4),
+ ID_UNALLOCATED(7,5),
+ ID_UNALLOCATED(7,6),
+ ID_UNALLOCATED(7,7),
+
+ { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
+ { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
+ { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
+ { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
+ { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 },
+
+ { SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
+ { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
+ { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
+
+ { SYS_DESC(SYS_ERRIDR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERRSELR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXFR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXCTLR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXSTATUS_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXADDR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
+
+ { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
+ { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
+
+ { SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 },
+ { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, NULL, PMINTENSET_EL1 },
+
+ { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
+ { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
+
+ { SYS_DESC(SYS_LORSA_EL1), trap_undef },
+ { SYS_DESC(SYS_LOREA_EL1), trap_undef },
+ { SYS_DESC(SYS_LORN_EL1), trap_undef },
+ { SYS_DESC(SYS_LORC_EL1), trap_undef },
+ { SYS_DESC(SYS_LORID_EL1), trap_undef },
+
+ { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
+ { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
+
+ { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only },
+ { SYS_DESC(SYS_ICC_HPPIR0_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
+ { SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
+ { SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
+ { SYS_DESC(SYS_ICC_SGI0R_EL1), access_gic_sgi },
+ { SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
+ { SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
+
+ { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
+ { SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
+
+ { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
+
+ { SYS_DESC(SYS_CSSELR_EL1), NULL, reset_unknown, CSSELR_EL1 },
+
+ { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 },
+ { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
+ { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 },
+ { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 },
+ { SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 },
+ { SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 },
+ { SYS_DESC(SYS_PMCEID0_EL0), access_pmceid },
+ { SYS_DESC(SYS_PMCEID1_EL0), access_pmceid },
+ { SYS_DESC(SYS_PMCCNTR_EL0), access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 },
+ { SYS_DESC(SYS_PMXEVTYPER_EL0), access_pmu_evtyper },
+ { SYS_DESC(SYS_PMXEVCNTR_EL0), access_pmu_evcntr },
+ /*
+ * PMUSERENR_EL0 resets as unknown in 64bit mode while it resets as zero
+ * in 32bit mode. Here we choose to reset it as zero for consistency.
+ */
+ { SYS_DESC(SYS_PMUSERENR_EL0), access_pmuserenr, reset_val, PMUSERENR_EL0, 0 },
+ { SYS_DESC(SYS_PMOVSSET_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 },
+
+ { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
+ { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
+
+ { SYS_DESC(SYS_CNTP_TVAL_EL0), access_cntp_tval },
+ { SYS_DESC(SYS_CNTP_CTL_EL0), access_cntp_ctl },
+ { SYS_DESC(SYS_CNTP_CVAL_EL0), access_cntp_cval },
+
+ /* PMEVCNTRn_EL0 */
+ PMU_PMEVCNTR_EL0(0),
+ PMU_PMEVCNTR_EL0(1),
+ PMU_PMEVCNTR_EL0(2),
+ PMU_PMEVCNTR_EL0(3),
+ PMU_PMEVCNTR_EL0(4),
+ PMU_PMEVCNTR_EL0(5),
+ PMU_PMEVCNTR_EL0(6),
+ PMU_PMEVCNTR_EL0(7),
+ PMU_PMEVCNTR_EL0(8),
+ PMU_PMEVCNTR_EL0(9),
+ PMU_PMEVCNTR_EL0(10),
+ PMU_PMEVCNTR_EL0(11),
+ PMU_PMEVCNTR_EL0(12),
+ PMU_PMEVCNTR_EL0(13),
+ PMU_PMEVCNTR_EL0(14),
+ PMU_PMEVCNTR_EL0(15),
+ PMU_PMEVCNTR_EL0(16),
+ PMU_PMEVCNTR_EL0(17),
+ PMU_PMEVCNTR_EL0(18),
+ PMU_PMEVCNTR_EL0(19),
+ PMU_PMEVCNTR_EL0(20),
+ PMU_PMEVCNTR_EL0(21),
+ PMU_PMEVCNTR_EL0(22),
+ PMU_PMEVCNTR_EL0(23),
+ PMU_PMEVCNTR_EL0(24),
+ PMU_PMEVCNTR_EL0(25),
+ PMU_PMEVCNTR_EL0(26),
+ PMU_PMEVCNTR_EL0(27),
+ PMU_PMEVCNTR_EL0(28),
+ PMU_PMEVCNTR_EL0(29),
+ PMU_PMEVCNTR_EL0(30),
+ /* PMEVTYPERn_EL0 */
+ PMU_PMEVTYPER_EL0(0),
+ PMU_PMEVTYPER_EL0(1),
+ PMU_PMEVTYPER_EL0(2),
+ PMU_PMEVTYPER_EL0(3),
+ PMU_PMEVTYPER_EL0(4),
+ PMU_PMEVTYPER_EL0(5),
+ PMU_PMEVTYPER_EL0(6),
+ PMU_PMEVTYPER_EL0(7),
+ PMU_PMEVTYPER_EL0(8),
+ PMU_PMEVTYPER_EL0(9),
+ PMU_PMEVTYPER_EL0(10),
+ PMU_PMEVTYPER_EL0(11),
+ PMU_PMEVTYPER_EL0(12),
+ PMU_PMEVTYPER_EL0(13),
+ PMU_PMEVTYPER_EL0(14),
+ PMU_PMEVTYPER_EL0(15),
+ PMU_PMEVTYPER_EL0(16),
+ PMU_PMEVTYPER_EL0(17),
+ PMU_PMEVTYPER_EL0(18),
+ PMU_PMEVTYPER_EL0(19),
+ PMU_PMEVTYPER_EL0(20),
+ PMU_PMEVTYPER_EL0(21),
+ PMU_PMEVTYPER_EL0(22),
+ PMU_PMEVTYPER_EL0(23),
+ PMU_PMEVTYPER_EL0(24),
+ PMU_PMEVTYPER_EL0(25),
+ PMU_PMEVTYPER_EL0(26),
+ PMU_PMEVTYPER_EL0(27),
+ PMU_PMEVTYPER_EL0(28),
+ PMU_PMEVTYPER_EL0(29),
+ PMU_PMEVTYPER_EL0(30),
+ /*
+ * PMCCFILTR_EL0 resets as unknown in 64bit mode while it resets as zero
+ * in 32bit mode. Here we choose to reset it as zero for consistency.
+ */
+ { SYS_DESC(SYS_PMCCFILTR_EL0), access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 },
+
+ { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
+ { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 },
+ { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 },
+};
+
+static bool trap_dbgidr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ u64 dfr = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+ u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT);
+
+ p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
+ (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
+ (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20)
+ | (6 << 16) | (el3 << 14) | (el3 << 12));
+ return true;
+ }
+}
+
+static bool trap_debug32(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ vcpu_cp14(vcpu, r->reg) = p->regval;
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+ } else {
+ p->regval = vcpu_cp14(vcpu, r->reg);
+ }
+
+ return true;
+}
+
+/* AArch32 debug register mappings
+ *
+ * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0]
+ * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32]
+ *
+ * All control registers and watchpoint value registers are mapped to
+ * the lower 32 bits of their AArch64 equivalents. We share the trap
+ * handlers with the above AArch64 code which checks what mode the
+ * system is in.
+ */
+
+static bool trap_xvr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+
+ if (p->is_write) {
+ u64 val = *dbg_reg;
+
+ val &= 0xffffffffUL;
+ val |= p->regval << 32;
+ *dbg_reg = val;
+
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+ } else {
+ p->regval = *dbg_reg >> 32;
+ }
+
+ trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+
+ return true;
+}
+
+#define DBG_BCR_BVR_WCR_WVR(n) \
+ /* DBGBVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \
+ /* DBGBCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \
+ /* DBGWVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \
+ /* DBGWCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n }
+
+#define DBGBXVR(n) \
+ { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n }
+
+/*
+ * Trapped cp14 registers. We generally ignore most of the external
+ * debug, on the principle that they don't really make sense to a
+ * guest. Revisit this one day, would this principle change.
+ */
+static const struct sys_reg_desc cp14_regs[] = {
+ /* DBGIDR */
+ { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr },
+ /* DBGDTRRXext */
+ { Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi },
+
+ DBG_BCR_BVR_WCR_WVR(0),
+ /* DBGDSCRint */
+ { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(1),
+ /* DBGDCCINT */
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT },
+ /* DBGDSCRext */
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext },
+ DBG_BCR_BVR_WCR_WVR(2),
+ /* DBGDTR[RT]Xint */
+ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi },
+ /* DBGDTR[RT]Xext */
+ { Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(3),
+ DBG_BCR_BVR_WCR_WVR(4),
+ DBG_BCR_BVR_WCR_WVR(5),
+ /* DBGWFAR */
+ { Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi },
+ /* DBGOSECCR */
+ { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(6),
+ /* DBGVCR */
+ { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR },
+ DBG_BCR_BVR_WCR_WVR(7),
+ DBG_BCR_BVR_WCR_WVR(8),
+ DBG_BCR_BVR_WCR_WVR(9),
+ DBG_BCR_BVR_WCR_WVR(10),
+ DBG_BCR_BVR_WCR_WVR(11),
+ DBG_BCR_BVR_WCR_WVR(12),
+ DBG_BCR_BVR_WCR_WVR(13),
+ DBG_BCR_BVR_WCR_WVR(14),
+ DBG_BCR_BVR_WCR_WVR(15),
+
+ /* DBGDRAR (32bit) */
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi },
+
+ DBGBXVR(0),
+ /* DBGOSLAR */
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
+ DBGBXVR(1),
+ /* DBGOSLSR */
+ { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
+ DBGBXVR(2),
+ DBGBXVR(3),
+ /* DBGOSDLR */
+ { Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi },
+ DBGBXVR(4),
+ /* DBGPRCR */
+ { Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi },
+ DBGBXVR(5),
+ DBGBXVR(6),
+ DBGBXVR(7),
+ DBGBXVR(8),
+ DBGBXVR(9),
+ DBGBXVR(10),
+ DBGBXVR(11),
+ DBGBXVR(12),
+ DBGBXVR(13),
+ DBGBXVR(14),
+ DBGBXVR(15),
+
+ /* DBGDSAR (32bit) */
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi },
+
+ /* DBGDEVID2 */
+ { Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi },
+ /* DBGDEVID1 */
+ { Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi },
+ /* DBGDEVID */
+ { Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi },
+ /* DBGCLAIMSET */
+ { Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi },
+ /* DBGCLAIMCLR */
+ { Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi },
+ /* DBGAUTHSTATUS */
+ { Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 },
+};
+
+/* Trapped cp14 64bit registers */
+static const struct sys_reg_desc cp14_64_regs[] = {
+ /* DBGDRAR (64bit) */
+ { Op1( 0), CRm( 1), .access = trap_raz_wi },
+
+ /* DBGDSAR (64bit) */
+ { Op1( 0), CRm( 2), .access = trap_raz_wi },
+};
+
+/* Macro to expand the PMEVCNTRn register */
+#define PMU_PMEVCNTR(n) \
+ /* PMEVCNTRn */ \
+ { Op1(0), CRn(0b1110), \
+ CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_evcntr }
+
+/* Macro to expand the PMEVTYPERn register */
+#define PMU_PMEVTYPER(n) \
+ /* PMEVTYPERn */ \
+ { Op1(0), CRn(0b1110), \
+ CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_evtyper }
+
+/*
+ * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
+ * depending on the way they are accessed (as a 32bit or a 64bit
+ * register).
+ */
+static const struct sys_reg_desc cp15_regs[] = {
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, c2_TTBCR2 },
+ { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
+ { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
+ { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
+ { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
+ { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
+ { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
+ { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
+
+ /*
+ * DC{C,I,CI}SW operations:
+ */
+ { Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
+ { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
+ { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
+
+ /* PMU */
+ { Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmcr },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmcnten },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmcnten },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmuserenr },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
+
+ { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
+ { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
+ { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
+ { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
+
+ /* ICC_SRE */
+ { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
+
+ { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+
+ /* CNTP_TVAL */
+ { Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval },
+ /* CNTP_CTL */
+ { Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl },
+
+ /* PMEVCNTRn */
+ PMU_PMEVCNTR(0),
+ PMU_PMEVCNTR(1),
+ PMU_PMEVCNTR(2),
+ PMU_PMEVCNTR(3),
+ PMU_PMEVCNTR(4),
+ PMU_PMEVCNTR(5),
+ PMU_PMEVCNTR(6),
+ PMU_PMEVCNTR(7),
+ PMU_PMEVCNTR(8),
+ PMU_PMEVCNTR(9),
+ PMU_PMEVCNTR(10),
+ PMU_PMEVCNTR(11),
+ PMU_PMEVCNTR(12),
+ PMU_PMEVCNTR(13),
+ PMU_PMEVCNTR(14),
+ PMU_PMEVCNTR(15),
+ PMU_PMEVCNTR(16),
+ PMU_PMEVCNTR(17),
+ PMU_PMEVCNTR(18),
+ PMU_PMEVCNTR(19),
+ PMU_PMEVCNTR(20),
+ PMU_PMEVCNTR(21),
+ PMU_PMEVCNTR(22),
+ PMU_PMEVCNTR(23),
+ PMU_PMEVCNTR(24),
+ PMU_PMEVCNTR(25),
+ PMU_PMEVCNTR(26),
+ PMU_PMEVCNTR(27),
+ PMU_PMEVCNTR(28),
+ PMU_PMEVCNTR(29),
+ PMU_PMEVCNTR(30),
+ /* PMEVTYPERn */
+ PMU_PMEVTYPER(0),
+ PMU_PMEVTYPER(1),
+ PMU_PMEVTYPER(2),
+ PMU_PMEVTYPER(3),
+ PMU_PMEVTYPER(4),
+ PMU_PMEVTYPER(5),
+ PMU_PMEVTYPER(6),
+ PMU_PMEVTYPER(7),
+ PMU_PMEVTYPER(8),
+ PMU_PMEVTYPER(9),
+ PMU_PMEVTYPER(10),
+ PMU_PMEVTYPER(11),
+ PMU_PMEVTYPER(12),
+ PMU_PMEVTYPER(13),
+ PMU_PMEVTYPER(14),
+ PMU_PMEVTYPER(15),
+ PMU_PMEVTYPER(16),
+ PMU_PMEVTYPER(17),
+ PMU_PMEVTYPER(18),
+ PMU_PMEVTYPER(19),
+ PMU_PMEVTYPER(20),
+ PMU_PMEVTYPER(21),
+ PMU_PMEVTYPER(22),
+ PMU_PMEVTYPER(23),
+ PMU_PMEVTYPER(24),
+ PMU_PMEVTYPER(25),
+ PMU_PMEVTYPER(26),
+ PMU_PMEVTYPER(27),
+ PMU_PMEVTYPER(28),
+ PMU_PMEVTYPER(29),
+ PMU_PMEVTYPER(30),
+ /* PMCCFILTR */
+ { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper },
+};
+
+static const struct sys_reg_desc cp15_64_regs[] = {
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+ { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
+ { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
+ { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+ { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
+ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
+ { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
+};
+
+/* Target specific emulation tables */
+static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+ struct kvm_sys_reg_target_table *table)
+{
+ target_tables[target] = table;
+}
+
+/* Get specific register table for this target. */
+static const struct sys_reg_desc *get_target_table(unsigned target,
+ bool mode_is_64,
+ size_t *num)
+{
+ struct kvm_sys_reg_target_table *table;
+
+ table = target_tables[target];
+ if (mode_is_64) {
+ *num = table->table64.num;
+ return table->table64.table;
+ } else {
+ *num = table->table32.num;
+ return table->table32.table;
+ }
+}
+
+#define reg_to_match_value(x) \
+ ({ \
+ unsigned long val; \
+ val = (x)->Op0 << 14; \
+ val |= (x)->Op1 << 11; \
+ val |= (x)->CRn << 7; \
+ val |= (x)->CRm << 3; \
+ val |= (x)->Op2; \
+ val; \
+ })
+
+static int match_sys_reg(const void *key, const void *elt)
+{
+ const unsigned long pval = (unsigned long)key;
+ const struct sys_reg_desc *r = elt;
+
+ return pval - reg_to_match_value(r);
+}
+
+static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
+ const struct sys_reg_desc table[],
+ unsigned int num)
+{
+ unsigned long pval = reg_to_match_value(params);
+
+ return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
+}
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+static void perform_access(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *r)
+{
+ /*
+ * Not having an accessor means that we have configured a trap
+ * that we don't know how to handle. This certainly qualifies
+ * as a gross bug that should be fixed right away.
+ */
+ BUG_ON(!r->access);
+
+ /* Skip instruction if instructed so */
+ if (likely(r->access(vcpu, params, r)))
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+}
+
+/*
+ * emulate_cp -- tries to match a sys_reg access in a handling table, and
+ * call the corresponding trap handler.
+ *
+ * @params: pointer to the descriptor of the access
+ * @table: array of trap descriptors
+ * @num: size of the trap descriptor array
+ *
+ * Return 0 if the access has been handled, and -1 if not.
+ */
+static int emulate_cp(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *table,
+ size_t num)
+{
+ const struct sys_reg_desc *r;
+
+ if (!table)
+ return -1; /* Not handled */
+
+ r = find_reg(params, table, num);
+
+ if (r) {
+ perform_access(vcpu, params, r);
+ return 0;
+ }
+
+ /* Not handled */
+ return -1;
+}
+
+static void unhandled_cp_access(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params)
+{
+ u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+ int cp = -1;
+
+ switch(hsr_ec) {
+ case ESR_ELx_EC_CP15_32:
+ case ESR_ELx_EC_CP15_64:
+ cp = 15;
+ break;
+ case ESR_ELx_EC_CP14_MR:
+ case ESR_ELx_EC_CP14_64:
+ cp = 14;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ kvm_err("Unsupported guest CP%d access at: %08lx\n",
+ cp, *vcpu_pc(vcpu));
+ print_sys_reg_instr(params);
+ kvm_inject_undefined(vcpu);
+}
+
+/**
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access
+ * @vcpu: The VCPU pointer
+ * @run: The kvm_run struct
+ */
+static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *global,
+ size_t nr_global,
+ const struct sys_reg_desc *target_specific,
+ size_t nr_specific)
+{
+ struct sys_reg_params params;
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ int Rt = kvm_vcpu_sys_get_rt(vcpu);
+ int Rt2 = (hsr >> 10) & 0x1f;
+
+ params.is_aarch32 = true;
+ params.is_32bit = false;
+ params.CRm = (hsr >> 1) & 0xf;
+ params.is_write = ((hsr & 1) == 0);
+
+ params.Op0 = 0;
+ params.Op1 = (hsr >> 16) & 0xf;
+ params.Op2 = 0;
+ params.CRn = 0;
+
+ /*
+ * Make a 64-bit value out of Rt and Rt2. As we use the same trap
+ * backends between AArch32 and AArch64, we get away with it.
+ */
+ if (params.is_write) {
+ params.regval = vcpu_get_reg(vcpu, Rt) & 0xffffffff;
+ params.regval |= vcpu_get_reg(vcpu, Rt2) << 32;
+ }
+
+ /*
+ * Try to emulate the coprocessor access using the target
+ * specific table first, and using the global table afterwards.
+ * If either of the tables contains a handler, handle the
+ * potential register operation in the case of a read and return
+ * with success.
+ */
+ if (!emulate_cp(vcpu, &params, target_specific, nr_specific) ||
+ !emulate_cp(vcpu, &params, global, nr_global)) {
+ /* Split up the value between registers for the read side */
+ if (!params.is_write) {
+ vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval));
+ vcpu_set_reg(vcpu, Rt2, upper_32_bits(params.regval));
+ }
+
+ return 1;
+ }
+
+ unhandled_cp_access(vcpu, &params);
+ return 1;
+}
+
+/**
+ * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
+ * @vcpu: The VCPU pointer
+ * @run: The kvm_run struct
+ */
+static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *global,
+ size_t nr_global,
+ const struct sys_reg_desc *target_specific,
+ size_t nr_specific)
+{
+ struct sys_reg_params params;
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ int Rt = kvm_vcpu_sys_get_rt(vcpu);
+
+ params.is_aarch32 = true;
+ params.is_32bit = true;
+ params.CRm = (hsr >> 1) & 0xf;
+ params.regval = vcpu_get_reg(vcpu, Rt);
+ params.is_write = ((hsr & 1) == 0);
+ params.CRn = (hsr >> 10) & 0xf;
+ params.Op0 = 0;
+ params.Op1 = (hsr >> 14) & 0x7;
+ params.Op2 = (hsr >> 17) & 0x7;
+
+ if (!emulate_cp(vcpu, &params, target_specific, nr_specific) ||
+ !emulate_cp(vcpu, &params, global, nr_global)) {
+ if (!params.is_write)
+ vcpu_set_reg(vcpu, Rt, params.regval);
+ return 1;
+ }
+
+ unhandled_cp_access(vcpu, &params);
+ return 1;
+}
+
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ const struct sys_reg_desc *target_specific;
+ size_t num;
+
+ target_specific = get_target_table(vcpu->arch.target, false, &num);
+ return kvm_handle_cp_64(vcpu,
+ cp15_64_regs, ARRAY_SIZE(cp15_64_regs),
+ target_specific, num);
+}
+
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ const struct sys_reg_desc *target_specific;
+ size_t num;
+
+ target_specific = get_target_table(vcpu->arch.target, false, &num);
+ return kvm_handle_cp_32(vcpu,
+ cp15_regs, ARRAY_SIZE(cp15_regs),
+ target_specific, num);
+}
+
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ return kvm_handle_cp_64(vcpu,
+ cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
+ NULL, 0);
+}
+
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ return kvm_handle_cp_32(vcpu,
+ cp14_regs, ARRAY_SIZE(cp14_regs),
+ NULL, 0);
+}
+
+static int emulate_sys_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params)
+{
+ size_t num;
+ const struct sys_reg_desc *table, *r;
+
+ table = get_target_table(vcpu->arch.target, true, &num);
+
+ /* Search target-specific then generic table. */
+ r = find_reg(params, table, num);
+ if (!r)
+ r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+ if (likely(r)) {
+ perform_access(vcpu, params, r);
+ } else {
+ kvm_err("Unsupported guest sys_reg access at: %lx\n",
+ *vcpu_pc(vcpu));
+ print_sys_reg_instr(params);
+ kvm_inject_undefined(vcpu);
+ }
+ return 1;
+}
+
+static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *table, size_t num,
+ unsigned long *bmap)
+{
+ unsigned long i;
+
+ for (i = 0; i < num; i++)
+ if (table[i].reset) {
+ int reg = table[i].reg;
+
+ table[i].reset(vcpu, &table[i]);
+ if (reg > 0 && reg < NR_SYS_REGS)
+ set_bit(reg, bmap);
+ }
+}
+
+/**
+ * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
+ * @vcpu: The VCPU pointer
+ * @run: The kvm_run struct
+ */
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ struct sys_reg_params params;
+ unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+ int Rt = kvm_vcpu_sys_get_rt(vcpu);
+ int ret;
+
+ trace_kvm_handle_sys_reg(esr);
+
+ params.is_aarch32 = false;
+ params.is_32bit = false;
+ params.Op0 = (esr >> 20) & 3;
+ params.Op1 = (esr >> 14) & 0x7;
+ params.CRn = (esr >> 10) & 0xf;
+ params.CRm = (esr >> 1) & 0xf;
+ params.Op2 = (esr >> 17) & 0x7;
+ params.regval = vcpu_get_reg(vcpu, Rt);
+ params.is_write = !(esr & 1);
+
+ ret = emulate_sys_reg(vcpu, &params);
+
+ if (!params.is_write)
+ vcpu_set_reg(vcpu, Rt, params.regval);
+ return ret;
+}
+
+/******************************************************************************
+ * Userspace API
+ *****************************************************************************/
+
+static bool index_to_params(u64 id, struct sys_reg_params *params)
+{
+ switch (id & KVM_REG_SIZE_MASK) {
+ case KVM_REG_SIZE_U64:
+ /* Any unused index bits means it's not valid. */
+ if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
+ | KVM_REG_ARM_COPROC_MASK
+ | KVM_REG_ARM64_SYSREG_OP0_MASK
+ | KVM_REG_ARM64_SYSREG_OP1_MASK
+ | KVM_REG_ARM64_SYSREG_CRN_MASK
+ | KVM_REG_ARM64_SYSREG_CRM_MASK
+ | KVM_REG_ARM64_SYSREG_OP2_MASK))
+ return false;
+ params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK)
+ >> KVM_REG_ARM64_SYSREG_OP0_SHIFT);
+ params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK)
+ >> KVM_REG_ARM64_SYSREG_OP1_SHIFT);
+ params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK)
+ >> KVM_REG_ARM64_SYSREG_CRN_SHIFT);
+ params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK)
+ >> KVM_REG_ARM64_SYSREG_CRM_SHIFT);
+ params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK)
+ >> KVM_REG_ARM64_SYSREG_OP2_SHIFT);
+ return true;
+ default:
+ return false;
+ }
+}
+
+const struct sys_reg_desc *find_reg_by_id(u64 id,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc table[],
+ unsigned int num)
+{
+ if (!index_to_params(id, params))
+ return NULL;
+
+ return find_reg(params, table, num);
+}
+
+/* Decode an index value, and find the sys_reg_desc entry. */
+static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
+ u64 id)
+{
+ size_t num;
+ const struct sys_reg_desc *table, *r;
+ struct sys_reg_params params;
+
+ /* We only do sys_reg for now. */
+ if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
+ return NULL;
+
+ if (!index_to_params(id, &params))
+ return NULL;
+
+ table = get_target_table(vcpu->arch.target, true, &num);
+ r = find_reg(&params, table, num);
+ if (!r)
+ r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+ /* Not saved in the sys_reg array and not otherwise accessible? */
+ if (r && !(r->reg || r->get_user))
+ r = NULL;
+
+ return r;
+}
+
+/*
+ * These are the invariant sys_reg registers: we let the guest see the
+ * host versions of these, so they're part of the guest state.
+ *
+ * A future CPU may provide a mechanism to present different values to
+ * the guest, or a future kvm may trap them.
+ */
+
+#define FUNCTION_INVARIANT(reg) \
+ static void get_##reg(struct kvm_vcpu *v, \
+ const struct sys_reg_desc *r) \
+ { \
+ ((struct sys_reg_desc *)r)->val = read_sysreg(reg); \
+ }
+
+FUNCTION_INVARIANT(midr_el1)
+FUNCTION_INVARIANT(ctr_el0)
+FUNCTION_INVARIANT(revidr_el1)
+FUNCTION_INVARIANT(clidr_el1)
+FUNCTION_INVARIANT(aidr_el1)
+
+/* ->val is filled in by kvm_sys_reg_table_init() */
+static struct sys_reg_desc invariant_sys_regs[] = {
+ { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
+ { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
+ { SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 },
+ { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
+ { SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
+};
+
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
+{
+ if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
+{
+ if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int get_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+ struct sys_reg_params params;
+ const struct sys_reg_desc *r;
+
+ r = find_reg_by_id(id, &params, invariant_sys_regs,
+ ARRAY_SIZE(invariant_sys_regs));
+ if (!r)
+ return -ENOENT;
+
+ return reg_to_user(uaddr, &r->val, id);
+}
+
+static int set_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+ struct sys_reg_params params;
+ const struct sys_reg_desc *r;
+ int err;
+ u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+
+ r = find_reg_by_id(id, &params, invariant_sys_regs,
+ ARRAY_SIZE(invariant_sys_regs));
+ if (!r)
+ return -ENOENT;
+
+ err = reg_from_user(&val, uaddr, id);
+ if (err)
+ return err;
+
+ /* This is what we mean by invariant: you can't change it. */
+ if (r->val != val)
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool is_valid_cache(u32 val)
+{
+ u32 level, ctype;
+
+ if (val >= CSSELR_MAX)
+ return false;
+
+ /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
+ level = (val >> 1);
+ ctype = (cache_levels >> (level * 3)) & 7;
+
+ switch (ctype) {
+ case 0: /* No cache */
+ return false;
+ case 1: /* Instruction cache only */
+ return (val & 1);
+ case 2: /* Data cache only */
+ case 4: /* Unified cache */
+ return !(val & 1);
+ case 3: /* Separate instruction and data caches */
+ return true;
+ default: /* Reserved: we can't know instruction or data. */
+ return false;
+ }
+}
+
+static int demux_c15_get(u64 id, void __user *uaddr)
+{
+ u32 val;
+ u32 __user *uval = uaddr;
+
+ /* Fail if we have unknown bits set. */
+ if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+ | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+ return -ENOENT;
+
+ switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+ case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+ if (KVM_REG_SIZE(id) != 4)
+ return -ENOENT;
+ val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+ >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+ if (!is_valid_cache(val))
+ return -ENOENT;
+
+ return put_user(get_ccsidr(val), uval);
+ default:
+ return -ENOENT;
+ }
+}
+
+static int demux_c15_set(u64 id, void __user *uaddr)
+{
+ u32 val, newval;
+ u32 __user *uval = uaddr;
+
+ /* Fail if we have unknown bits set. */
+ if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+ | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+ return -ENOENT;
+
+ switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+ case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+ if (KVM_REG_SIZE(id) != 4)
+ return -ENOENT;
+ val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+ >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+ if (!is_valid_cache(val))
+ return -ENOENT;
+
+ if (get_user(newval, uval))
+ return -EFAULT;
+
+ /* This is also invariant: you can't change it. */
+ if (newval != get_ccsidr(val))
+ return -EINVAL;
+ return 0;
+ default:
+ return -ENOENT;
+ }
+}
+
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ const struct sys_reg_desc *r;
+ void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+ return demux_c15_get(reg->id, uaddr);
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+ return -ENOENT;
+
+ r = index_to_sys_reg_desc(vcpu, reg->id);
+ if (!r)
+ return get_invariant_sys_reg(reg->id, uaddr);
+
+ if (r->get_user)
+ return (r->get_user)(vcpu, r, reg, uaddr);
+
+ return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id);
+}
+
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ const struct sys_reg_desc *r;
+ void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+ return demux_c15_set(reg->id, uaddr);
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+ return -ENOENT;
+
+ r = index_to_sys_reg_desc(vcpu, reg->id);
+ if (!r)
+ return set_invariant_sys_reg(reg->id, uaddr);
+
+ if (r->set_user)
+ return (r->set_user)(vcpu, r, reg, uaddr);
+
+ return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+}
+
+static unsigned int num_demux_regs(void)
+{
+ unsigned int i, count = 0;
+
+ for (i = 0; i < CSSELR_MAX; i++)
+ if (is_valid_cache(i))
+ count++;
+
+ return count;
+}
+
+static int write_demux_regids(u64 __user *uindices)
+{
+ u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
+ unsigned int i;
+
+ val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
+ for (i = 0; i < CSSELR_MAX; i++) {
+ if (!is_valid_cache(i))
+ continue;
+ if (put_user(val | i, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ return 0;
+}
+
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg)
+{
+ return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 |
+ KVM_REG_ARM64_SYSREG |
+ (reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) |
+ (reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) |
+ (reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) |
+ (reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) |
+ (reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT));
+}
+
+static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
+{
+ if (!*uind)
+ return true;
+
+ if (put_user(sys_reg_to_index(reg), *uind))
+ return false;
+
+ (*uind)++;
+ return true;
+}
+
+static int walk_one_sys_reg(const struct sys_reg_desc *rd,
+ u64 __user **uind,
+ unsigned int *total)
+{
+ /*
+ * Ignore registers we trap but don't save,
+ * and for which no custom user accessor is provided.
+ */
+ if (!(rd->reg || rd->get_user))
+ return 0;
+
+ if (!copy_reg_to_user(rd, uind))
+ return -EFAULT;
+
+ (*total)++;
+ return 0;
+}
+
+/* Assumed ordered tables, see kvm_sys_reg_table_init. */
+static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
+{
+ const struct sys_reg_desc *i1, *i2, *end1, *end2;
+ unsigned int total = 0;
+ size_t num;
+ int err;
+
+ /* We check for duplicates here, to allow arch-specific overrides. */
+ i1 = get_target_table(vcpu->arch.target, true, &num);
+ end1 = i1 + num;
+ i2 = sys_reg_descs;
+ end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
+
+ BUG_ON(i1 == end1 || i2 == end2);
+
+ /* Walk carefully, as both tables may refer to the same register. */
+ while (i1 || i2) {
+ int cmp = cmp_sys_reg(i1, i2);
+ /* target-specific overrides generic entry. */
+ if (cmp <= 0)
+ err = walk_one_sys_reg(i1, &uind, &total);
+ else
+ err = walk_one_sys_reg(i2, &uind, &total);
+
+ if (err)
+ return err;
+
+ if (cmp <= 0 && ++i1 == end1)
+ i1 = NULL;
+ if (cmp >= 0 && ++i2 == end2)
+ i2 = NULL;
+ }
+ return total;
+}
+
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu)
+{
+ return ARRAY_SIZE(invariant_sys_regs)
+ + num_demux_regs()
+ + walk_sys_regs(vcpu, (u64 __user *)NULL);
+}
+
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ unsigned int i;
+ int err;
+
+ /* Then give them all the invariant registers' indices. */
+ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) {
+ if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices))
+ return -EFAULT;
+ uindices++;
+ }
+
+ err = walk_sys_regs(vcpu, uindices);
+ if (err < 0)
+ return err;
+ uindices += err;
+
+ return write_demux_regids(uindices);
+}
+
+static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 1; i < n; i++) {
+ if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
+ kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+void kvm_sys_reg_table_init(void)
+{
+ unsigned int i;
+ struct sys_reg_desc clidr;
+
+ /* Make sure tables are unique and in order. */
+ BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs)));
+ BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs)));
+ BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs)));
+ BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
+ BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
+ BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
+
+ /* We abuse the reset function to overwrite the table itself. */
+ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
+ invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
+
+ /*
+ * CLIDR format is awkward, so clean it up. See ARM B4.1.20:
+ *
+ * If software reads the Cache Type fields from Ctype1
+ * upwards, once it has seen a value of 0b000, no caches
+ * exist at further-out levels of the hierarchy. So, for
+ * example, if Ctype3 is the first Cache Type field with a
+ * value of 0b000, the values of Ctype4 to Ctype7 must be
+ * ignored.
+ */
+ get_clidr_el1(NULL, &clidr); /* Ugly... */
+ cache_levels = clidr.val;
+ for (i = 0; i < 7; i++)
+ if (((cache_levels >> (i*3)) & 7) == 0)
+ break;
+ /* Clear all higher bits. */
+ cache_levels &= (1 << (i*3))-1;
+}
+
+/**
+ * kvm_reset_sys_regs - sets system registers to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on the
+ * virtual CPU struct to their architecturally defined reset values.
+ */
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
+{
+ size_t num;
+ const struct sys_reg_desc *table;
+ DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, };
+
+ /* Generic chip reset first (so target could override). */
+ reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap);
+
+ table = get_target_table(vcpu->arch.target, true, &num);
+ reset_sys_reg_descs(vcpu, table, num, bmap);
+
+ for (num = 1; num < NR_SYS_REGS; num++) {
+ if (WARN(!test_bit(num, bmap),
+ "Didn't reset __vcpu_sys_reg(%zi)\n", num))
+ break;
+ }
+}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
new file mode 100644
index 000000000..cd710f8b6
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__
+#define __ARM64_KVM_SYS_REGS_LOCAL_H__
+
+struct sys_reg_params {
+ u8 Op0;
+ u8 Op1;
+ u8 CRn;
+ u8 CRm;
+ u8 Op2;
+ u64 regval;
+ bool is_write;
+ bool is_aarch32;
+ bool is_32bit; /* Only valid if is_aarch32 is true */
+};
+
+struct sys_reg_desc {
+ /* MRS/MSR instruction which accesses it. */
+ u8 Op0;
+ u8 Op1;
+ u8 CRn;
+ u8 CRm;
+ u8 Op2;
+
+ /* Trapped access from guest, if non-NULL. */
+ bool (*access)(struct kvm_vcpu *,
+ struct sys_reg_params *,
+ const struct sys_reg_desc *);
+
+ /* Initialization for vcpu. */
+ void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *);
+
+ /* Index into sys_reg[], or 0 if we don't need to save it. */
+ int reg;
+
+ /* Value (usually reset value) */
+ u64 val;
+
+ /* Custom get/set_user functions, fallback to generic if NULL */
+ int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr);
+ int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr);
+};
+
+static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+{
+ /* Look, we even formatted it for you to paste into the table! */
+ kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+ p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+}
+
+static inline bool ignore_write(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p)
+{
+ return true;
+}
+
+static inline bool read_zero(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p)
+{
+ p->regval = 0;
+ return true;
+}
+
+/* Reset functions */
+static inline void reset_unknown(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ BUG_ON(!r->reg);
+ BUG_ON(r->reg >= NR_SYS_REGS);
+ __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+}
+
+static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ BUG_ON(!r->reg);
+ BUG_ON(r->reg >= NR_SYS_REGS);
+ __vcpu_sys_reg(vcpu, r->reg) = r->val;
+}
+
+static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
+ const struct sys_reg_desc *i2)
+{
+ BUG_ON(i1 == i2);
+ if (!i1)
+ return 1;
+ else if (!i2)
+ return -1;
+ if (i1->Op0 != i2->Op0)
+ return i1->Op0 - i2->Op0;
+ if (i1->Op1 != i2->Op1)
+ return i1->Op1 - i2->Op1;
+ if (i1->CRn != i2->CRn)
+ return i1->CRn - i2->CRn;
+ if (i1->CRm != i2->CRm)
+ return i1->CRm - i2->CRm;
+ return i1->Op2 - i2->Op2;
+}
+
+const struct sys_reg_desc *find_reg_by_id(u64 id,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc table[],
+ unsigned int num);
+
+#define Op0(_x) .Op0 = _x
+#define Op1(_x) .Op1 = _x
+#define CRn(_x) .CRn = _x
+#define CRm(_x) .CRm = _x
+#define Op2(_x) .Op2 = _x
+
+#define SYS_DESC(reg) \
+ Op0(sys_reg_Op0(reg)), Op1(sys_reg_Op1(reg)), \
+ CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \
+ Op2(sys_reg_Op2(reg))
+
+#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
new file mode 100644
index 000000000..ddb8497d1
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/coproc_a15.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ * Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kvm_host.h>
+#include <asm/cputype.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/sysreg.h>
+#include <linux/init.h>
+
+#include "sys_regs.h"
+
+static bool access_actlr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
+ return true;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
+}
+
+/*
+ * Implementation specific sys-reg registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ */
+static const struct sys_reg_desc genericv8_sys_regs[] = {
+ { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
+};
+
+static const struct sys_reg_desc genericv8_cp15_regs[] = {
+ /* ACTLR */
+ { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+ access_actlr },
+};
+
+static struct kvm_sys_reg_target_table genericv8_target_table = {
+ .table64 = {
+ .table = genericv8_sys_regs,
+ .num = ARRAY_SIZE(genericv8_sys_regs),
+ },
+ .table32 = {
+ .table = genericv8_cp15_regs,
+ .num = ARRAY_SIZE(genericv8_cp15_regs),
+ },
+};
+
+static int __init sys_reg_genericv8_init(void)
+{
+ unsigned int i;
+
+ for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++)
+ BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1],
+ &genericv8_sys_regs[i]) >= 0);
+
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8,
+ &genericv8_target_table);
+
+ return 0;
+}
+late_initcall(sys_reg_genericv8_init);
diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h
new file mode 100644
index 000000000..3b82fb1dd
--- /dev/null
+++ b/arch/arm64/kvm/trace.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ARM64_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+TRACE_EVENT(kvm_wfx_arm64,
+ TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
+ TP_ARGS(vcpu_pc, is_wfe),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, vcpu_pc)
+ __field(bool, is_wfe)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ __entry->is_wfe = is_wfe;
+ ),
+
+ TP_printk("guest executed wf%c at: 0x%08lx",
+ __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_hvc_arm64,
+ TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
+ TP_ARGS(vcpu_pc, r0, imm),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, vcpu_pc)
+ __field(unsigned long, r0)
+ __field(unsigned long, imm)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ __entry->r0 = r0;
+ __entry->imm = imm;
+ ),
+
+ TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
+ __entry->vcpu_pc, __entry->r0, __entry->imm)
+);
+
+TRACE_EVENT(kvm_arm_setup_debug,
+ TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
+ TP_ARGS(vcpu, guest_debug),
+
+ TP_STRUCT__entry(
+ __field(struct kvm_vcpu *, vcpu)
+ __field(__u32, guest_debug)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->guest_debug = guest_debug;
+ ),
+
+ TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
+);
+
+TRACE_EVENT(kvm_arm_clear_debug,
+ TP_PROTO(__u32 guest_debug),
+ TP_ARGS(guest_debug),
+
+ TP_STRUCT__entry(
+ __field(__u32, guest_debug)
+ ),
+
+ TP_fast_assign(
+ __entry->guest_debug = guest_debug;
+ ),
+
+ TP_printk("flags: 0x%08x", __entry->guest_debug)
+);
+
+TRACE_EVENT(kvm_arm_set_dreg32,
+ TP_PROTO(const char *name, __u32 value),
+ TP_ARGS(name, value),
+
+ TP_STRUCT__entry(
+ __field(const char *, name)
+ __field(__u32, value)
+ ),
+
+ TP_fast_assign(
+ __entry->name = name;
+ __entry->value = value;
+ ),
+
+ TP_printk("%s: 0x%08x", __entry->name, __entry->value)
+);
+
+TRACE_DEFINE_SIZEOF(__u64);
+
+TRACE_EVENT(kvm_arm_set_regset,
+ TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
+ TP_ARGS(type, len, control, value),
+ TP_STRUCT__entry(
+ __field(const char *, name)
+ __field(int, len)
+ __array(u64, ctrls, 16)
+ __array(u64, values, 16)
+ ),
+ TP_fast_assign(
+ __entry->name = type;
+ __entry->len = len;
+ memcpy(__entry->ctrls, control, len << 3);
+ memcpy(__entry->values, value, len << 3);
+ ),
+ TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
+ __print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
+ __print_array(__entry->values, __entry->len, sizeof(__u64)))
+);
+
+TRACE_EVENT(trap_reg,
+ TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
+ TP_ARGS(fn, reg, is_write, write_value),
+
+ TP_STRUCT__entry(
+ __field(const char *, fn)
+ __field(int, reg)
+ __field(bool, is_write)
+ __field(u64, write_value)
+ ),
+
+ TP_fast_assign(
+ __entry->fn = fn;
+ __entry->reg = reg;
+ __entry->is_write = is_write;
+ __entry->write_value = write_value;
+ ),
+
+ TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
+);
+
+TRACE_EVENT(kvm_handle_sys_reg,
+ TP_PROTO(unsigned long hsr),
+ TP_ARGS(hsr),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, hsr)
+ ),
+
+ TP_fast_assign(
+ __entry->hsr = hsr;
+ ),
+
+ TP_printk("HSR 0x%08lx", __entry->hsr)
+);
+
+TRACE_EVENT(kvm_set_guest_debug,
+ TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
+ TP_ARGS(vcpu, guest_debug),
+
+ TP_STRUCT__entry(
+ __field(struct kvm_vcpu *, vcpu)
+ __field(__u32, guest_debug)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->guest_debug = guest_debug;
+ ),
+
+ TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
+);
+
+
+#endif /* _TRACE_ARM64_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
new file mode 100644
index 000000000..c712a7376
--- /dev/null
+++ b/arch/arm64/kvm/va_layout.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/random.h>
+#include <linux/memblock.h>
+#include <asm/alternative.h>
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * The LSB of the random hyp VA tag or 0 if no randomization is used.
+ */
+static u8 tag_lsb;
+/*
+ * The random hyp VA tag value with the region bit if hyp randomization is used
+ */
+static u64 tag_val;
+static u64 va_mask;
+
+static void compute_layout(void)
+{
+ phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
+ u64 hyp_va_msb;
+ int kva_msb;
+
+ /* Where is my RAM region? */
+ hyp_va_msb = idmap_addr & BIT(VA_BITS - 1);
+ hyp_va_msb ^= BIT(VA_BITS - 1);
+
+ kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^
+ (u64)(high_memory - 1));
+
+ if (kva_msb == (VA_BITS - 1)) {
+ /*
+ * No space in the address, let's compute the mask so
+ * that it covers (VA_BITS - 1) bits, and the region
+ * bit. The tag stays set to zero.
+ */
+ va_mask = BIT(VA_BITS - 1) - 1;
+ va_mask |= hyp_va_msb;
+ } else {
+ /*
+ * We do have some free bits to insert a random tag.
+ * Hyp VAs are now created from kernel linear map VAs
+ * using the following formula (with V == VA_BITS):
+ *
+ * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0
+ * ---------------------------------------------------------
+ * | 0000000 | hyp_va_msb | random tag | kern linear VA |
+ */
+ tag_lsb = kva_msb;
+ va_mask = GENMASK_ULL(tag_lsb - 1, 0);
+ tag_val = get_random_long() & GENMASK_ULL(VA_BITS - 2, tag_lsb);
+ tag_val |= hyp_va_msb;
+ tag_val >>= tag_lsb;
+ }
+}
+
+static u32 compute_instruction(int n, u32 rd, u32 rn)
+{
+ u32 insn = AARCH64_BREAK_FAULT;
+
+ switch (n) {
+ case 0:
+ insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_AND,
+ AARCH64_INSN_VARIANT_64BIT,
+ rn, rd, va_mask);
+ break;
+
+ case 1:
+ /* ROR is a variant of EXTR with Rm = Rn */
+ insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT,
+ rn, rn, rd,
+ tag_lsb);
+ break;
+
+ case 2:
+ insn = aarch64_insn_gen_add_sub_imm(rd, rn,
+ tag_val & GENMASK(11, 0),
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_ADSB_ADD);
+ break;
+
+ case 3:
+ insn = aarch64_insn_gen_add_sub_imm(rd, rn,
+ tag_val & GENMASK(23, 12),
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_ADSB_ADD);
+ break;
+
+ case 4:
+ /* ROR is a variant of EXTR with Rm = Rn */
+ insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT,
+ rn, rn, rd, 64 - tag_lsb);
+ break;
+ }
+
+ return insn;
+}
+
+void __init kvm_update_va_mask(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ int i;
+
+ BUG_ON(nr_inst != 5);
+
+ if (!has_vhe() && !va_mask)
+ compute_layout();
+
+ for (i = 0; i < nr_inst; i++) {
+ u32 rd, rn, insn, oinsn;
+
+ /*
+ * VHE doesn't need any address translation, let's NOP
+ * everything.
+ *
+ * Alternatively, if we don't have any spare bits in
+ * the address, NOP everything after masking that
+ * kernel VA.
+ */
+ if (has_vhe() || (!tag_lsb && i > 0)) {
+ updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
+ continue;
+ }
+
+ oinsn = le32_to_cpu(origptr[i]);
+ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);
+ rn = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, oinsn);
+
+ insn = compute_instruction(i, rd, rn);
+ BUG_ON(insn == AARCH64_BREAK_FAULT);
+
+ updptr[i] = cpu_to_le32(insn);
+ }
+}
+
+void *__kvm_bp_vect_base;
+int __kvm_harden_el2_vector_slot;
+
+void kvm_patch_vector_branch(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ u64 addr;
+ u32 insn;
+
+ BUG_ON(nr_inst != 5);
+
+ if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
+ WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS));
+ return;
+ }
+
+ if (!va_mask)
+ compute_layout();
+
+ /*
+ * Compute HYP VA by using the same computation as kern_hyp_va()
+ */
+ addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector);
+ addr &= va_mask;
+ addr |= tag_val << tag_lsb;
+
+ /* Use PC[10:7] to branch to the same vector in KVM */
+ addr |= ((u64)origptr & GENMASK_ULL(10, 7));
+
+ /*
+ * Branch to the second instruction in the vectors in order to
+ * avoid the initial store on the stack (which we already
+ * perform in the hardening vectors).
+ */
+ addr += AARCH64_INSN_SIZE;
+
+ /* stp x0, x1, [sp, #-16]! */
+ insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0,
+ AARCH64_INSN_REG_1,
+ AARCH64_INSN_REG_SP,
+ -16,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movz x0, #(addr & 0xffff) */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)addr,
+ 0,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_ZERO);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movk x0, #((addr >> 16) & 0xffff), lsl #16 */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)(addr >> 16),
+ 16,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_KEEP);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movk x0, #((addr >> 32) & 0xffff), lsl #32 */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)(addr >> 32),
+ 32,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_KEEP);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* br x0 */
+ insn = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_0,
+ AARCH64_INSN_BRANCH_NOLINK);
+ *updptr++ = cpu_to_le32(insn);
+}
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
new file mode 100644
index 000000000..c77d508b7
--- /dev/null
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -0,0 +1,314 @@
+/*
+ * VGIC system registers handling functions for AArch64 mode
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include "vgic.h"
+#include "sys_regs.h"
+
+static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 host_pri_bits, host_id_bits, host_seis, host_a3v, seis, a3v;
+ struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_vmcr vmcr;
+ u64 val;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (p->is_write) {
+ val = p->regval;
+
+ /*
+ * Disallow restoring VM state if not supported by this
+ * hardware.
+ */
+ host_pri_bits = ((val & ICC_CTLR_EL1_PRI_BITS_MASK) >>
+ ICC_CTLR_EL1_PRI_BITS_SHIFT) + 1;
+ if (host_pri_bits > vgic_v3_cpu->num_pri_bits)
+ return false;
+
+ vgic_v3_cpu->num_pri_bits = host_pri_bits;
+
+ host_id_bits = (val & ICC_CTLR_EL1_ID_BITS_MASK) >>
+ ICC_CTLR_EL1_ID_BITS_SHIFT;
+ if (host_id_bits > vgic_v3_cpu->num_id_bits)
+ return false;
+
+ vgic_v3_cpu->num_id_bits = host_id_bits;
+
+ host_seis = ((kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT);
+ seis = (val & ICC_CTLR_EL1_SEIS_MASK) >>
+ ICC_CTLR_EL1_SEIS_SHIFT;
+ if (host_seis != seis)
+ return false;
+
+ host_a3v = ((kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT);
+ a3v = (val & ICC_CTLR_EL1_A3V_MASK) >> ICC_CTLR_EL1_A3V_SHIFT;
+ if (host_a3v != a3v)
+ return false;
+
+ /*
+ * Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
+ * The vgic_set_vmcr() will convert to ICH_VMCR layout.
+ */
+ vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT;
+ vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ val = 0;
+ val |= (vgic_v3_cpu->num_pri_bits - 1) <<
+ ICC_CTLR_EL1_PRI_BITS_SHIFT;
+ val |= vgic_v3_cpu->num_id_bits << ICC_CTLR_EL1_ID_BITS_SHIFT;
+ val |= ((kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT) <<
+ ICC_CTLR_EL1_SEIS_SHIFT;
+ val |= ((kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT) <<
+ ICC_CTLR_EL1_A3V_SHIFT;
+ /*
+ * The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
+ * Extract it directly using ICC_CTLR_EL1 reg definitions.
+ */
+ val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK;
+ val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
+
+ p->regval = val;
+ }
+
+ return true;
+}
+
+static bool access_gic_pmr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (p->is_write) {
+ vmcr.pmr = (p->regval & ICC_PMR_EL1_MASK) >> ICC_PMR_EL1_SHIFT;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ p->regval = (vmcr.pmr << ICC_PMR_EL1_SHIFT) & ICC_PMR_EL1_MASK;
+ }
+
+ return true;
+}
+
+static bool access_gic_bpr0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (p->is_write) {
+ vmcr.bpr = (p->regval & ICC_BPR0_EL1_MASK) >>
+ ICC_BPR0_EL1_SHIFT;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ p->regval = (vmcr.bpr << ICC_BPR0_EL1_SHIFT) &
+ ICC_BPR0_EL1_MASK;
+ }
+
+ return true;
+}
+
+static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_vmcr vmcr;
+
+ if (!p->is_write)
+ p->regval = 0;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (!vmcr.cbpr) {
+ if (p->is_write) {
+ vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >>
+ ICC_BPR1_EL1_SHIFT;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ p->regval = (vmcr.abpr << ICC_BPR1_EL1_SHIFT) &
+ ICC_BPR1_EL1_MASK;
+ }
+ } else {
+ if (!p->is_write)
+ p->regval = min((vmcr.bpr + 1), 7U);
+ }
+
+ return true;
+}
+
+static bool access_gic_grpen0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (p->is_write) {
+ vmcr.grpen0 = (p->regval & ICC_IGRPEN0_EL1_MASK) >>
+ ICC_IGRPEN0_EL1_SHIFT;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ p->regval = (vmcr.grpen0 << ICC_IGRPEN0_EL1_SHIFT) &
+ ICC_IGRPEN0_EL1_MASK;
+ }
+
+ return true;
+}
+
+static bool access_gic_grpen1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (p->is_write) {
+ vmcr.grpen1 = (p->regval & ICC_IGRPEN1_EL1_MASK) >>
+ ICC_IGRPEN1_EL1_SHIFT;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ p->regval = (vmcr.grpen1 << ICC_IGRPEN1_EL1_SHIFT) &
+ ICC_IGRPEN1_EL1_MASK;
+ }
+
+ return true;
+}
+
+static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p, u8 apr, u8 idx)
+{
+ struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
+ uint32_t *ap_reg;
+
+ if (apr)
+ ap_reg = &vgicv3->vgic_ap1r[idx];
+ else
+ ap_reg = &vgicv3->vgic_ap0r[idx];
+
+ if (p->is_write)
+ *ap_reg = p->regval;
+ else
+ p->regval = *ap_reg;
+}
+
+static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r, u8 apr)
+{
+ u8 idx = r->Op2 & 3;
+
+ if (idx > vgic_v3_max_apr_idx(vcpu))
+ goto err;
+
+ vgic_v3_access_apr_reg(vcpu, p, apr, idx);
+ return true;
+err:
+ if (!p->is_write)
+ p->regval = 0;
+
+ return false;
+}
+
+static bool access_gic_ap0r(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+
+{
+ return access_gic_aprn(vcpu, p, r, 0);
+}
+
+static bool access_gic_ap1r(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ return access_gic_aprn(vcpu, p, r, 1);
+}
+
+static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ /* Validate SRE bit */
+ if (p->is_write) {
+ if (!(p->regval & ICC_SRE_EL1_SRE))
+ return false;
+ } else {
+ p->regval = vgicv3->vgic_sre;
+ }
+
+ return true;
+}
+static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
+ { SYS_DESC(SYS_ICC_PMR_EL1), access_gic_pmr },
+ { SYS_DESC(SYS_ICC_BPR0_EL1), access_gic_bpr0 },
+ { SYS_DESC(SYS_ICC_AP0R0_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP0R1_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP0R2_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP0R3_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP1R0_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_AP1R1_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_AP1R2_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_AP1R3_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_BPR1_EL1), access_gic_bpr1 },
+ { SYS_DESC(SYS_ICC_CTLR_EL1), access_gic_ctlr },
+ { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
+ { SYS_DESC(SYS_ICC_IGRPEN0_EL1), access_gic_grpen0 },
+ { SYS_DESC(SYS_ICC_IGRPEN1_EL1), access_gic_grpen1 },
+};
+
+int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
+ u64 *reg)
+{
+ struct sys_reg_params params;
+ u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64;
+
+ params.regval = *reg;
+ params.is_write = is_write;
+ params.is_aarch32 = false;
+ params.is_32bit = false;
+
+ if (find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
+ ARRAY_SIZE(gic_v3_icc_reg_descs)))
+ return 0;
+
+ return -ENXIO;
+}
+
+int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id,
+ u64 *reg)
+{
+ struct sys_reg_params params;
+ const struct sys_reg_desc *r;
+ u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64;
+
+ if (is_write)
+ params.regval = *reg;
+ params.is_write = is_write;
+ params.is_aarch32 = false;
+ params.is_32bit = false;
+
+ r = find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
+ ARRAY_SIZE(gic_v3_icc_reg_descs));
+ if (!r)
+ return -ENXIO;
+
+ if (!r->access(vcpu, &params, r))
+ return -EINVAL;
+
+ if (!is_write)
+ *reg = params.regval;
+
+ return 0;
+}