author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-05-06 01:02:30 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-05-06 01:02:30 +0000
commit     76cb841cb886eef6b3bee341a2266c76578724ad
tree       f5892e5ba6cc11949952a6ce4ecbe6d516d6ce58   /arch/mips/kvm
parent     Initial commit.
Adding upstream version 4.19.249.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/mips/kvm')
-rw-r--r--   arch/mips/kvm/00README.txt |   31
-rw-r--r--   arch/mips/kvm/Kconfig      |   77
-rw-r--r--   arch/mips/kvm/Makefile     |   24
-rw-r--r--   arch/mips/kvm/callback.c   |   14
-rw-r--r--   arch/mips/kvm/commpage.c   |   32
-rw-r--r--   arch/mips/kvm/commpage.h   |   24
-rw-r--r--   arch/mips/kvm/dyntrans.c   |  143
-rw-r--r--   arch/mips/kvm/emulate.c    | 2829
-rw-r--r--   arch/mips/kvm/entry.c      |  943
-rw-r--r--   arch/mips/kvm/fpu.S        |  125
-rw-r--r--   arch/mips/kvm/hypcall.c    |   53
-rw-r--r--   arch/mips/kvm/interrupt.c  |  242
-rw-r--r--   arch/mips/kvm/interrupt.h  |   55
-rw-r--r--   arch/mips/kvm/mips.c       | 1732
-rw-r--r--   arch/mips/kvm/mmu.c        | 1261
-rw-r--r--   arch/mips/kvm/msa.S        |  161
-rw-r--r--   arch/mips/kvm/stats.c      |   63
-rw-r--r--   arch/mips/kvm/tlb.c        |  660
-rw-r--r--   arch/mips/kvm/trace.h      |  346
-rw-r--r--   arch/mips/kvm/trap_emul.c  | 1329
-rw-r--r--   arch/mips/kvm/vz.c         | 3223
21 files changed, 13367 insertions(+), 0 deletions(-)
diff --git a/arch/mips/kvm/00README.txt b/arch/mips/kvm/00README.txt
new file mode 100644
index 000000000..51617e481
--- /dev/null
+++ b/arch/mips/kvm/00README.txt
@@ -0,0 +1,31 @@
+KVM/MIPS Trap & Emulate Release Notes
+=====================================
+
+(1) KVM/MIPS should support MIPS32R2 and beyond. It has been tested on the following platforms:
+    Malta Board with FPGA based 34K
+    Sigma Designs TangoX board with a 24K based 8654 SoC.
+    Malta Board with 74K @ 1GHz
+
+(2) Both Guest kernel and Guest Userspace execute in UM.
+    Guest User address space:   0x00000000 -> 0x40000000
+    Guest Kernel Unmapped:      0x40000000 -> 0x60000000
+    Guest Kernel Mapped:        0x60000000 -> 0x80000000
+
+    Guest Usermode virtual memory is limited to 1GB.
+
+(2) 16K Page Sizes: Both Host Kernel and Guest Kernel should have the same page size, currently at least 16K.
+    Note that due to cache aliasing issues, 4K page sizes are NOT supported.
+
+(3) No HugeTLB Support
+    Both the host kernel and Guest kernel should have the page size set to 16K.
+    This will be implemented in a future release.
+
+(4) KVM/MIPS does not have support for SMP Guests
+    Linux-3.7-rc2 based SMP guest hangs due to the following code sequence in the generated TLB handlers:
+    LL/TLBP/SC.  Since the TLBP instruction causes a trap the reservation gets cleared
+    when we ERET back to the guest. This causes the guest to hang in an infinite loop.
+    This will be fixed in a future release.
+
+(5) Use Host FPU
+    Currently KVM/MIPS emulates a 24K CPU without a FPU.
+    This will be fixed in a future release
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
new file mode 100644
index 000000000..76b93a9c8
--- /dev/null
+++ b/arch/mips/kvm/Kconfig
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# KVM configuration
+#
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	---help---
+	  Say Y here to get to see options for using your Linux host to run
+	  other operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	tristate "Kernel-based Virtual Machine (KVM) support"
+	depends on HAVE_KVM
+	select EXPORT_UASM
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+	select HAVE_KVM_VCPU_ASYNC_IOCTL
+	select KVM_MMIO
+	select MMU_NOTIFIER
+	select SRCU
+	---help---
+	  Support for hosting Guest kernels.
+
+choice
+	prompt "Virtualization mode"
+	depends on KVM
+	default KVM_MIPS_TE
+
+config KVM_MIPS_TE
+	bool "Trap & Emulate"
+	---help---
+	  Use trap and emulate to virtualize 32-bit guests in user mode. This
+	  does not require any special hardware Virtualization support beyond
+	  standard MIPS32/64 r2 or later, but it does require the guest kernel
+	  to be configured with CONFIG_KVM_GUEST=y so that it resides in the
+	  user address segment.
+
+config KVM_MIPS_VZ
+	bool "MIPS Virtualization (VZ) ASE"
+	---help---
+	  Use the MIPS Virtualization (VZ) ASE to virtualize guests. This
+	  supports running unmodified guest kernels (with CONFIG_KVM_GUEST=n),
+	  but requires hardware support.
+
+endchoice
+
+config KVM_MIPS_DYN_TRANS
+	bool "KVM/MIPS: Dynamic binary translation to reduce traps"
+	depends on KVM_MIPS_TE
+	default y
+	---help---
+	  When running in Trap & Emulate mode patch privileged
+	  instructions to reduce the number of traps.
+
+	  If unsure, say Y.
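Editorial aside (not part of the patch): the user/kernel split described in note (2) of 00README.txt above can be made concrete with a small standalone sketch. The enum, the helper name classify_gva(), and the test addresses are hypothetical illustrations; only the boundary addresses come from the README.

#include <stdint.h>
#include <stdio.h>

enum guest_region {
	GUEST_USER,            /* 0x00000000 -> 0x40000000, runs in UM   */
	GUEST_KERNEL_UNMAPPED, /* 0x40000000 -> 0x60000000               */
	GUEST_KERNEL_MAPPED,   /* 0x60000000 -> 0x80000000               */
	GUEST_OUTSIDE          /* 0x80000000 and above: not guest space  */
};

static enum guest_region classify_gva(uint32_t gva)
{
	if (gva < 0x40000000u)
		return GUEST_USER;
	if (gva < 0x60000000u)
		return GUEST_KERNEL_UNMAPPED;
	if (gva < 0x80000000u)
		return GUEST_KERNEL_MAPPED;
	return GUEST_OUTSIDE;
}

int main(void)
{
	/* Example addresses in each of the three guest regions */
	printf("%d %d %d\n", classify_gva(0x00400000u),
	       classify_gva(0x42000000u), classify_gva(0x7fffffffu));
	return 0;
}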
+ +config KVM_MIPS_DEBUG_COP0_COUNTERS + bool "Maintain counters for COP0 accesses" + depends on KVM + ---help--- + Maintain statistics for Guest COP0 accesses. + A histogram of COP0 accesses is printed when the VM is + shutdown. + + If unsure, say N. + +source drivers/vhost/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile new file mode 100644 index 000000000..01affc1d2 --- /dev/null +++ b/arch/mips/kvm/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for KVM support for MIPS +# + +common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) + +EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm + +common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o + +kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \ + interrupt.o stats.o commpage.o \ + fpu.o +kvm-objs += hypcall.o +kvm-objs += mmu.o + +ifdef CONFIG_KVM_MIPS_VZ +kvm-objs += vz.o +else +kvm-objs += dyntrans.o +kvm-objs += trap_emul.o +endif +obj-$(CONFIG_KVM) += kvm.o +obj-y += callback.o tlb.o diff --git a/arch/mips/kvm/callback.c b/arch/mips/kvm/callback.c new file mode 100644 index 000000000..d88aa2173 --- /dev/null +++ b/arch/mips/kvm/callback.c @@ -0,0 +1,14 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Yann Le Du <ledu@kymasys.com> + */ + +#include <linux/export.h> +#include <linux/kvm_host.h> + +struct kvm_mips_callbacks *kvm_mips_callbacks; +EXPORT_SYMBOL_GPL(kvm_mips_callbacks); diff --git a/arch/mips/kvm/commpage.c b/arch/mips/kvm/commpage.c new file mode 100644 index 000000000..f43629979 --- /dev/null +++ b/arch/mips/kvm/commpage.c @@ -0,0 +1,32 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * commpage, currently used for Virtual COP0 registers. + * Mapped into the guest kernel @ KVM_GUEST_COMMPAGE_ADDR. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/bootmem.h> +#include <asm/page.h> +#include <asm/cacheflush.h> +#include <asm/mmu_context.h> + +#include <linux/kvm_host.h> + +#include "commpage.h" + +void kvm_mips_commpage_init(struct kvm_vcpu *vcpu) +{ + struct kvm_mips_commpage *page = vcpu->arch.kseg0_commpage; + + /* Specific init values for fields */ + vcpu->arch.cop0 = &page->cop0; +} diff --git a/arch/mips/kvm/commpage.h b/arch/mips/kvm/commpage.h new file mode 100644 index 000000000..08c5fa2bb --- /dev/null +++ b/arch/mips/kvm/commpage.h @@ -0,0 +1,24 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: commpage: mapped into get kernel space + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. 
+ * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#ifndef __KVM_MIPS_COMMPAGE_H__ +#define __KVM_MIPS_COMMPAGE_H__ + +struct kvm_mips_commpage { + /* COP0 state is mapped into Guest kernel via commpage */ + struct mips_coproc cop0; +}; + +#define KVM_MIPS_COMM_EIDI_OFFSET 0x0 + +extern void kvm_mips_commpage_init(struct kvm_vcpu *vcpu); + +#endif /* __KVM_MIPS_COMMPAGE_H__ */ diff --git a/arch/mips/kvm/dyntrans.c b/arch/mips/kvm/dyntrans.c new file mode 100644 index 000000000..f8e772564 --- /dev/null +++ b/arch/mips/kvm/dyntrans.c @@ -0,0 +1,143 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Binary Patching for privileged instructions, reduces traps. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/highmem.h> +#include <linux/kvm_host.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/bootmem.h> +#include <asm/cacheflush.h> + +#include "commpage.h" + +/** + * kvm_mips_trans_replace() - Replace trapping instruction in guest memory. + * @vcpu: Virtual CPU. + * @opc: PC of instruction to replace. + * @replace: Instruction to write + */ +static int kvm_mips_trans_replace(struct kvm_vcpu *vcpu, u32 *opc, + union mips_instruction replace) +{ + unsigned long vaddr = (unsigned long)opc; + int err; + +retry: + /* The GVA page table is still active so use the Linux TLB handlers */ + kvm_trap_emul_gva_lockless_begin(vcpu); + err = put_user(replace.word, opc); + kvm_trap_emul_gva_lockless_end(vcpu); + + if (unlikely(err)) { + /* + * We write protect clean pages in GVA page table so normal + * Linux TLB mod handler doesn't silently dirty the page. + * Its also possible we raced with a GVA invalidation. + * Try to force the page to become dirty. + */ + err = kvm_trap_emul_gva_fault(vcpu, vaddr, true); + if (unlikely(err)) { + kvm_info("%s: Address unwriteable: %p\n", + __func__, opc); + return -EFAULT; + } + + /* + * Try again. This will likely trigger a TLB refill, which will + * fetch the new dirty entry from the GVA page table, which + * should then succeed. + */ + goto retry; + } + __local_flush_icache_user_range(vaddr, vaddr + 4); + + return 0; +} + +int kvm_mips_trans_cache_index(union mips_instruction inst, u32 *opc, + struct kvm_vcpu *vcpu) +{ + union mips_instruction nop_inst = { 0 }; + + /* Replace the CACHE instruction, with a NOP */ + return kvm_mips_trans_replace(vcpu, opc, nop_inst); +} + +/* + * Address based CACHE instructions are transformed into synci(s). 
A little + * heavy for just D-cache invalidates, but avoids an expensive trap + */ +int kvm_mips_trans_cache_va(union mips_instruction inst, u32 *opc, + struct kvm_vcpu *vcpu) +{ + union mips_instruction synci_inst = { 0 }; + + synci_inst.i_format.opcode = bcond_op; + synci_inst.i_format.rs = inst.i_format.rs; + synci_inst.i_format.rt = synci_op; + if (cpu_has_mips_r6) + synci_inst.i_format.simmediate = inst.spec3_format.simmediate; + else + synci_inst.i_format.simmediate = inst.i_format.simmediate; + + return kvm_mips_trans_replace(vcpu, opc, synci_inst); +} + +int kvm_mips_trans_mfc0(union mips_instruction inst, u32 *opc, + struct kvm_vcpu *vcpu) +{ + union mips_instruction mfc0_inst = { 0 }; + u32 rd, sel; + + rd = inst.c0r_format.rd; + sel = inst.c0r_format.sel; + + if (rd == MIPS_CP0_ERRCTL && sel == 0) { + mfc0_inst.r_format.opcode = spec_op; + mfc0_inst.r_format.rd = inst.c0r_format.rt; + mfc0_inst.r_format.func = add_op; + } else { + mfc0_inst.i_format.opcode = lw_op; + mfc0_inst.i_format.rt = inst.c0r_format.rt; + mfc0_inst.i_format.simmediate = KVM_GUEST_COMMPAGE_ADDR | + offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]); +#ifdef CONFIG_CPU_BIG_ENDIAN + if (sizeof(vcpu->arch.cop0->reg[0][0]) == 8) + mfc0_inst.i_format.simmediate |= 4; +#endif + } + + return kvm_mips_trans_replace(vcpu, opc, mfc0_inst); +} + +int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc, + struct kvm_vcpu *vcpu) +{ + union mips_instruction mtc0_inst = { 0 }; + u32 rd, sel; + + rd = inst.c0r_format.rd; + sel = inst.c0r_format.sel; + + mtc0_inst.i_format.opcode = sw_op; + mtc0_inst.i_format.rt = inst.c0r_format.rt; + mtc0_inst.i_format.simmediate = KVM_GUEST_COMMPAGE_ADDR | + offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]); +#ifdef CONFIG_CPU_BIG_ENDIAN + if (sizeof(vcpu->arch.cop0->reg[0][0]) == 8) + mtc0_inst.i_format.simmediate |= 4; +#endif + + return kvm_mips_trans_replace(vcpu, opc, mtc0_inst); +} diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c new file mode 100644 index 000000000..4144bfaef --- /dev/null +++ b/arch/mips/kvm/emulate.c @@ -0,0 +1,2829 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Instruction/Exception emulation + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/ktime.h> +#include <linux/kvm_host.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/bootmem.h> +#include <linux/random.h> +#include <asm/page.h> +#include <asm/cacheflush.h> +#include <asm/cacheops.h> +#include <asm/cpu-info.h> +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#include <asm/inst.h> + +#undef CONFIG_MIPS_MT +#include <asm/r4kcache.h> +#define CONFIG_MIPS_MT + +#include "interrupt.h" +#include "commpage.h" + +#include "trace.h" + +/* + * Compute the return address and do emulate branch simulation, if required. + * This function should be called only in branch delay slot active. 
+ */ +static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc, + unsigned long *out) +{ + unsigned int dspcontrol; + union mips_instruction insn; + struct kvm_vcpu_arch *arch = &vcpu->arch; + long epc = instpc; + long nextpc; + int err; + + if (epc & 3) { + kvm_err("%s: unaligned epc\n", __func__); + return -EINVAL; + } + + /* Read the instruction */ + err = kvm_get_badinstrp((u32 *)epc, vcpu, &insn.word); + if (err) + return err; + + switch (insn.i_format.opcode) { + /* jr and jalr are in r_format format. */ + case spec_op: + switch (insn.r_format.func) { + case jalr_op: + arch->gprs[insn.r_format.rd] = epc + 8; + /* Fall through */ + case jr_op: + nextpc = arch->gprs[insn.r_format.rs]; + break; + default: + return -EINVAL; + } + break; + + /* + * This group contains: + * bltz_op, bgez_op, bltzl_op, bgezl_op, + * bltzal_op, bgezal_op, bltzall_op, bgezall_op. + */ + case bcond_op: + switch (insn.i_format.rt) { + case bltz_op: + case bltzl_op: + if ((long)arch->gprs[insn.i_format.rs] < 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case bgez_op: + case bgezl_op: + if ((long)arch->gprs[insn.i_format.rs] >= 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case bltzal_op: + case bltzall_op: + arch->gprs[31] = epc + 8; + if ((long)arch->gprs[insn.i_format.rs] < 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case bgezal_op: + case bgezall_op: + arch->gprs[31] = epc + 8; + if ((long)arch->gprs[insn.i_format.rs] >= 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + case bposge32_op: + if (!cpu_has_dsp) { + kvm_err("%s: DSP branch but not DSP ASE\n", + __func__); + return -EINVAL; + } + + dspcontrol = rddsp(0x01); + + if (dspcontrol >= 32) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + default: + return -EINVAL; + } + break; + + /* These are unconditional and in j_format. */ + case jal_op: + arch->gprs[31] = instpc + 8; + case j_op: + epc += 4; + epc >>= 28; + epc <<= 28; + epc |= (insn.j_format.target << 2); + nextpc = epc; + break; + + /* These are conditional and in i_format. */ + case beq_op: + case beql_op: + if (arch->gprs[insn.i_format.rs] == + arch->gprs[insn.i_format.rt]) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case bne_op: + case bnel_op: + if (arch->gprs[insn.i_format.rs] != + arch->gprs[insn.i_format.rt]) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case blez_op: /* POP06 */ +#ifndef CONFIG_CPU_MIPSR6 + case blezl_op: /* removed in R6 */ +#endif + if (insn.i_format.rt != 0) + goto compact_branch; + if ((long)arch->gprs[insn.i_format.rs] <= 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case bgtz_op: /* POP07 */ +#ifndef CONFIG_CPU_MIPSR6 + case bgtzl_op: /* removed in R6 */ +#endif + if (insn.i_format.rt != 0) + goto compact_branch; + if ((long)arch->gprs[insn.i_format.rs] > 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + /* And now the FPA/cp1 branch instructions. 
*/ + case cop1_op: + kvm_err("%s: unsupported cop1_op\n", __func__); + return -EINVAL; + +#ifdef CONFIG_CPU_MIPSR6 + /* R6 added the following compact branches with forbidden slots */ + case blezl_op: /* POP26 */ + case bgtzl_op: /* POP27 */ + /* only rt == 0 isn't compact branch */ + if (insn.i_format.rt != 0) + goto compact_branch; + return -EINVAL; + case pop10_op: + case pop30_op: + /* only rs == rt == 0 is reserved, rest are compact branches */ + if (insn.i_format.rs != 0 || insn.i_format.rt != 0) + goto compact_branch; + return -EINVAL; + case pop66_op: + case pop76_op: + /* only rs == 0 isn't compact branch */ + if (insn.i_format.rs != 0) + goto compact_branch; + return -EINVAL; +compact_branch: + /* + * If we've hit an exception on the forbidden slot, then + * the branch must not have been taken. + */ + epc += 8; + nextpc = epc; + break; +#else +compact_branch: + /* Fall through - Compact branches not supported before R6 */ +#endif + default: + return -EINVAL; + } + + *out = nextpc; + return 0; +} + +enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause) +{ + int err; + + if (cause & CAUSEF_BD) { + err = kvm_compute_return_epc(vcpu, vcpu->arch.pc, + &vcpu->arch.pc); + if (err) + return EMULATE_FAIL; + } else { + vcpu->arch.pc += 4; + } + + kvm_debug("update_pc(): New PC: %#lx\n", vcpu->arch.pc); + + return EMULATE_DONE; +} + +/** + * kvm_get_badinstr() - Get bad instruction encoding. + * @opc: Guest pointer to faulting instruction. + * @vcpu: KVM VCPU information. + * + * Gets the instruction encoding of the faulting instruction, using the saved + * BadInstr register value if it exists, otherwise falling back to reading guest + * memory at @opc. + * + * Returns: The instruction encoding of the faulting instruction. + */ +int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) +{ + if (cpu_has_badinstr) { + *out = vcpu->arch.host_cp0_badinstr; + return 0; + } else { + return kvm_get_inst(opc, vcpu, out); + } +} + +/** + * kvm_get_badinstrp() - Get bad prior instruction encoding. + * @opc: Guest pointer to prior faulting instruction. + * @vcpu: KVM VCPU information. + * + * Gets the instruction encoding of the prior faulting instruction (the branch + * containing the delay slot which faulted), using the saved BadInstrP register + * value if it exists, otherwise falling back to reading guest memory at @opc. + * + * Returns: The instruction encoding of the prior faulting instruction. + */ +int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) +{ + if (cpu_has_badinstrp) { + *out = vcpu->arch.host_cp0_badinstrp; + return 0; + } else { + return kvm_get_inst(opc, vcpu, out); + } +} + +/** + * kvm_mips_count_disabled() - Find whether the CP0_Count timer is disabled. + * @vcpu: Virtual CPU. + * + * Returns: 1 if the CP0_Count timer is disabled by either the guest + * CP0_Cause.DC bit or the count_ctl.DC bit. + * 0 otherwise (in which case CP0_Count timer is running). + */ +int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + return (vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) || + (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC); +} + +/** + * kvm_mips_ktime_to_count() - Scale ktime_t to a 32-bit count. + * + * Caches the dynamic nanosecond bias in vcpu->arch.count_dyn_bias. + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). 
+ */ +static u32 kvm_mips_ktime_to_count(struct kvm_vcpu *vcpu, ktime_t now) +{ + s64 now_ns, periods; + u64 delta; + + now_ns = ktime_to_ns(now); + delta = now_ns + vcpu->arch.count_dyn_bias; + + if (delta >= vcpu->arch.count_period) { + /* If delta is out of safe range the bias needs adjusting */ + periods = div64_s64(now_ns, vcpu->arch.count_period); + vcpu->arch.count_dyn_bias = -periods * vcpu->arch.count_period; + /* Recalculate delta with new bias */ + delta = now_ns + vcpu->arch.count_dyn_bias; + } + + /* + * We've ensured that: + * delta < count_period + * + * Therefore the intermediate delta*count_hz will never overflow since + * at the boundary condition: + * delta = count_period + * delta = NSEC_PER_SEC * 2^32 / count_hz + * delta * count_hz = NSEC_PER_SEC * 2^32 + */ + return div_u64(delta * vcpu->arch.count_hz, NSEC_PER_SEC); +} + +/** + * kvm_mips_count_time() - Get effective current time. + * @vcpu: Virtual CPU. + * + * Get effective monotonic ktime. This is usually a straightforward ktime_get(), + * except when the master disable bit is set in count_ctl, in which case it is + * count_resume, i.e. the time that the count was disabled. + * + * Returns: Effective monotonic ktime for CP0_Count. + */ +static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu) +{ + if (unlikely(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC)) + return vcpu->arch.count_resume; + + return ktime_get(); +} + +/** + * kvm_mips_read_count_running() - Read the current count value as if running. + * @vcpu: Virtual CPU. + * @now: Kernel time to read CP0_Count at. + * + * Returns the current guest CP0_Count register at time @now and handles if the + * timer interrupt is pending and hasn't been handled yet. + * + * Returns: The current value of the guest CP0_Count register. + */ +static u32 kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + ktime_t expires, threshold; + u32 count, compare; + int running; + + /* Calculate the biased and scaled guest CP0_Count */ + count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now); + compare = kvm_read_c0_guest_compare(cop0); + + /* + * Find whether CP0_Count has reached the closest timer interrupt. If + * not, we shouldn't inject it. + */ + if ((s32)(count - compare) < 0) + return count; + + /* + * The CP0_Count we're going to return has already reached the closest + * timer interrupt. Quickly check if it really is a new interrupt by + * looking at whether the interval until the hrtimer expiry time is + * less than 1/4 of the timer period. + */ + expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer); + threshold = ktime_add_ns(now, vcpu->arch.count_period / 4); + if (ktime_before(expires, threshold)) { + /* + * Cancel it while we handle it so there's no chance of + * interference with the timeout handler. + */ + running = hrtimer_cancel(&vcpu->arch.comparecount_timer); + + /* Nothing should be waiting on the timeout */ + kvm_mips_callbacks->queue_timer_int(vcpu); + + /* + * Restart the timer if it was running based on the expiry time + * we read, so that we don't push it back 2 periods. + */ + if (running) { + expires = ktime_add_ns(expires, + vcpu->arch.count_period); + hrtimer_start(&vcpu->arch.comparecount_timer, expires, + HRTIMER_MODE_ABS); + } + } + + return count; +} + +/** + * kvm_mips_read_count() - Read the current count value. + * @vcpu: Virtual CPU. + * + * Read the current guest CP0_Count value, taking into account whether the timer + * is stopped. 
+ * + * Returns: The current guest CP0_Count value. + */ +u32 kvm_mips_read_count(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + /* If count disabled just read static copy of count */ + if (kvm_mips_count_disabled(vcpu)) + return kvm_read_c0_guest_count(cop0); + + return kvm_mips_read_count_running(vcpu, ktime_get()); +} + +/** + * kvm_mips_freeze_hrtimer() - Safely stop the hrtimer. + * @vcpu: Virtual CPU. + * @count: Output pointer for CP0_Count value at point of freeze. + * + * Freeze the hrtimer safely and return both the ktime and the CP0_Count value + * at the point it was frozen. It is guaranteed that any pending interrupts at + * the point it was frozen are handled, and none after that point. + * + * This is useful where the time/CP0_Count is needed in the calculation of the + * new parameters. + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). + * + * Returns: The ktime at the point of freeze. + */ +ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count) +{ + ktime_t now; + + /* stop hrtimer before finding time */ + hrtimer_cancel(&vcpu->arch.comparecount_timer); + now = ktime_get(); + + /* find count at this point and handle pending hrtimer */ + *count = kvm_mips_read_count_running(vcpu, now); + + return now; +} + +/** + * kvm_mips_resume_hrtimer() - Resume hrtimer, updating expiry. + * @vcpu: Virtual CPU. + * @now: ktime at point of resume. + * @count: CP0_Count at point of resume. + * + * Resumes the timer and updates the timer expiry based on @now and @count. + * This can be used in conjunction with kvm_mips_freeze_timer() when timer + * parameters need to be changed. + * + * It is guaranteed that a timer interrupt immediately after resume will be + * handled, but not if CP_Compare is exactly at @count. That case is already + * handled by kvm_mips_freeze_timer(). + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). + */ +static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu, + ktime_t now, u32 count) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + u32 compare; + u64 delta; + ktime_t expire; + + /* Calculate timeout (wrap 0 to 2^32) */ + compare = kvm_read_c0_guest_compare(cop0); + delta = (u64)(u32)(compare - count - 1) + 1; + delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz); + expire = ktime_add_ns(now, delta); + + /* Update hrtimer to use new timeout */ + hrtimer_cancel(&vcpu->arch.comparecount_timer); + hrtimer_start(&vcpu->arch.comparecount_timer, expire, HRTIMER_MODE_ABS); +} + +/** + * kvm_mips_restore_hrtimer() - Restore hrtimer after a gap, updating expiry. + * @vcpu: Virtual CPU. + * @before: Time before Count was saved, lower bound of drift calculation. + * @count: CP0_Count at point of restore. + * @min_drift: Minimum amount of drift permitted before correction. + * Must be <= 0. + * + * Restores the timer from a particular @count, accounting for drift. This can + * be used in conjunction with kvm_mips_freeze_timer() when a hardware timer is + * to be used for a period of time, but the exact ktime corresponding to the + * final Count that must be restored is not known. + * + * It is gauranteed that a timer interrupt immediately after restore will be + * handled, but not if CP0_Compare is exactly at @count. That case should + * already be handled when the hardware timer state is saved. + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is not + * stopped). + * + * Returns: Amount of correction to count_bias due to drift. 
+ */ +int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before, + u32 count, int min_drift) +{ + ktime_t now, count_time; + u32 now_count, before_count; + u64 delta; + int drift, ret = 0; + + /* Calculate expected count at before */ + before_count = vcpu->arch.count_bias + + kvm_mips_ktime_to_count(vcpu, before); + + /* + * Detect significantly negative drift, where count is lower than + * expected. Some negative drift is expected when hardware counter is + * set after kvm_mips_freeze_timer(), and it is harmless to allow the + * time to jump forwards a little, within reason. If the drift is too + * significant, adjust the bias to avoid a big Guest.CP0_Count jump. + */ + drift = count - before_count; + if (drift < min_drift) { + count_time = before; + vcpu->arch.count_bias += drift; + ret = drift; + goto resume; + } + + /* Calculate expected count right now */ + now = ktime_get(); + now_count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now); + + /* + * Detect positive drift, where count is higher than expected, and + * adjust the bias to avoid guest time going backwards. + */ + drift = count - now_count; + if (drift > 0) { + count_time = now; + vcpu->arch.count_bias += drift; + ret = drift; + goto resume; + } + + /* Subtract nanosecond delta to find ktime when count was read */ + delta = (u64)(u32)(now_count - count); + delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz); + count_time = ktime_sub_ns(now, delta); + +resume: + /* Resume using the calculated ktime */ + kvm_mips_resume_hrtimer(vcpu, count_time, count); + return ret; +} + +/** + * kvm_mips_write_count() - Modify the count and update timer. + * @vcpu: Virtual CPU. + * @count: Guest CP0_Count value to set. + * + * Sets the CP0_Count value and updates the timer accordingly. + */ +void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + ktime_t now; + + /* Calculate bias */ + now = kvm_mips_count_time(vcpu); + vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now); + + if (kvm_mips_count_disabled(vcpu)) + /* The timer's disabled, adjust the static count */ + kvm_write_c0_guest_count(cop0, count); + else + /* Update timeout */ + kvm_mips_resume_hrtimer(vcpu, now, count); +} + +/** + * kvm_mips_init_count() - Initialise timer. + * @vcpu: Virtual CPU. + * @count_hz: Frequency of timer. + * + * Initialise the timer to the specified frequency, zero it, and set it going if + * it's enabled. + */ +void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz) +{ + vcpu->arch.count_hz = count_hz; + vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz); + vcpu->arch.count_dyn_bias = 0; + + /* Starting at 0 */ + kvm_mips_write_count(vcpu, 0); +} + +/** + * kvm_mips_set_count_hz() - Update the frequency of the timer. + * @vcpu: Virtual CPU. + * @count_hz: Frequency of CP0_Count timer in Hz. + * + * Change the frequency of the CP0_Count timer. This is done atomically so that + * CP0_Count is continuous and no timer interrupt is lost. + * + * Returns: -EINVAL if @count_hz is out of range. + * 0 on success. + */ +int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + int dc; + ktime_t now; + u32 count; + + /* ensure the frequency is in a sensible range... */ + if (count_hz <= 0 || count_hz > NSEC_PER_SEC) + return -EINVAL; + /* ... 
and has actually changed */ + if (vcpu->arch.count_hz == count_hz) + return 0; + + /* Safely freeze timer so we can keep it continuous */ + dc = kvm_mips_count_disabled(vcpu); + if (dc) { + now = kvm_mips_count_time(vcpu); + count = kvm_read_c0_guest_count(cop0); + } else { + now = kvm_mips_freeze_hrtimer(vcpu, &count); + } + + /* Update the frequency */ + vcpu->arch.count_hz = count_hz; + vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz); + vcpu->arch.count_dyn_bias = 0; + + /* Calculate adjusted bias so dynamic count is unchanged */ + vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now); + + /* Update and resume hrtimer */ + if (!dc) + kvm_mips_resume_hrtimer(vcpu, now, count); + return 0; +} + +/** + * kvm_mips_write_compare() - Modify compare and update timer. + * @vcpu: Virtual CPU. + * @compare: New CP0_Compare value. + * @ack: Whether to acknowledge timer interrupt. + * + * Update CP0_Compare to a new value and update the timeout. + * If @ack, atomically acknowledge any pending timer interrupt, otherwise ensure + * any pending timer interrupt is preserved. + */ +void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + int dc; + u32 old_compare = kvm_read_c0_guest_compare(cop0); + s32 delta = compare - old_compare; + u32 cause; + ktime_t now = ktime_set(0, 0); /* silence bogus GCC warning */ + u32 count; + + /* if unchanged, must just be an ack */ + if (old_compare == compare) { + if (!ack) + return; + kvm_mips_callbacks->dequeue_timer_int(vcpu); + kvm_write_c0_guest_compare(cop0, compare); + return; + } + + /* + * If guest CP0_Compare moves forward, CP0_GTOffset should be adjusted + * too to prevent guest CP0_Count hitting guest CP0_Compare. + * + * The new GTOffset corresponds to the new value of CP0_Compare, and is + * set prior to it being written into the guest context. We disable + * preemption until the new value is written to prevent restore of a + * GTOffset corresponding to the old CP0_Compare value. + */ + if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta > 0) { + preempt_disable(); + write_c0_gtoffset(compare - read_c0_count()); + back_to_back_c0_hazard(); + } + + /* freeze_hrtimer() takes care of timer interrupts <= count */ + dc = kvm_mips_count_disabled(vcpu); + if (!dc) + now = kvm_mips_freeze_hrtimer(vcpu, &count); + + if (ack) + kvm_mips_callbacks->dequeue_timer_int(vcpu); + else if (IS_ENABLED(CONFIG_KVM_MIPS_VZ)) + /* + * With VZ, writing CP0_Compare acks (clears) CP0_Cause.TI, so + * preserve guest CP0_Cause.TI if we don't want to ack it. + */ + cause = kvm_read_c0_guest_cause(cop0); + + kvm_write_c0_guest_compare(cop0, compare); + + if (IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { + if (delta > 0) + preempt_enable(); + + back_to_back_c0_hazard(); + + if (!ack && cause & CAUSEF_TI) + kvm_write_c0_guest_cause(cop0, cause); + } + + /* resume_hrtimer() takes care of timer interrupts > count */ + if (!dc) + kvm_mips_resume_hrtimer(vcpu, now, count); + + /* + * If guest CP0_Compare is moving backward, we delay CP0_GTOffset change + * until after the new CP0_Compare is written, otherwise new guest + * CP0_Count could hit new guest CP0_Compare. + */ + if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta <= 0) + write_c0_gtoffset(compare - read_c0_count()); +} + +/** + * kvm_mips_count_disable() - Disable count. + * @vcpu: Virtual CPU. + * + * Disable the CP0_Count timer. A timer interrupt on or before the final stop + * time will be handled but not after. 
+ * + * Assumes CP0_Count was previously enabled but now Guest.CP0_Cause.DC or + * count_ctl.DC has been set (count disabled). + * + * Returns: The time that the timer was stopped. + */ +static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + u32 count; + ktime_t now; + + /* Stop hrtimer */ + hrtimer_cancel(&vcpu->arch.comparecount_timer); + + /* Set the static count from the dynamic count, handling pending TI */ + now = ktime_get(); + count = kvm_mips_read_count_running(vcpu, now); + kvm_write_c0_guest_count(cop0, count); + + return now; +} + +/** + * kvm_mips_count_disable_cause() - Disable count using CP0_Cause.DC. + * @vcpu: Virtual CPU. + * + * Disable the CP0_Count timer and set CP0_Cause.DC. A timer interrupt on or + * before the final stop time will be handled if the timer isn't disabled by + * count_ctl.DC, but not after. + * + * Assumes CP0_Cause.DC is clear (count enabled). + */ +void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + kvm_set_c0_guest_cause(cop0, CAUSEF_DC); + if (!(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC)) + kvm_mips_count_disable(vcpu); +} + +/** + * kvm_mips_count_enable_cause() - Enable count using CP0_Cause.DC. + * @vcpu: Virtual CPU. + * + * Enable the CP0_Count timer and clear CP0_Cause.DC. A timer interrupt after + * the start time will be handled if the timer isn't disabled by count_ctl.DC, + * potentially before even returning, so the caller should be careful with + * ordering of CP0_Cause modifications so as not to lose it. + * + * Assumes CP0_Cause.DC is set (count disabled). + */ +void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + u32 count; + + kvm_clear_c0_guest_cause(cop0, CAUSEF_DC); + + /* + * Set the dynamic count to match the static count. + * This starts the hrtimer if count_ctl.DC allows it. + * Otherwise it conveniently updates the biases. + */ + count = kvm_read_c0_guest_count(cop0); + kvm_mips_write_count(vcpu, count); +} + +/** + * kvm_mips_set_count_ctl() - Update the count control KVM register. + * @vcpu: Virtual CPU. + * @count_ctl: Count control register new value. + * + * Set the count control KVM register. The timer is updated accordingly. + * + * Returns: -EINVAL if reserved bits are set. + * 0 on success. + */ +int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + s64 changed = count_ctl ^ vcpu->arch.count_ctl; + s64 delta; + ktime_t expire, now; + u32 count, compare; + + /* Only allow defined bits to be changed */ + if (changed & ~(s64)(KVM_REG_MIPS_COUNT_CTL_DC)) + return -EINVAL; + + /* Apply new value */ + vcpu->arch.count_ctl = count_ctl; + + /* Master CP0_Count disable */ + if (changed & KVM_REG_MIPS_COUNT_CTL_DC) { + /* Is CP0_Cause.DC already disabling CP0_Count? */ + if (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC) { + if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) + /* Just record the current time */ + vcpu->arch.count_resume = ktime_get(); + } else if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) { + /* disable timer and record current time */ + vcpu->arch.count_resume = kvm_mips_count_disable(vcpu); + } else { + /* + * Calculate timeout relative to static count at resume + * time (wrap 0 to 2^32). 
+ */ + count = kvm_read_c0_guest_count(cop0); + compare = kvm_read_c0_guest_compare(cop0); + delta = (u64)(u32)(compare - count - 1) + 1; + delta = div_u64(delta * NSEC_PER_SEC, + vcpu->arch.count_hz); + expire = ktime_add_ns(vcpu->arch.count_resume, delta); + + /* Handle pending interrupt */ + now = ktime_get(); + if (ktime_compare(now, expire) >= 0) + /* Nothing should be waiting on the timeout */ + kvm_mips_callbacks->queue_timer_int(vcpu); + + /* Resume hrtimer without changing bias */ + count = kvm_mips_read_count_running(vcpu, now); + kvm_mips_resume_hrtimer(vcpu, now, count); + } + } + + return 0; +} + +/** + * kvm_mips_set_count_resume() - Update the count resume KVM register. + * @vcpu: Virtual CPU. + * @count_resume: Count resume register new value. + * + * Set the count resume KVM register. + * + * Returns: -EINVAL if out of valid range (0..now). + * 0 on success. + */ +int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume) +{ + /* + * It doesn't make sense for the resume time to be in the future, as it + * would be possible for the next interrupt to be more than a full + * period in the future. + */ + if (count_resume < 0 || count_resume > ktime_to_ns(ktime_get())) + return -EINVAL; + + vcpu->arch.count_resume = ns_to_ktime(count_resume); + return 0; +} + +/** + * kvm_mips_count_timeout() - Push timer forward on timeout. + * @vcpu: Virtual CPU. + * + * Handle an hrtimer event by push the hrtimer forward a period. + * + * Returns: The hrtimer_restart value to return to the hrtimer subsystem. + */ +enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu) +{ + /* Add the Count period to the current expiry time */ + hrtimer_add_expires_ns(&vcpu->arch.comparecount_timer, + vcpu->arch.count_period); + return HRTIMER_RESTART; +} + +enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + enum emulation_result er = EMULATE_DONE; + + if (kvm_read_c0_guest_status(cop0) & ST0_ERL) { + kvm_clear_c0_guest_status(cop0, ST0_ERL); + vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0); + } else if (kvm_read_c0_guest_status(cop0) & ST0_EXL) { + kvm_debug("[%#lx] ERET to %#lx\n", vcpu->arch.pc, + kvm_read_c0_guest_epc(cop0)); + kvm_clear_c0_guest_status(cop0, ST0_EXL); + vcpu->arch.pc = kvm_read_c0_guest_epc(cop0); + + } else { + kvm_err("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n", + vcpu->arch.pc); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) +{ + kvm_debug("[%#lx] !!!WAIT!!! (%#lx)\n", vcpu->arch.pc, + vcpu->arch.pending_exceptions); + + ++vcpu->stat.wait_exits; + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT); + if (!vcpu->arch.pending_exceptions) { + kvm_vz_lose_htimer(vcpu); + vcpu->arch.wait = 1; + kvm_vcpu_block(vcpu); + + /* + * We we are runnable, then definitely go off to user space to + * check if any I/O interrupts are pending. 
+ */ + if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { + kvm_clear_request(KVM_REQ_UNHALT, vcpu); + vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; + } + } + + return EMULATE_DONE; +} + +static void kvm_mips_change_entryhi(struct kvm_vcpu *vcpu, + unsigned long entryhi) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + int cpu, i; + u32 nasid = entryhi & KVM_ENTRYHI_ASID; + + if (((kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID) != nasid)) { + trace_kvm_asid_change(vcpu, kvm_read_c0_guest_entryhi(cop0) & + KVM_ENTRYHI_ASID, nasid); + + /* + * Flush entries from the GVA page tables. + * Guest user page table will get flushed lazily on re-entry to + * guest user if the guest ASID actually changes. + */ + kvm_mips_flush_gva_pt(kern_mm->pgd, KMF_KERN); + + /* + * Regenerate/invalidate kernel MMU context. + * The user MMU context will be regenerated lazily on re-entry + * to guest user if the guest ASID actually changes. + */ + preempt_disable(); + cpu = smp_processor_id(); + get_new_mmu_context(kern_mm, cpu); + for_each_possible_cpu(i) + if (i != cpu) + cpu_context(i, kern_mm) = 0; + preempt_enable(); + } + kvm_write_c0_guest_entryhi(cop0, entryhi); +} + +enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_mips_tlb *tlb; + unsigned long pc = vcpu->arch.pc; + int index; + + index = kvm_read_c0_guest_index(cop0); + if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) { + /* UNDEFINED */ + kvm_debug("[%#lx] TLBR Index %#x out of range\n", pc, index); + index &= KVM_MIPS_GUEST_TLB_SIZE - 1; + } + + tlb = &vcpu->arch.guest_tlb[index]; + kvm_write_c0_guest_pagemask(cop0, tlb->tlb_mask); + kvm_write_c0_guest_entrylo0(cop0, tlb->tlb_lo[0]); + kvm_write_c0_guest_entrylo1(cop0, tlb->tlb_lo[1]); + kvm_mips_change_entryhi(vcpu, tlb->tlb_hi); + + return EMULATE_DONE; +} + +/** + * kvm_mips_invalidate_guest_tlb() - Indicates a change in guest MMU map. + * @vcpu: VCPU with changed mappings. + * @tlb: TLB entry being removed. + * + * This is called to indicate a single change in guest MMU mappings, so that we + * can arrange TLB flushes on this and other CPUs. 
+ */ +static void kvm_mips_invalidate_guest_tlb(struct kvm_vcpu *vcpu, + struct kvm_mips_tlb *tlb) +{ + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; + int cpu, i; + bool user; + + /* No need to flush for entries which are already invalid */ + if (!((tlb->tlb_lo[0] | tlb->tlb_lo[1]) & ENTRYLO_V)) + return; + /* Don't touch host kernel page tables or TLB mappings */ + if ((unsigned long)tlb->tlb_hi > 0x7fffffff) + return; + /* User address space doesn't need flushing for KSeg2/3 changes */ + user = tlb->tlb_hi < KVM_GUEST_KSEG0; + + preempt_disable(); + + /* Invalidate page table entries */ + kvm_trap_emul_invalidate_gva(vcpu, tlb->tlb_hi & VPN2_MASK, user); + + /* + * Probe the shadow host TLB for the entry being overwritten, if one + * matches, invalidate it + */ + kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi, user, true); + + /* Invalidate the whole ASID on other CPUs */ + cpu = smp_processor_id(); + for_each_possible_cpu(i) { + if (i == cpu) + continue; + if (user) + cpu_context(i, user_mm) = 0; + cpu_context(i, kern_mm) = 0; + } + + preempt_enable(); +} + +/* Write Guest TLB Entry @ Index */ +enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + int index = kvm_read_c0_guest_index(cop0); + struct kvm_mips_tlb *tlb = NULL; + unsigned long pc = vcpu->arch.pc; + + if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) { + kvm_debug("%s: illegal index: %d\n", __func__, index); + kvm_debug("[%#lx] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", + pc, index, kvm_read_c0_guest_entryhi(cop0), + kvm_read_c0_guest_entrylo0(cop0), + kvm_read_c0_guest_entrylo1(cop0), + kvm_read_c0_guest_pagemask(cop0)); + index = (index & ~0x80000000) % KVM_MIPS_GUEST_TLB_SIZE; + } + + tlb = &vcpu->arch.guest_tlb[index]; + + kvm_mips_invalidate_guest_tlb(vcpu, tlb); + + tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0); + tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0); + tlb->tlb_lo[0] = kvm_read_c0_guest_entrylo0(cop0); + tlb->tlb_lo[1] = kvm_read_c0_guest_entrylo1(cop0); + + kvm_debug("[%#lx] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", + pc, index, kvm_read_c0_guest_entryhi(cop0), + kvm_read_c0_guest_entrylo0(cop0), + kvm_read_c0_guest_entrylo1(cop0), + kvm_read_c0_guest_pagemask(cop0)); + + return EMULATE_DONE; +} + +/* Write Guest TLB Entry @ Random Index */ +enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_mips_tlb *tlb = NULL; + unsigned long pc = vcpu->arch.pc; + int index; + + get_random_bytes(&index, sizeof(index)); + index &= (KVM_MIPS_GUEST_TLB_SIZE - 1); + + tlb = &vcpu->arch.guest_tlb[index]; + + kvm_mips_invalidate_guest_tlb(vcpu, tlb); + + tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0); + tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0); + tlb->tlb_lo[0] = kvm_read_c0_guest_entrylo0(cop0); + tlb->tlb_lo[1] = kvm_read_c0_guest_entrylo1(cop0); + + kvm_debug("[%#lx] COP0_TLBWR[%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx)\n", + pc, index, kvm_read_c0_guest_entryhi(cop0), + kvm_read_c0_guest_entrylo0(cop0), + kvm_read_c0_guest_entrylo1(cop0)); + + return EMULATE_DONE; +} + +enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + long entryhi = kvm_read_c0_guest_entryhi(cop0); + unsigned long pc = vcpu->arch.pc; + int index = -1; + + index = kvm_mips_guest_tlb_lookup(vcpu, entryhi); + 
+ kvm_write_c0_guest_index(cop0, index); + + kvm_debug("[%#lx] COP0_TLBP (entryhi: %#lx), index: %d\n", pc, entryhi, + index); + + return EMULATE_DONE; +} + +/** + * kvm_mips_config1_wrmask() - Find mask of writable bits in guest Config1 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config1 CP0 + * register, by userland (currently read-only to the guest). + */ +unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = 0; + + /* Permit FPU to be present if FPU is supported */ + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) + mask |= MIPS_CONF1_FP; + + return mask; +} + +/** + * kvm_mips_config3_wrmask() - Find mask of writable bits in guest Config3 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config3 CP0 + * register, by userland (currently read-only to the guest). + */ +unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu) +{ + /* Config4 and ULRI are optional */ + unsigned int mask = MIPS_CONF_M | MIPS_CONF3_ULRI; + + /* Permit MSA to be present if MSA is supported */ + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) + mask |= MIPS_CONF3_MSA; + + return mask; +} + +/** + * kvm_mips_config4_wrmask() - Find mask of writable bits in guest Config4 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config4 CP0 + * register, by userland (currently read-only to the guest). + */ +unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu) +{ + /* Config5 is optional */ + unsigned int mask = MIPS_CONF_M; + + /* KScrExist */ + mask |= 0xfc << MIPS_CONF4_KSCREXIST_SHIFT; + + return mask; +} + +/** + * kvm_mips_config5_wrmask() - Find mask of writable bits in guest Config5 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config5 CP0 + * register, by the guest itself. + */ +unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = 0; + + /* Permit MSAEn changes if MSA supported and enabled */ + if (kvm_mips_guest_has_msa(&vcpu->arch)) + mask |= MIPS_CONF5_MSAEN; + + /* + * Permit guest FPU mode changes if FPU is enabled and the relevant + * feature exists according to FIR register. 
+ */ + if (kvm_mips_guest_has_fpu(&vcpu->arch)) { + if (cpu_has_fre) + mask |= MIPS_CONF5_FRE; + /* We don't support UFR or UFE */ + } + + return mask; +} + +enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, + u32 *opc, u32 cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + enum emulation_result er = EMULATE_DONE; + u32 rt, rd, sel; + unsigned long curr_pc; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + if (inst.co_format.co) { + switch (inst.co_format.func) { + case tlbr_op: /* Read indexed TLB entry */ + er = kvm_mips_emul_tlbr(vcpu); + break; + case tlbwi_op: /* Write indexed */ + er = kvm_mips_emul_tlbwi(vcpu); + break; + case tlbwr_op: /* Write random */ + er = kvm_mips_emul_tlbwr(vcpu); + break; + case tlbp_op: /* TLB Probe */ + er = kvm_mips_emul_tlbp(vcpu); + break; + case rfe_op: + kvm_err("!!!COP0_RFE!!!\n"); + break; + case eret_op: + er = kvm_mips_emul_eret(vcpu); + goto dont_update_pc; + case wait_op: + er = kvm_mips_emul_wait(vcpu); + break; + case hypcall_op: + er = kvm_mips_emul_hypcall(vcpu, inst); + break; + } + } else { + rt = inst.c0r_format.rt; + rd = inst.c0r_format.rd; + sel = inst.c0r_format.sel; + + switch (inst.c0r_format.rs) { + case mfc_op: +#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS + cop0->stat[rd][sel]++; +#endif + /* Get reg */ + if ((rd == MIPS_CP0_COUNT) && (sel == 0)) { + vcpu->arch.gprs[rt] = + (s32)kvm_mips_read_count(vcpu); + } else if ((rd == MIPS_CP0_ERRCTL) && (sel == 0)) { + vcpu->arch.gprs[rt] = 0x0; +#ifdef CONFIG_KVM_MIPS_DYN_TRANS + kvm_mips_trans_mfc0(inst, opc, vcpu); +#endif + } else { + vcpu->arch.gprs[rt] = (s32)cop0->reg[rd][sel]; + +#ifdef CONFIG_KVM_MIPS_DYN_TRANS + kvm_mips_trans_mfc0(inst, opc, vcpu); +#endif + } + + trace_kvm_hwr(vcpu, KVM_TRACE_MFC0, + KVM_TRACE_COP0(rd, sel), + vcpu->arch.gprs[rt]); + break; + + case dmfc_op: + vcpu->arch.gprs[rt] = cop0->reg[rd][sel]; + + trace_kvm_hwr(vcpu, KVM_TRACE_DMFC0, + KVM_TRACE_COP0(rd, sel), + vcpu->arch.gprs[rt]); + break; + + case mtc_op: +#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS + cop0->stat[rd][sel]++; +#endif + trace_kvm_hwr(vcpu, KVM_TRACE_MTC0, + KVM_TRACE_COP0(rd, sel), + vcpu->arch.gprs[rt]); + + if ((rd == MIPS_CP0_TLB_INDEX) + && (vcpu->arch.gprs[rt] >= + KVM_MIPS_GUEST_TLB_SIZE)) { + kvm_err("Invalid TLB Index: %ld", + vcpu->arch.gprs[rt]); + er = EMULATE_FAIL; + break; + } + if ((rd == MIPS_CP0_PRID) && (sel == 1)) { + /* + * Preserve core number, and keep the exception + * base in guest KSeg0. 
+ */ + kvm_change_c0_guest_ebase(cop0, 0x1ffff000, + vcpu->arch.gprs[rt]); + } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { + kvm_mips_change_entryhi(vcpu, + vcpu->arch.gprs[rt]); + } + /* Are we writing to COUNT */ + else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) { + kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]); + goto done; + } else if ((rd == MIPS_CP0_COMPARE) && (sel == 0)) { + /* If we are writing to COMPARE */ + /* Clear pending timer interrupt, if any */ + kvm_mips_write_compare(vcpu, + vcpu->arch.gprs[rt], + true); + } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { + unsigned int old_val, val, change; + + old_val = kvm_read_c0_guest_status(cop0); + val = vcpu->arch.gprs[rt]; + change = val ^ old_val; + + /* Make sure that the NMI bit is never set */ + val &= ~ST0_NMI; + + /* + * Don't allow CU1 or FR to be set unless FPU + * capability enabled and exists in guest + * configuration. + */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + val &= ~(ST0_CU1 | ST0_FR); + + /* + * Also don't allow FR to be set if host doesn't + * support it. + */ + if (!(current_cpu_data.fpu_id & MIPS_FPIR_F64)) + val &= ~ST0_FR; + + + /* Handle changes in FPU mode */ + preempt_disable(); + + /* + * FPU and Vector register state is made + * UNPREDICTABLE by a change of FR, so don't + * even bother saving it. + */ + if (change & ST0_FR) + kvm_drop_fpu(vcpu); + + /* + * If MSA state is already live, it is undefined + * how it interacts with FR=0 FPU state, and we + * don't want to hit reserved instruction + * exceptions trying to save the MSA state later + * when CU=1 && FR=1, so play it safe and save + * it first. + */ + if (change & ST0_CU1 && !(val & ST0_FR) && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) + kvm_lose_fpu(vcpu); + + /* + * Propagate CU1 (FPU enable) changes + * immediately if the FPU context is already + * loaded. When disabling we leave the context + * loaded so it can be quickly enabled again in + * the near future. + */ + if (change & ST0_CU1 && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) + change_c0_status(ST0_CU1, val); + + preempt_enable(); + + kvm_write_c0_guest_status(cop0, val); + +#ifdef CONFIG_KVM_MIPS_DYN_TRANS + /* + * If FPU present, we need CU1/FR bits to take + * effect fairly soon. + */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + kvm_mips_trans_mtc0(inst, opc, vcpu); +#endif + } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) { + unsigned int old_val, val, change, wrmask; + + old_val = kvm_read_c0_guest_config5(cop0); + val = vcpu->arch.gprs[rt]; + + /* Only a few bits are writable in Config5 */ + wrmask = kvm_mips_config5_wrmask(vcpu); + change = (val ^ old_val) & wrmask; + val = old_val ^ change; + + + /* Handle changes in FPU/MSA modes */ + preempt_disable(); + + /* + * Propagate FRE changes immediately if the FPU + * context is already loaded. + */ + if (change & MIPS_CONF5_FRE && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) + change_c0_config5(MIPS_CONF5_FRE, val); + + /* + * Propagate MSAEn changes immediately if the + * MSA context is already loaded. When disabling + * we leave the context loaded so it can be + * quickly enabled again in the near future. 
+ */ + if (change & MIPS_CONF5_MSAEN && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) + change_c0_config5(MIPS_CONF5_MSAEN, + val); + + preempt_enable(); + + kvm_write_c0_guest_config5(cop0, val); + } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) { + u32 old_cause, new_cause; + + old_cause = kvm_read_c0_guest_cause(cop0); + new_cause = vcpu->arch.gprs[rt]; + /* Update R/W bits */ + kvm_change_c0_guest_cause(cop0, 0x08800300, + new_cause); + /* DC bit enabling/disabling timer? */ + if ((old_cause ^ new_cause) & CAUSEF_DC) { + if (new_cause & CAUSEF_DC) + kvm_mips_count_disable_cause(vcpu); + else + kvm_mips_count_enable_cause(vcpu); + } + } else if ((rd == MIPS_CP0_HWRENA) && (sel == 0)) { + u32 mask = MIPS_HWRENA_CPUNUM | + MIPS_HWRENA_SYNCISTEP | + MIPS_HWRENA_CC | + MIPS_HWRENA_CCRES; + + if (kvm_read_c0_guest_config3(cop0) & + MIPS_CONF3_ULRI) + mask |= MIPS_HWRENA_ULR; + cop0->reg[rd][sel] = vcpu->arch.gprs[rt] & mask; + } else { + cop0->reg[rd][sel] = vcpu->arch.gprs[rt]; +#ifdef CONFIG_KVM_MIPS_DYN_TRANS + kvm_mips_trans_mtc0(inst, opc, vcpu); +#endif + } + break; + + case dmtc_op: + kvm_err("!!!!!!![%#lx]dmtc_op: rt: %d, rd: %d, sel: %d!!!!!!\n", + vcpu->arch.pc, rt, rd, sel); + trace_kvm_hwr(vcpu, KVM_TRACE_DMTC0, + KVM_TRACE_COP0(rd, sel), + vcpu->arch.gprs[rt]); + er = EMULATE_FAIL; + break; + + case mfmc0_op: +#ifdef KVM_MIPS_DEBUG_COP0_COUNTERS + cop0->stat[MIPS_CP0_STATUS][0]++; +#endif + if (rt != 0) + vcpu->arch.gprs[rt] = + kvm_read_c0_guest_status(cop0); + /* EI */ + if (inst.mfmc0_format.sc) { + kvm_debug("[%#lx] mfmc0_op: EI\n", + vcpu->arch.pc); + kvm_set_c0_guest_status(cop0, ST0_IE); + } else { + kvm_debug("[%#lx] mfmc0_op: DI\n", + vcpu->arch.pc); + kvm_clear_c0_guest_status(cop0, ST0_IE); + } + + break; + + case wrpgpr_op: + { + u32 css = cop0->reg[MIPS_CP0_STATUS][2] & 0xf; + u32 pss = + (cop0->reg[MIPS_CP0_STATUS][2] >> 6) & 0xf; + /* + * We don't support any shadow register sets, so + * SRSCtl[PSS] == SRSCtl[CSS] = 0 + */ + if (css || pss) { + er = EMULATE_FAIL; + break; + } + kvm_debug("WRPGPR[%d][%d] = %#lx\n", pss, rd, + vcpu->arch.gprs[rt]); + vcpu->arch.gprs[rd] = vcpu->arch.gprs[rt]; + } + break; + default: + kvm_err("[%#lx]MachEmulateCP0: unsupported COP0, copz: 0x%x\n", + vcpu->arch.pc, inst.c0r_format.rs); + er = EMULATE_FAIL; + break; + } + } + +done: + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) + vcpu->arch.pc = curr_pc; + +dont_update_pc: + /* + * This is for special instructions whose emulation + * updates the PC, so do not overwrite the PC under + * any circumstances + */ + + return er; +} + +enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, + u32 cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + u32 rt; + void *data = run->mmio.data; + unsigned long curr_pc; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + rt = inst.i_format.rt; + + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr); + if (run->mmio.phys_addr == KVM_INVALID_ADDR) + goto out_fail; + + switch (inst.i_format.opcode) { +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) + case sd_op: + run->mmio.len = 8; + *(u64 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_SD: eaddr: %#lx, gpr: %#lx, data: %#llx\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u64 
*)data); + break; +#endif + + case sw_op: + run->mmio.len = 4; + *(u32 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_SW: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u32 *)data); + break; + + case sh_op: + run->mmio.len = 2; + *(u16 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_SH: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u16 *)data); + break; + + case sb_op: + run->mmio.len = 1; + *(u8 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u8 *)data); + break; + + default: + kvm_err("Store not yet supported (inst=0x%08x)\n", + inst.word); + goto out_fail; + } + + run->mmio.is_write = 1; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 1; + return EMULATE_DO_MMIO; + +out_fail: + /* Rollback PC if emulation was unsuccessful */ + vcpu->arch.pc = curr_pc; + return EMULATE_FAIL; +} + +enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, + u32 cause, struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + unsigned long curr_pc; + u32 op, rt; + + rt = inst.i_format.rt; + op = inst.i_format.opcode; + + /* + * Find the resume PC now while we have safe and easy access to the + * prior branch instruction, and save it for + * kvm_mips_complete_mmio_load() to restore later. + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + vcpu->arch.io_pc = vcpu->arch.pc; + vcpu->arch.pc = curr_pc; + + vcpu->arch.io_gpr = rt; + + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr); + if (run->mmio.phys_addr == KVM_INVALID_ADDR) + return EMULATE_FAIL; + + vcpu->mmio_needed = 2; /* signed */ + switch (op) { +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) + case ld_op: + run->mmio.len = 8; + break; + + case lwu_op: + vcpu->mmio_needed = 1; /* unsigned */ + /* fall through */ +#endif + case lw_op: + run->mmio.len = 4; + break; + + case lhu_op: + vcpu->mmio_needed = 1; /* unsigned */ + /* fall through */ + case lh_op: + run->mmio.len = 2; + break; + + case lbu_op: + vcpu->mmio_needed = 1; /* unsigned */ + /* fall through */ + case lb_op: + run->mmio.len = 1; + break; + + default: + kvm_err("Load not yet supported (inst=0x%08x)\n", + inst.word); + vcpu->mmio_needed = 0; + return EMULATE_FAIL; + } + + run->mmio.is_write = 0; + vcpu->mmio_is_write = 0; + return EMULATE_DO_MMIO; +} + +#ifndef CONFIG_KVM_MIPS_VZ +static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long), + unsigned long curr_pc, + unsigned long addr, + struct kvm_run *run, + struct kvm_vcpu *vcpu, + u32 cause) +{ + int err; + + for (;;) { + /* Carefully attempt the cache operation */ + kvm_trap_emul_gva_lockless_begin(vcpu); + err = fn(addr); + kvm_trap_emul_gva_lockless_end(vcpu); + + if (likely(!err)) + return EMULATE_DONE; + + /* + * Try to handle the fault and retry, maybe we just raced with a + * GVA invalidation. 
+ */ + switch (kvm_trap_emul_gva_fault(vcpu, addr, false)) { + case KVM_MIPS_GVA: + case KVM_MIPS_GPA: + /* bad virtual or physical address */ + return EMULATE_FAIL; + case KVM_MIPS_TLB: + /* no matching guest TLB */ + vcpu->arch.host_cp0_badvaddr = addr; + vcpu->arch.pc = curr_pc; + kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, vcpu); + return EMULATE_EXCEPT; + case KVM_MIPS_TLBINV: + /* invalid matching guest TLB */ + vcpu->arch.host_cp0_badvaddr = addr; + vcpu->arch.pc = curr_pc; + kvm_mips_emulate_tlbinv_ld(cause, NULL, run, vcpu); + return EMULATE_EXCEPT; + default: + break; + }; + } +} + +enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, + u32 *opc, u32 cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + u32 cache, op_inst, op, base; + s16 offset; + struct kvm_vcpu_arch *arch = &vcpu->arch; + unsigned long va; + unsigned long curr_pc; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + base = inst.i_format.rs; + op_inst = inst.i_format.rt; + if (cpu_has_mips_r6) + offset = inst.spec3_format.simmediate; + else + offset = inst.i_format.simmediate; + cache = op_inst & CacheOp_Cache; + op = op_inst & CacheOp_Op; + + va = arch->gprs[base] + offset; + + kvm_debug("CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + cache, op, base, arch->gprs[base], offset); + + /* + * Treat INDEX_INV as a nop, basically issued by Linux on startup to + * invalidate the caches entirely by stepping through all the + * ways/indexes + */ + if (op == Index_Writeback_Inv) { + kvm_debug("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base, + arch->gprs[base], offset); + + if (cache == Cache_D) { +#ifdef CONFIG_CPU_R4K_CACHE_TLB + r4k_blast_dcache(); +#else + switch (boot_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* locally flush icache */ + local_flush_icache_range(0, 0); + break; + default: + __flush_cache_all(); + break; + } +#endif + } else if (cache == Cache_I) { +#ifdef CONFIG_CPU_R4K_CACHE_TLB + r4k_blast_icache(); +#else + switch (boot_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* locally flush icache */ + local_flush_icache_range(0, 0); + break; + default: + flush_icache_all(); + break; + } +#endif + } else { + kvm_err("%s: unsupported CACHE INDEX operation\n", + __func__); + return EMULATE_FAIL; + } + +#ifdef CONFIG_KVM_MIPS_DYN_TRANS + kvm_mips_trans_cache_index(inst, opc, vcpu); +#endif + goto done; + } + + /* XXXKYMA: Only a subset of cache ops are supported, used by Linux */ + if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D) { + /* + * Perform the dcache part of icache synchronisation on the + * guest's behalf. 
+ */ + er = kvm_mips_guest_cache_op(protected_writeback_dcache_line, + curr_pc, va, run, vcpu, cause); + if (er != EMULATE_DONE) + goto done; +#ifdef CONFIG_KVM_MIPS_DYN_TRANS + /* + * Replace the CACHE instruction, with a SYNCI, not the same, + * but avoids a trap + */ + kvm_mips_trans_cache_va(inst, opc, vcpu); +#endif + } else if (op_inst == Hit_Invalidate_I) { + /* Perform the icache synchronisation on the guest's behalf */ + er = kvm_mips_guest_cache_op(protected_writeback_dcache_line, + curr_pc, va, run, vcpu, cause); + if (er != EMULATE_DONE) + goto done; + er = kvm_mips_guest_cache_op(protected_flush_icache_line, + curr_pc, va, run, vcpu, cause); + if (er != EMULATE_DONE) + goto done; + +#ifdef CONFIG_KVM_MIPS_DYN_TRANS + /* Replace the CACHE instruction, with a SYNCI */ + kvm_mips_trans_cache_va(inst, opc, vcpu); +#endif + } else { + kvm_err("NO-OP CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + cache, op, base, arch->gprs[base], offset); + er = EMULATE_FAIL; + } + +done: + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) + vcpu->arch.pc = curr_pc; + /* Guest exception needs guest to resume */ + if (er == EMULATE_EXCEPT) + er = EMULATE_DONE; + + return er; +} + +enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + union mips_instruction inst; + enum emulation_result er = EMULATE_DONE; + int err; + + /* Fetch the instruction. */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) + return EMULATE_FAIL; + + switch (inst.r_format.opcode) { + case cop0_op: + er = kvm_mips_emulate_CP0(inst, opc, cause, run, vcpu); + break; + +#ifndef CONFIG_CPU_MIPSR6 + case cache_op: + ++vcpu->stat.cache_exits; + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); + er = kvm_mips_emulate_cache(inst, opc, cause, run, vcpu); + break; +#else + case spec3_op: + switch (inst.spec3_format.func) { + case cache6_op: + ++vcpu->stat.cache_exits; + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); + er = kvm_mips_emulate_cache(inst, opc, cause, run, + vcpu); + break; + default: + goto unknown; + }; + break; +unknown: +#endif + + default: + kvm_err("Instruction emulation not supported (%p/%#x)\n", opc, + inst.word); + kvm_arch_vcpu_dump_regs(vcpu); + er = EMULATE_FAIL; + break; + } + + return er; +} +#endif /* CONFIG_KVM_MIPS_VZ */ + +/** + * kvm_mips_guest_exception_base() - Find guest exception vector base address. + * + * Returns: The base address of the current guest exception vector, taking + * both Guest.CP0_Status.BEV and Guest.CP0_EBase into account. 
+ */ +long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + if (kvm_read_c0_guest_status(cop0) & ST0_BEV) + return KVM_GUEST_CKSEG1ADDR(0x1fc00200); + else + return kvm_read_c0_guest_ebase(cop0) & MIPS_EBASE_BASE; +} + +enum emulation_result kvm_mips_emulate_syscall(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering SYSCALL @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_SYS << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + + } else { + kvm_err("Trying to deliver SYSCALL when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + unsigned long entryhi = (vcpu->arch. host_cp0_badvaddr & VPN2_MASK) | + (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID); + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("[EXL == 0] delivering TLB MISS @ pc %#lx\n", + arch->pc); + + /* set pc to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x0; + + } else { + kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n", + arch->pc); + + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + } + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_TLBL << CAUSEB_EXCCODE)); + + /* setup badvaddr, context and entryhi registers for the guest */ + kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); + /* XXXKYMA: is the context register used by linux??? 
*/ + kvm_write_c0_guest_entryhi(cop0, entryhi); + + return EMULATE_DONE; +} + +enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + unsigned long entryhi = + (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | + (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID); + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("[EXL == 0] delivering TLB INV @ pc %#lx\n", + arch->pc); + } else { + kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n", + arch->pc); + } + + /* set pc to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_TLBL << CAUSEB_EXCCODE)); + + /* setup badvaddr, context and entryhi registers for the guest */ + kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); + /* XXXKYMA: is the context register used by linux??? */ + kvm_write_c0_guest_entryhi(cop0, entryhi); + + return EMULATE_DONE; +} + +enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | + (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID); + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("[EXL == 0] Delivering TLB MISS @ pc %#lx\n", + arch->pc); + + /* Set PC to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x0; + } else { + kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n", + arch->pc); + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + } + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_TLBS << CAUSEB_EXCCODE)); + + /* setup badvaddr, context and entryhi registers for the guest */ + kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); + /* XXXKYMA: is the context register used by linux??? 
*/ + kvm_write_c0_guest_entryhi(cop0, entryhi); + + return EMULATE_DONE; +} + +enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | + (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID); + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("[EXL == 0] Delivering TLB MISS @ pc %#lx\n", + arch->pc); + } else { + kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n", + arch->pc); + } + + /* Set PC to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_TLBS << CAUSEB_EXCCODE)); + + /* setup badvaddr, context and entryhi registers for the guest */ + kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); + /* XXXKYMA: is the context register used by linux??? */ + kvm_write_c0_guest_entryhi(cop0, entryhi); + + return EMULATE_DONE; +} + +enum emulation_result kvm_mips_emulate_tlbmod(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | + (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID); + struct kvm_vcpu_arch *arch = &vcpu->arch; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("[EXL == 0] Delivering TLB MOD @ pc %#lx\n", + arch->pc); + } else { + kvm_debug("[EXL == 1] Delivering TLB MOD @ pc %#lx\n", + arch->pc); + } + + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_MOD << CAUSEB_EXCCODE)); + + /* setup badvaddr, context and entryhi registers for the guest */ + kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); + /* XXXKYMA: is the context register used by linux??? 
*/ + kvm_write_c0_guest_entryhi(cop0, entryhi); + + return EMULATE_DONE; +} + +enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + } + + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_CPU << CAUSEB_EXCCODE)); + kvm_change_c0_guest_cause(cop0, (CAUSEF_CE), (0x1 << CAUSEB_CE)); + + return EMULATE_DONE; +} + +enum emulation_result kvm_mips_emulate_ri_exc(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering RI @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_RI << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + + } else { + kvm_err("Trying to deliver RI when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result kvm_mips_emulate_bp_exc(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering BP @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_BP << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + + } else { + kvm_err("Trying to deliver BP when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result kvm_mips_emulate_trap_exc(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_TR << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + + } else { + kvm_err("Trying to deliver TRAP when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result 
kvm_mips_emulate_msafpe_exc(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_MSAFPE << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + + } else { + kvm_err("Trying to deliver MSAFPE when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_FPE << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + + } else { + kvm_err("Trying to deliver FPE when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (EXCCODE_MSADIS << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + + } else { + kvm_err("Trying to deliver MSADIS when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + unsigned long curr_pc; + union mips_instruction inst; + int err; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + /* Fetch the instruction. 
*/ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + kvm_err("%s: Cannot get inst @ %p (%d)\n", __func__, opc, err); + return EMULATE_FAIL; + } + + if (inst.r_format.opcode == spec3_op && + inst.r_format.func == rdhwr_op && + inst.r_format.rs == 0 && + (inst.r_format.re >> 3) == 0) { + int usermode = !KVM_GUEST_KERNEL_MODE(vcpu); + int rd = inst.r_format.rd; + int rt = inst.r_format.rt; + int sel = inst.r_format.re & 0x7; + + /* If usermode, check RDHWR rd is allowed by guest HWREna */ + if (usermode && !(kvm_read_c0_guest_hwrena(cop0) & BIT(rd))) { + kvm_debug("RDHWR %#x disallowed by HWREna @ %p\n", + rd, opc); + goto emulate_ri; + } + switch (rd) { + case MIPS_HWR_CPUNUM: /* CPU number */ + arch->gprs[rt] = vcpu->vcpu_id; + break; + case MIPS_HWR_SYNCISTEP: /* SYNCI length */ + arch->gprs[rt] = min(current_cpu_data.dcache.linesz, + current_cpu_data.icache.linesz); + break; + case MIPS_HWR_CC: /* Read count register */ + arch->gprs[rt] = (s32)kvm_mips_read_count(vcpu); + break; + case MIPS_HWR_CCRES: /* Count register resolution */ + switch (current_cpu_data.cputype) { + case CPU_20KC: + case CPU_25KF: + arch->gprs[rt] = 1; + break; + default: + arch->gprs[rt] = 2; + } + break; + case MIPS_HWR_ULR: /* Read UserLocal register */ + arch->gprs[rt] = kvm_read_c0_guest_userlocal(cop0); + break; + + default: + kvm_debug("RDHWR %#x not supported @ %p\n", rd, opc); + goto emulate_ri; + } + + trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, KVM_TRACE_HWR(rd, sel), + vcpu->arch.gprs[rt]); + } else { + kvm_debug("Emulate RI not supported @ %p: %#x\n", + opc, inst.word); + goto emulate_ri; + } + + return EMULATE_DONE; + +emulate_ri: + /* + * Rollback PC (if in branch delay slot then the PC already points to + * branch target), and pass the RI exception to the guest OS. 
+ */ + vcpu->arch.pc = curr_pc; + return kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); +} + +enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr]; + enum emulation_result er = EMULATE_DONE; + + if (run->mmio.len > sizeof(*gpr)) { + kvm_err("Bad MMIO length: %d", run->mmio.len); + er = EMULATE_FAIL; + goto done; + } + + /* Restore saved resume PC */ + vcpu->arch.pc = vcpu->arch.io_pc; + + switch (run->mmio.len) { + case 8: + *gpr = *(s64 *)run->mmio.data; + break; + + case 4: + if (vcpu->mmio_needed == 2) + *gpr = *(s32 *)run->mmio.data; + else + *gpr = *(u32 *)run->mmio.data; + break; + + case 2: + if (vcpu->mmio_needed == 2) + *gpr = *(s16 *) run->mmio.data; + else + *gpr = *(u16 *)run->mmio.data; + + break; + case 1: + if (vcpu->mmio_needed == 2) + *gpr = *(s8 *) run->mmio.data; + else + *gpr = *(u8 *) run->mmio.data; + break; + } + +done: + return er; +} + +static enum emulation_result kvm_mips_emulate_exc(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_change_c0_guest_cause(cop0, (0xff), + (exccode << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); + + kvm_debug("Delivering EXC %d @ pc %#lx, badVaddr: %#lx\n", + exccode, kvm_read_c0_guest_epc(cop0), + kvm_read_c0_guest_badvaddr(cop0)); + } else { + kvm_err("Trying to deliver EXC when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result kvm_mips_check_privilege(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; + unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; + + int usermode = !KVM_GUEST_KERNEL_MODE(vcpu); + + if (usermode) { + switch (exccode) { + case EXCCODE_INT: + case EXCCODE_SYS: + case EXCCODE_BP: + case EXCCODE_RI: + case EXCCODE_TR: + case EXCCODE_MSAFPE: + case EXCCODE_FPE: + case EXCCODE_MSADIS: + break; + + case EXCCODE_CPU: + if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 0) + er = EMULATE_PRIV_FAIL; + break; + + case EXCCODE_MOD: + break; + + case EXCCODE_TLBL: + /* + * We we are accessing Guest kernel space, then send an + * address error exception to the guest + */ + if (badvaddr >= (unsigned long) KVM_GUEST_KSEG0) { + kvm_debug("%s: LD MISS @ %#lx\n", __func__, + badvaddr); + cause &= ~0xff; + cause |= (EXCCODE_ADEL << CAUSEB_EXCCODE); + er = EMULATE_PRIV_FAIL; + } + break; + + case EXCCODE_TLBS: + /* + * We we are accessing Guest kernel space, then send an + * address error exception to the guest + */ + if (badvaddr >= (unsigned long) KVM_GUEST_KSEG0) { + kvm_debug("%s: ST MISS @ %#lx\n", __func__, + badvaddr); + cause &= ~0xff; + cause |= (EXCCODE_ADES << CAUSEB_EXCCODE); + er = EMULATE_PRIV_FAIL; + } + break; + + case EXCCODE_ADES: + kvm_debug("%s: address error ST @ %#lx\n", __func__, + badvaddr); + if ((badvaddr & 
PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) { + cause &= ~0xff; + cause |= (EXCCODE_TLBS << CAUSEB_EXCCODE); + } + er = EMULATE_PRIV_FAIL; + break; + case EXCCODE_ADEL: + kvm_debug("%s: address error LD @ %#lx\n", __func__, + badvaddr); + if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) { + cause &= ~0xff; + cause |= (EXCCODE_TLBL << CAUSEB_EXCCODE); + } + er = EMULATE_PRIV_FAIL; + break; + default: + er = EMULATE_PRIV_FAIL; + break; + } + } + + if (er == EMULATE_PRIV_FAIL) + kvm_mips_emulate_exc(cause, opc, run, vcpu); + + return er; +} + +/* + * User Address (UA) fault, this could happen if + * (1) TLB entry not present/valid in both Guest and shadow host TLBs, in this + * case we pass on the fault to the guest kernel and let it handle it. + * (2) TLB entry is present in the Guest TLB but not in the shadow, in this + * case we inject the TLB from the Guest TLB into the shadow host TLB + */ +enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, + u32 *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu, + bool write_fault) +{ + enum emulation_result er = EMULATE_DONE; + u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; + unsigned long va = vcpu->arch.host_cp0_badvaddr; + int index; + + kvm_debug("kvm_mips_handle_tlbmiss: badvaddr: %#lx\n", + vcpu->arch.host_cp0_badvaddr); + + /* + * KVM would not have got the exception if this entry was valid in the + * shadow host TLB. Check the Guest TLB, if the entry is not there then + * send the guest an exception. The guest exc handler should then inject + * an entry into the guest TLB. + */ + index = kvm_mips_guest_tlb_lookup(vcpu, + (va & VPN2_MASK) | + (kvm_read_c0_guest_entryhi(vcpu->arch.cop0) & + KVM_ENTRYHI_ASID)); + if (index < 0) { + if (exccode == EXCCODE_TLBL) { + er = kvm_mips_emulate_tlbmiss_ld(cause, opc, run, vcpu); + } else if (exccode == EXCCODE_TLBS) { + er = kvm_mips_emulate_tlbmiss_st(cause, opc, run, vcpu); + } else { + kvm_err("%s: invalid exc code: %d\n", __func__, + exccode); + er = EMULATE_FAIL; + } + } else { + struct kvm_mips_tlb *tlb = &vcpu->arch.guest_tlb[index]; + + /* + * Check if the entry is valid, if not then setup a TLB invalid + * exception to the guest + */ + if (!TLB_IS_VALID(*tlb, va)) { + if (exccode == EXCCODE_TLBL) { + er = kvm_mips_emulate_tlbinv_ld(cause, opc, run, + vcpu); + } else if (exccode == EXCCODE_TLBS) { + er = kvm_mips_emulate_tlbinv_st(cause, opc, run, + vcpu); + } else { + kvm_err("%s: invalid exc code: %d\n", __func__, + exccode); + er = EMULATE_FAIL; + } + } else { + kvm_debug("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n", + tlb->tlb_hi, tlb->tlb_lo[0], tlb->tlb_lo[1]); + /* + * OK we have a Guest TLB entry, now inject it into the + * shadow host TLB + */ + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, va, + write_fault)) { + kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n", + __func__, va, index, vcpu, + read_c0_entryhi()); + er = EMULATE_FAIL; + } + } + } + + return er; +} diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c new file mode 100644 index 000000000..16e1c93b4 --- /dev/null +++ b/arch/mips/kvm/entry.c @@ -0,0 +1,943 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Generation of main entry point for the guest, exception handling. + * + * Copyright (C) 2012 MIPS Technologies, Inc. 
+ * Authors: Sanjay Lal <sanjayl@kymasys.com> + * + * Copyright (C) 2016 Imagination Technologies Ltd. + */ + +#include <linux/kvm_host.h> +#include <linux/log2.h> +#include <asm/mmu_context.h> +#include <asm/msa.h> +#include <asm/setup.h> +#include <asm/tlbex.h> +#include <asm/uasm.h> + +/* Register names */ +#define ZERO 0 +#define AT 1 +#define V0 2 +#define V1 3 +#define A0 4 +#define A1 5 + +#if _MIPS_SIM == _MIPS_SIM_ABI32 +#define T0 8 +#define T1 9 +#define T2 10 +#define T3 11 +#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ + +#if _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 +#define T0 12 +#define T1 13 +#define T2 14 +#define T3 15 +#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */ + +#define S0 16 +#define S1 17 +#define T9 25 +#define K0 26 +#define K1 27 +#define GP 28 +#define SP 29 +#define RA 31 + +/* Some CP0 registers */ +#define C0_PWBASE 5, 5 +#define C0_HWRENA 7, 0 +#define C0_BADVADDR 8, 0 +#define C0_BADINSTR 8, 1 +#define C0_BADINSTRP 8, 2 +#define C0_ENTRYHI 10, 0 +#define C0_GUESTCTL1 10, 4 +#define C0_STATUS 12, 0 +#define C0_GUESTCTL0 12, 6 +#define C0_CAUSE 13, 0 +#define C0_EPC 14, 0 +#define C0_EBASE 15, 1 +#define C0_CONFIG5 16, 5 +#define C0_DDATA_LO 28, 3 +#define C0_ERROREPC 30, 0 + +#define CALLFRAME_SIZ 32 + +#ifdef CONFIG_64BIT +#define ST0_KX_IF_64 ST0_KX +#else +#define ST0_KX_IF_64 0 +#endif + +static unsigned int scratch_vcpu[2] = { C0_DDATA_LO }; +static unsigned int scratch_tmp[2] = { C0_ERROREPC }; + +enum label_id { + label_fpu_1 = 1, + label_msa_1, + label_return_to_host, + label_kernel_asid, + label_exit_common, +}; + +UASM_L_LA(_fpu_1) +UASM_L_LA(_msa_1) +UASM_L_LA(_return_to_host) +UASM_L_LA(_kernel_asid) +UASM_L_LA(_exit_common) + +static void *kvm_mips_build_enter_guest(void *addr); +static void *kvm_mips_build_ret_from_exit(void *addr); +static void *kvm_mips_build_ret_to_guest(void *addr); +static void *kvm_mips_build_ret_to_host(void *addr); + +/* + * The version of this function in tlbex.c uses current_cpu_type(), but for KVM + * we assume symmetry. + */ +static int c0_kscratch(void) +{ + switch (boot_cpu_type()) { + case CPU_XLP: + case CPU_XLR: + return 22; + default: + return 31; + } +} + +/** + * kvm_mips_entry_setup() - Perform global setup for entry code. + * + * Perform global setup for entry code, such as choosing a scratch register. + * + * Returns: 0 on success. + * -errno on failure. + */ +int kvm_mips_entry_setup(void) +{ + /* + * We prefer to use KScratchN registers if they are available over the + * defaults above, which may not work on all cores. 
+ */ + unsigned int kscratch_mask = cpu_data[0].kscratch_mask; + + if (pgd_reg != -1) + kscratch_mask &= ~BIT(pgd_reg); + + /* Pick a scratch register for storing VCPU */ + if (kscratch_mask) { + scratch_vcpu[0] = c0_kscratch(); + scratch_vcpu[1] = ffs(kscratch_mask) - 1; + kscratch_mask &= ~BIT(scratch_vcpu[1]); + } + + /* Pick a scratch register to use as a temp for saving state */ + if (kscratch_mask) { + scratch_tmp[0] = c0_kscratch(); + scratch_tmp[1] = ffs(kscratch_mask) - 1; + kscratch_mask &= ~BIT(scratch_tmp[1]); + } + + return 0; +} + +static void kvm_mips_build_save_scratch(u32 **p, unsigned int tmp, + unsigned int frame) +{ + /* Save the VCPU scratch register value in cp0_epc of the stack frame */ + UASM_i_MFC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]); + UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); + + /* Save the temp scratch register value in cp0_cause of stack frame */ + if (scratch_tmp[0] == c0_kscratch()) { + UASM_i_MFC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); + UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); + } +} + +static void kvm_mips_build_restore_scratch(u32 **p, unsigned int tmp, + unsigned int frame) +{ + /* + * Restore host scratch register values saved by + * kvm_mips_build_save_scratch(). + */ + UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); + UASM_i_MTC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]); + + if (scratch_tmp[0] == c0_kscratch()) { + UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); + UASM_i_MTC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); + } +} + +/** + * build_set_exc_base() - Assemble code to write exception base address. + * @p: Code buffer pointer. + * @reg: Source register (generated code may set WG bit in @reg). + * + * Assemble code to modify the exception base address in the EBase register, + * using the appropriately sized access and setting the WG bit if necessary. + */ +static inline void build_set_exc_base(u32 **p, unsigned int reg) +{ + if (cpu_has_ebase_wg) { + /* Set WG so that all the bits get written */ + uasm_i_ori(p, reg, reg, MIPS_EBASE_WG); + UASM_i_MTC0(p, reg, C0_EBASE); + } else { + uasm_i_mtc0(p, reg, C0_EBASE); + } +} + +/** + * kvm_mips_build_vcpu_run() - Assemble function to start running a guest VCPU. + * @addr: Address to start writing code. + * + * Assemble the start of the vcpu_run function to run a guest VCPU. The function + * conforms to the following prototype: + * + * int vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu); + * + * The exit from the guest and return to the caller is handled by the code + * generated by kvm_mips_build_ret_to_host(). + * + * Returns: Next address after end of written function. 
+ */ +void *kvm_mips_build_vcpu_run(void *addr) +{ + u32 *p = addr; + unsigned int i; + + /* + * A0: run + * A1: vcpu + */ + + /* k0/k1 not being used in host kernel context */ + UASM_i_ADDIU(&p, K1, SP, -(int)sizeof(struct pt_regs)); + for (i = 16; i < 32; ++i) { + if (i == 24) + i = 28; + UASM_i_SW(&p, i, offsetof(struct pt_regs, regs[i]), K1); + } + + /* Save host status */ + uasm_i_mfc0(&p, V0, C0_STATUS); + UASM_i_SW(&p, V0, offsetof(struct pt_regs, cp0_status), K1); + + /* Save scratch registers, will be used to store pointer to vcpu etc */ + kvm_mips_build_save_scratch(&p, V1, K1); + + /* VCPU scratch register has pointer to vcpu */ + UASM_i_MTC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]); + + /* Offset into vcpu->arch */ + UASM_i_ADDIU(&p, K1, A1, offsetof(struct kvm_vcpu, arch)); + + /* + * Save the host stack to VCPU, used for exception processing + * when we exit from the Guest + */ + UASM_i_SW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1); + + /* Save the kernel gp as well */ + UASM_i_SW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1); + + /* + * Setup status register for running the guest in UM, interrupts + * are disabled + */ + UASM_i_LA(&p, K0, ST0_EXL | KSU_USER | ST0_BEV | ST0_KX_IF_64); + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + + /* load up the new EBASE */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1); + build_set_exc_base(&p, K0); + + /* + * Now that the new EBASE has been loaded, unset BEV, set + * interrupt mask as it was but make sure that timer interrupts + * are enabled + */ + uasm_i_addiu(&p, K0, ZERO, ST0_EXL | KSU_USER | ST0_IE | ST0_KX_IF_64); + uasm_i_andi(&p, V0, V0, ST0_IM); + uasm_i_or(&p, K0, K0, V0); + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + + p = kvm_mips_build_enter_guest(p); + + return p; +} + +/** + * kvm_mips_build_enter_guest() - Assemble code to resume guest execution. + * @addr: Address to start writing code. + * + * Assemble the code to resume guest execution. This code is common between the + * initial entry into the guest from the host, and returning from the exit + * handler back to the guest. + * + * Returns: Next address after end of written function. + */ +static void *kvm_mips_build_enter_guest(void *addr) +{ + u32 *p = addr; + unsigned int i; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; + struct uasm_label __maybe_unused *l = labels; + struct uasm_reloc __maybe_unused *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Set Guest EPC */ + UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1); + UASM_i_MTC0(&p, T0, C0_EPC); + +#ifdef CONFIG_KVM_MIPS_VZ + /* Save normal linux process pgd (VZ guarantees pgd_reg is set) */ + UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_pgd), K1); + + /* + * Set up KVM GPA pgd. + * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): + * - call tlbmiss_handler_setup_pgd(mm->pgd) + * - write mm->pgd into CP0_PWBase + * + * We keep S0 pointing at struct kvm so we can load the ASID below. 
+ */ + UASM_i_LW(&p, S0, (int)offsetof(struct kvm_vcpu, kvm) - + (int)offsetof(struct kvm_vcpu, arch), K1); + UASM_i_LW(&p, A0, offsetof(struct kvm, arch.gpa_mm.pgd), S0); + UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); + uasm_i_jalr(&p, RA, T9); + /* delay slot */ + if (cpu_has_htw) + UASM_i_MTC0(&p, A0, C0_PWBASE); + else + uasm_i_nop(&p); + + /* Set GM bit to setup eret to VZ guest context */ + uasm_i_addiu(&p, V1, ZERO, 1); + uasm_i_mfc0(&p, K0, C0_GUESTCTL0); + uasm_i_ins(&p, K0, V1, MIPS_GCTL0_GM_SHIFT, 1); + uasm_i_mtc0(&p, K0, C0_GUESTCTL0); + + if (cpu_has_guestid) { + /* + * Set root mode GuestID, so that root TLB refill handler can + * use the correct GuestID in the root TLB. + */ + + /* Get current GuestID */ + uasm_i_mfc0(&p, T0, C0_GUESTCTL1); + /* Set GuestCtl1.RID = GuestCtl1.ID */ + uasm_i_ext(&p, T1, T0, MIPS_GCTL1_ID_SHIFT, + MIPS_GCTL1_ID_WIDTH); + uasm_i_ins(&p, T0, T1, MIPS_GCTL1_RID_SHIFT, + MIPS_GCTL1_RID_WIDTH); + uasm_i_mtc0(&p, T0, C0_GUESTCTL1); + + /* GuestID handles dealiasing so we don't need to touch ASID */ + goto skip_asid_restore; + } + + /* Root ASID Dealias (RAD) */ + + /* Save host ASID */ + UASM_i_MFC0(&p, K0, C0_ENTRYHI); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi), + K1); + + /* Set the root ASID for the Guest */ + UASM_i_ADDIU(&p, T1, S0, + offsetof(struct kvm, arch.gpa_mm.context.asid)); +#else + /* Set the ASID for the Guest Kernel or User */ + UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, cop0), K1); + UASM_i_LW(&p, T0, offsetof(struct mips_coproc, reg[MIPS_CP0_STATUS][0]), + T0); + uasm_i_andi(&p, T0, T0, KSU_USER | ST0_ERL | ST0_EXL); + uasm_i_xori(&p, T0, T0, KSU_USER); + uasm_il_bnez(&p, &r, T0, label_kernel_asid); + UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch, + guest_kernel_mm.context.asid)); + /* else user */ + UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch, + guest_user_mm.context.asid)); + uasm_l_kernel_asid(&l, p); +#endif + + /* t1: contains the base of the ASID array, need to get the cpu id */ + /* smp_processor_id */ + uasm_i_lw(&p, T2, offsetof(struct thread_info, cpu), GP); + /* index the ASID array */ + uasm_i_sll(&p, T2, T2, ilog2(sizeof(long))); + UASM_i_ADDU(&p, T3, T1, T2); + UASM_i_LW(&p, K0, 0, T3); +#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE + /* + * reuse ASID array offset + * cpuinfo_mips is a multiple of sizeof(long) + */ + uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/sizeof(long)); + uasm_i_mul(&p, T2, T2, T3); + + UASM_i_LA_mostly(&p, AT, (long)&cpu_data[0].asid_mask); + UASM_i_ADDU(&p, AT, AT, T2); + UASM_i_LW(&p, T2, uasm_rel_lo((long)&cpu_data[0].asid_mask), AT); + uasm_i_and(&p, K0, K0, T2); +#else + uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID); +#endif + +#ifndef CONFIG_KVM_MIPS_VZ + /* + * Set up KVM T&E GVA pgd. 
+ * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): + * - call tlbmiss_handler_setup_pgd(mm->pgd) + * - but skips write into CP0_PWBase for now + */ + UASM_i_LW(&p, A0, (int)offsetof(struct mm_struct, pgd) - + (int)offsetof(struct mm_struct, context.asid), T1); + + UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); + uasm_i_jalr(&p, RA, T9); + uasm_i_mtc0(&p, K0, C0_ENTRYHI); +#else + /* Set up KVM VZ root ASID (!guestid) */ + uasm_i_mtc0(&p, K0, C0_ENTRYHI); +skip_asid_restore: +#endif + uasm_i_ehb(&p); + + /* Disable RDHWR access */ + uasm_i_mtc0(&p, ZERO, C0_HWRENA); + + /* load the guest context from VCPU and return */ + for (i = 1; i < 32; ++i) { + /* Guest k0/k1 loaded later */ + if (i == K0 || i == K1) + continue; + UASM_i_LW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1); + } + +#ifndef CONFIG_CPU_MIPSR6 + /* Restore hi/lo */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, hi), K1); + uasm_i_mthi(&p, K0); + + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, lo), K1); + uasm_i_mtlo(&p, K0); +#endif + + /* Restore the guest's k0/k1 registers */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1); + UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1); + + /* Jump to guest */ + uasm_i_eret(&p); + + uasm_resolve_relocs(relocs, labels); + + return p; +} + +/** + * kvm_mips_build_tlb_refill_exception() - Assemble TLB refill handler. + * @addr: Address to start writing code. + * @handler: Address of common handler (within range of @addr). + * + * Assemble TLB refill exception fast path handler for guest execution. + * + * Returns: Next address after end of written function. + */ +void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler) +{ + u32 *p = addr; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Save guest k1 into scratch register */ + UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]); + + /* Get the VCPU pointer from the VCPU scratch register */ + UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]); + + /* Save guest k0 into VCPU structure */ + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1); + + /* + * Some of the common tlbex code uses current_cpu_type(). For KVM we + * assume symmetry and just disable preemption to silence the warning. + */ + preempt_disable(); + + /* + * Now for the actual refill bit. A lot of this can be common with the + * Linux TLB refill handler, however we don't need to handle so many + * cases. We only need to handle user mode refills, and user mode runs + * with 32-bit addressing. + * + * Therefore the branch to label_vmalloc generated by build_get_pmde64() + * that isn't resolved should never actually get taken and is harmless + * to leave in place for now. 
+ */ + +#ifdef CONFIG_64BIT + build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ +#else + build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ +#endif + + /* we don't support huge pages yet */ + + build_get_ptep(&p, K0, K1); + build_update_entries(&p, K0, K1); + build_tlb_write_entry(&p, &l, &r, tlb_random); + + preempt_enable(); + + /* Get the VCPU pointer from the VCPU scratch register again */ + UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]); + + /* Restore the guest's k0/k1 registers */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1); + uasm_i_ehb(&p); + UASM_i_MFC0(&p, K1, scratch_tmp[0], scratch_tmp[1]); + + /* Jump to guest */ + uasm_i_eret(&p); + + return p; +} + +/** + * kvm_mips_build_exception() - Assemble first level guest exception handler. + * @addr: Address to start writing code. + * @handler: Address of common handler (within range of @addr). + * + * Assemble exception vector code for guest execution. The generated vector will + * branch to the common exception handler generated by kvm_mips_build_exit(). + * + * Returns: Next address after end of written function. + */ +void *kvm_mips_build_exception(void *addr, void *handler) +{ + u32 *p = addr; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Save guest k1 into scratch register */ + UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]); + + /* Get the VCPU pointer from the VCPU scratch register */ + UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]); + UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch)); + + /* Save guest k0 into VCPU structure */ + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1); + + /* Branch to the common handler */ + uasm_il_b(&p, &r, label_exit_common); + uasm_i_nop(&p); + + uasm_l_exit_common(&l, handler); + uasm_resolve_relocs(relocs, labels); + + return p; +} + +/** + * kvm_mips_build_exit() - Assemble common guest exit handler. + * @addr: Address to start writing code. + * + * Assemble the generic guest exit handling code. This is called by the + * exception vectors (generated by kvm_mips_build_exception()), and calls + * kvm_mips_handle_exit(), then either resumes the guest or returns to the host + * depending on the return value. + * + * Returns: Next address after end of written function. + */ +void *kvm_mips_build_exit(void *addr) +{ + u32 *p = addr; + unsigned int i; + struct uasm_label labels[3]; + struct uasm_reloc relocs[3]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* + * Generic Guest exception handler. We end up here when the guest + * does something that causes a trap to kernel mode. + * + * Both k0/k1 registers will have already been saved (k0 into the vcpu + * structure, and k1 into the scratch_tmp register). + * + * The k1 register will already contain the kvm_vcpu_arch pointer. 
+ */ + + /* Start saving Guest context to VCPU */ + for (i = 0; i < 32; ++i) { + /* Guest k0/k1 saved later */ + if (i == K0 || i == K1) + continue; + UASM_i_SW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1); + } + +#ifndef CONFIG_CPU_MIPSR6 + /* We need to save hi/lo and restore them on the way out */ + uasm_i_mfhi(&p, T0); + UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, hi), K1); + + uasm_i_mflo(&p, T0); + UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, lo), K1); +#endif + + /* Finally save guest k1 to VCPU */ + uasm_i_ehb(&p); + UASM_i_MFC0(&p, T0, scratch_tmp[0], scratch_tmp[1]); + UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1); + + /* Now that context has been saved, we can use other registers */ + + /* Restore vcpu */ + UASM_i_MFC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]); + + /* Restore run (vcpu->run) */ + UASM_i_LW(&p, S0, offsetof(struct kvm_vcpu, run), S1); + + /* + * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process + * the exception + */ + UASM_i_MFC0(&p, K0, C0_EPC); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, pc), K1); + + UASM_i_MFC0(&p, K0, C0_BADVADDR); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_badvaddr), + K1); + + uasm_i_mfc0(&p, K0, C0_CAUSE); + uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), K1); + + if (cpu_has_badinstr) { + uasm_i_mfc0(&p, K0, C0_BADINSTR); + uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, + host_cp0_badinstr), K1); + } + + if (cpu_has_badinstrp) { + uasm_i_mfc0(&p, K0, C0_BADINSTRP); + uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, + host_cp0_badinstrp), K1); + } + + /* Now restore the host state just enough to run the handlers */ + + /* Switch EBASE to the one used by Linux */ + /* load up the host EBASE */ + uasm_i_mfc0(&p, V0, C0_STATUS); + + uasm_i_lui(&p, AT, ST0_BEV >> 16); + uasm_i_or(&p, K0, V0, AT); + + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + + UASM_i_LA_mostly(&p, K0, (long)&ebase); + UASM_i_LW(&p, K0, uasm_rel_lo((long)&ebase), K0); + build_set_exc_base(&p, K0); + + if (raw_cpu_has_fpu) { + /* + * If FPU is enabled, save FCR31 and clear it so that later + * ctc1's don't trigger FPE for pending exceptions. + */ + uasm_i_lui(&p, AT, ST0_CU1 >> 16); + uasm_i_and(&p, V1, V0, AT); + uasm_il_beqz(&p, &r, V1, label_fpu_1); + uasm_i_nop(&p); + uasm_i_cfc1(&p, T0, 31); + uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.fcr31), + K1); + uasm_i_ctc1(&p, ZERO, 31); + uasm_l_fpu_1(&l, p); + } + + if (cpu_has_msa) { + /* + * If MSA is enabled, save MSACSR and clear it so that later + * instructions don't trigger MSAFPE for pending exceptions. + */ + uasm_i_mfc0(&p, T0, C0_CONFIG5); + uasm_i_ext(&p, T0, T0, 27, 1); /* MIPS_CONF5_MSAEN */ + uasm_il_beqz(&p, &r, T0, label_msa_1); + uasm_i_nop(&p); + uasm_i_cfcmsa(&p, T0, MSA_CSR); + uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.msacsr), + K1); + uasm_i_ctcmsa(&p, MSA_CSR, ZERO); + uasm_l_msa_1(&l, p); + } + +#ifdef CONFIG_KVM_MIPS_VZ + /* Restore host ASID */ + if (!cpu_has_guestid) { + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi), + K1); + UASM_i_MTC0(&p, K0, C0_ENTRYHI); + } + + /* + * Set up normal Linux process pgd. 
+ * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): + * - call tlbmiss_handler_setup_pgd(mm->pgd) + * - write mm->pgd into CP0_PWBase + */ + UASM_i_LW(&p, A0, + offsetof(struct kvm_vcpu_arch, host_pgd), K1); + UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); + uasm_i_jalr(&p, RA, T9); + /* delay slot */ + if (cpu_has_htw) + UASM_i_MTC0(&p, A0, C0_PWBASE); + else + uasm_i_nop(&p); + + /* Clear GM bit so we don't enter guest mode when EXL is cleared */ + uasm_i_mfc0(&p, K0, C0_GUESTCTL0); + uasm_i_ins(&p, K0, ZERO, MIPS_GCTL0_GM_SHIFT, 1); + uasm_i_mtc0(&p, K0, C0_GUESTCTL0); + + /* Save GuestCtl0 so we can access GExcCode after CPU migration */ + uasm_i_sw(&p, K0, + offsetof(struct kvm_vcpu_arch, host_cp0_guestctl0), K1); + + if (cpu_has_guestid) { + /* + * Clear root mode GuestID, so that root TLB operations use the + * root GuestID in the root TLB. + */ + uasm_i_mfc0(&p, T0, C0_GUESTCTL1); + /* Set GuestCtl1.RID = MIPS_GCTL1_ROOT_GUESTID (i.e. 0) */ + uasm_i_ins(&p, T0, ZERO, MIPS_GCTL1_RID_SHIFT, + MIPS_GCTL1_RID_WIDTH); + uasm_i_mtc0(&p, T0, C0_GUESTCTL1); + } +#endif + + /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ + uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE)); + uasm_i_and(&p, V0, V0, AT); + uasm_i_lui(&p, AT, ST0_CU0 >> 16); + uasm_i_or(&p, V0, V0, AT); +#ifdef CONFIG_64BIT + uasm_i_ori(&p, V0, V0, ST0_SX | ST0_UX); +#endif + uasm_i_mtc0(&p, V0, C0_STATUS); + uasm_i_ehb(&p); + + /* Load up host GP */ + UASM_i_LW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1); + + /* Need a stack before we can jump to "C" */ + UASM_i_LW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1); + + /* Saved host state */ + UASM_i_ADDIU(&p, SP, SP, -(int)sizeof(struct pt_regs)); + + /* + * XXXKYMA do we need to load the host ASID, maybe not because the + * kernel entries are marked GLOBAL, need to verify + */ + + /* Restore host scratch registers, as we'll have clobbered them */ + kvm_mips_build_restore_scratch(&p, K0, SP); + + /* Restore RDHWR access */ + UASM_i_LA_mostly(&p, K0, (long)&hwrena); + uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0); + uasm_i_mtc0(&p, K0, C0_HWRENA); + + /* Jump to handler */ + /* + * XXXKYMA: not sure if this is safe, how large is the stack?? + * Now jump to the kvm_mips_handle_exit() to see if we can deal + * with this in the kernel + */ + uasm_i_move(&p, A0, S0); + uasm_i_move(&p, A1, S1); + UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit); + uasm_i_jalr(&p, RA, T9); + UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ); + + uasm_resolve_relocs(relocs, labels); + + p = kvm_mips_build_ret_from_exit(p); + + return p; +} + +/** + * kvm_mips_build_ret_from_exit() - Assemble guest exit return handler. + * @addr: Address to start writing code. + * + * Assemble the code to handle the return from kvm_mips_handle_exit(), either + * resuming the guest or returning to the host depending on the return value. + * + * Returns: Next address after end of written function. 
+ */ +static void *kvm_mips_build_ret_from_exit(void *addr) +{ + u32 *p = addr; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Return from handler Make sure interrupts are disabled */ + uasm_i_di(&p, ZERO); + uasm_i_ehb(&p); + + /* + * XXXKYMA: k0/k1 could have been blown away if we processed + * an exception while we were handling the exception from the + * guest, reload k1 + */ + + uasm_i_move(&p, K1, S1); + UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch)); + + /* + * Check return value, should tell us if we are returning to the + * host (handle I/O etc)or resuming the guest + */ + uasm_i_andi(&p, T0, V0, RESUME_HOST); + uasm_il_bnez(&p, &r, T0, label_return_to_host); + uasm_i_nop(&p); + + p = kvm_mips_build_ret_to_guest(p); + + uasm_l_return_to_host(&l, p); + p = kvm_mips_build_ret_to_host(p); + + uasm_resolve_relocs(relocs, labels); + + return p; +} + +/** + * kvm_mips_build_ret_to_guest() - Assemble code to return to the guest. + * @addr: Address to start writing code. + * + * Assemble the code to handle return from the guest exit handler + * (kvm_mips_handle_exit()) back to the guest. + * + * Returns: Next address after end of written function. + */ +static void *kvm_mips_build_ret_to_guest(void *addr) +{ + u32 *p = addr; + + /* Put the saved pointer to vcpu (s1) back into the scratch register */ + UASM_i_MTC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]); + + /* Load up the Guest EBASE to minimize the window where BEV is set */ + UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1); + + /* Switch EBASE back to the one used by KVM */ + uasm_i_mfc0(&p, V1, C0_STATUS); + uasm_i_lui(&p, AT, ST0_BEV >> 16); + uasm_i_or(&p, K0, V1, AT); + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + build_set_exc_base(&p, T0); + + /* Setup status register for running guest in UM */ + uasm_i_ori(&p, V1, V1, ST0_EXL | KSU_USER | ST0_IE); + UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX | ST0_SX | ST0_UX)); + uasm_i_and(&p, V1, V1, AT); + uasm_i_mtc0(&p, V1, C0_STATUS); + uasm_i_ehb(&p); + + p = kvm_mips_build_enter_guest(p); + + return p; +} + +/** + * kvm_mips_build_ret_to_host() - Assemble code to return to the host. + * @addr: Address to start writing code. + * + * Assemble the code to handle return from the guest exit handler + * (kvm_mips_handle_exit()) back to the host, i.e. to the caller of the vcpu_run + * function generated by kvm_mips_build_vcpu_run(). + * + * Returns: Next address after end of written function. 
+ */ +static void *kvm_mips_build_ret_to_host(void *addr) +{ + u32 *p = addr; + unsigned int i; + + /* EBASE is already pointing to Linux */ + UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, host_stack), K1); + UASM_i_ADDIU(&p, K1, K1, -(int)sizeof(struct pt_regs)); + + /* + * r2/v0 is the return code, shift it down by 2 (arithmetic) + * to recover the err code + */ + uasm_i_sra(&p, K0, V0, 2); + uasm_i_move(&p, V0, K0); + + /* Load context saved on the host stack */ + for (i = 16; i < 31; ++i) { + if (i == 24) + i = 28; + UASM_i_LW(&p, i, offsetof(struct pt_regs, regs[i]), K1); + } + + /* Restore RDHWR access */ + UASM_i_LA_mostly(&p, K0, (long)&hwrena); + uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0); + uasm_i_mtc0(&p, K0, C0_HWRENA); + + /* Restore RA, which is the address we will return to */ + UASM_i_LW(&p, RA, offsetof(struct pt_regs, regs[RA]), K1); + uasm_i_jr(&p, RA); + uasm_i_nop(&p); + + return p; +} + diff --git a/arch/mips/kvm/fpu.S b/arch/mips/kvm/fpu.S new file mode 100644 index 000000000..16f17c639 --- /dev/null +++ b/arch/mips/kvm/fpu.S @@ -0,0 +1,125 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * FPU context handling code for KVM. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + */ + +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/fpregdef.h> +#include <asm/mipsregs.h> +#include <asm/regdef.h> + +/* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */ +#undef fp + + .set noreorder + .set noat + +LEAF(__kvm_save_fpu) + .set push + SET_HARDFLOAT + .set fp=64 + mfc0 t0, CP0_STATUS + sll t0, t0, 5 # is Status.FR set? + bgez t0, 1f # no: skip odd doubles + nop + sdc1 $f1, VCPU_FPR1(a0) + sdc1 $f3, VCPU_FPR3(a0) + sdc1 $f5, VCPU_FPR5(a0) + sdc1 $f7, VCPU_FPR7(a0) + sdc1 $f9, VCPU_FPR9(a0) + sdc1 $f11, VCPU_FPR11(a0) + sdc1 $f13, VCPU_FPR13(a0) + sdc1 $f15, VCPU_FPR15(a0) + sdc1 $f17, VCPU_FPR17(a0) + sdc1 $f19, VCPU_FPR19(a0) + sdc1 $f21, VCPU_FPR21(a0) + sdc1 $f23, VCPU_FPR23(a0) + sdc1 $f25, VCPU_FPR25(a0) + sdc1 $f27, VCPU_FPR27(a0) + sdc1 $f29, VCPU_FPR29(a0) + sdc1 $f31, VCPU_FPR31(a0) +1: sdc1 $f0, VCPU_FPR0(a0) + sdc1 $f2, VCPU_FPR2(a0) + sdc1 $f4, VCPU_FPR4(a0) + sdc1 $f6, VCPU_FPR6(a0) + sdc1 $f8, VCPU_FPR8(a0) + sdc1 $f10, VCPU_FPR10(a0) + sdc1 $f12, VCPU_FPR12(a0) + sdc1 $f14, VCPU_FPR14(a0) + sdc1 $f16, VCPU_FPR16(a0) + sdc1 $f18, VCPU_FPR18(a0) + sdc1 $f20, VCPU_FPR20(a0) + sdc1 $f22, VCPU_FPR22(a0) + sdc1 $f24, VCPU_FPR24(a0) + sdc1 $f26, VCPU_FPR26(a0) + sdc1 $f28, VCPU_FPR28(a0) + jr ra + sdc1 $f30, VCPU_FPR30(a0) + .set pop + END(__kvm_save_fpu) + +LEAF(__kvm_restore_fpu) + .set push + SET_HARDFLOAT + .set fp=64 + mfc0 t0, CP0_STATUS + sll t0, t0, 5 # is Status.FR set? 
+ bgez t0, 1f # no: skip odd doubles + nop + ldc1 $f1, VCPU_FPR1(a0) + ldc1 $f3, VCPU_FPR3(a0) + ldc1 $f5, VCPU_FPR5(a0) + ldc1 $f7, VCPU_FPR7(a0) + ldc1 $f9, VCPU_FPR9(a0) + ldc1 $f11, VCPU_FPR11(a0) + ldc1 $f13, VCPU_FPR13(a0) + ldc1 $f15, VCPU_FPR15(a0) + ldc1 $f17, VCPU_FPR17(a0) + ldc1 $f19, VCPU_FPR19(a0) + ldc1 $f21, VCPU_FPR21(a0) + ldc1 $f23, VCPU_FPR23(a0) + ldc1 $f25, VCPU_FPR25(a0) + ldc1 $f27, VCPU_FPR27(a0) + ldc1 $f29, VCPU_FPR29(a0) + ldc1 $f31, VCPU_FPR31(a0) +1: ldc1 $f0, VCPU_FPR0(a0) + ldc1 $f2, VCPU_FPR2(a0) + ldc1 $f4, VCPU_FPR4(a0) + ldc1 $f6, VCPU_FPR6(a0) + ldc1 $f8, VCPU_FPR8(a0) + ldc1 $f10, VCPU_FPR10(a0) + ldc1 $f12, VCPU_FPR12(a0) + ldc1 $f14, VCPU_FPR14(a0) + ldc1 $f16, VCPU_FPR16(a0) + ldc1 $f18, VCPU_FPR18(a0) + ldc1 $f20, VCPU_FPR20(a0) + ldc1 $f22, VCPU_FPR22(a0) + ldc1 $f24, VCPU_FPR24(a0) + ldc1 $f26, VCPU_FPR26(a0) + ldc1 $f28, VCPU_FPR28(a0) + jr ra + ldc1 $f30, VCPU_FPR30(a0) + .set pop + END(__kvm_restore_fpu) + +LEAF(__kvm_restore_fcsr) + .set push + SET_HARDFLOAT + lw t0, VCPU_FCR31(a0) + /* + * The ctc1 must stay at this offset in __kvm_restore_fcsr. + * See kvm_mips_csr_die_notify() which handles t0 containing a value + * which triggers an FP Exception, which must be stepped over and + * ignored since the set cause bits must remain there for the guest. + */ + ctc1 t0, fcr31 + jr ra + nop + .set pop + END(__kvm_restore_fcsr) diff --git a/arch/mips/kvm/hypcall.c b/arch/mips/kvm/hypcall.c new file mode 100644 index 000000000..830634351 --- /dev/null +++ b/arch/mips/kvm/hypcall.c @@ -0,0 +1,53 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Hypercall handling. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + */ + +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/kvm_para.h> + +#define MAX_HYPCALL_ARGS 4 + +enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu, + union mips_instruction inst) +{ + unsigned int code = (inst.co_format.code >> 5) & 0x3ff; + + kvm_debug("[%#lx] HYPCALL %#03x\n", vcpu->arch.pc, code); + + switch (code) { + case 0: + return EMULATE_HYPERCALL; + default: + return EMULATE_FAIL; + }; +} + +static int kvm_mips_hypercall(struct kvm_vcpu *vcpu, unsigned long num, + const unsigned long *args, unsigned long *hret) +{ + /* Report unimplemented hypercall to guest */ + *hret = -KVM_ENOSYS; + return RESUME_GUEST; +} + +int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu) +{ + unsigned long num, args[MAX_HYPCALL_ARGS]; + + /* read hypcall number and arguments */ + num = vcpu->arch.gprs[2]; /* v0 */ + args[0] = vcpu->arch.gprs[4]; /* a0 */ + args[1] = vcpu->arch.gprs[5]; /* a1 */ + args[2] = vcpu->arch.gprs[6]; /* a2 */ + args[3] = vcpu->arch.gprs[7]; /* a3 */ + + return kvm_mips_hypercall(vcpu, num, + args, &vcpu->arch.gprs[2] /* v0 */); +} diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c new file mode 100644 index 000000000..aa0a1a00f --- /dev/null +++ b/arch/mips/kvm/interrupt.c @@ -0,0 +1,242 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Interrupt delivery + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. 
+ * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/bootmem.h> +#include <asm/page.h> +#include <asm/cacheflush.h> + +#include <linux/kvm_host.h> + +#include "interrupt.h" + +void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority) +{ + set_bit(priority, &vcpu->arch.pending_exceptions); +} + +void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority) +{ + clear_bit(priority, &vcpu->arch.pending_exceptions); +} + +void kvm_mips_queue_timer_int_cb(struct kvm_vcpu *vcpu) +{ + /* + * Cause bits to reflect the pending timer interrupt, + * the EXC code will be set when we are actually + * delivering the interrupt: + */ + kvm_set_c0_guest_cause(vcpu->arch.cop0, (C_IRQ5 | C_TI)); + + /* Queue up an INT exception for the core */ + kvm_mips_queue_irq(vcpu, MIPS_EXC_INT_TIMER); + +} + +void kvm_mips_dequeue_timer_int_cb(struct kvm_vcpu *vcpu) +{ + kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ5 | C_TI)); + kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_TIMER); +} + +void kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) +{ + int intr = (int)irq->irq; + + /* + * Cause bits to reflect the pending IO interrupt, + * the EXC code will be set when we are actually + * delivering the interrupt: + */ + switch (intr) { + case 2: + kvm_set_c0_guest_cause(vcpu->arch.cop0, (C_IRQ0)); + /* Queue up an INT exception for the core */ + kvm_mips_queue_irq(vcpu, MIPS_EXC_INT_IO); + break; + + case 3: + kvm_set_c0_guest_cause(vcpu->arch.cop0, (C_IRQ1)); + kvm_mips_queue_irq(vcpu, MIPS_EXC_INT_IPI_1); + break; + + case 4: + kvm_set_c0_guest_cause(vcpu->arch.cop0, (C_IRQ2)); + kvm_mips_queue_irq(vcpu, MIPS_EXC_INT_IPI_2); + break; + + default: + break; + } + +} + +void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) +{ + int intr = (int)irq->irq; + + switch (intr) { + case -2: + kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ0)); + kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_IO); + break; + + case -3: + kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ1)); + kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_1); + break; + + case -4: + kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ2)); + kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_2); + break; + + default: + break; + } + +} + +/* Deliver the interrupt of the corresponding priority, if possible. 
*/ +int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, + u32 cause) +{ + int allowed = 0; + u32 exccode; + + struct kvm_vcpu_arch *arch = &vcpu->arch; + struct mips_coproc *cop0 = vcpu->arch.cop0; + + switch (priority) { + case MIPS_EXC_INT_TIMER: + if ((kvm_read_c0_guest_status(cop0) & ST0_IE) + && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) + && (kvm_read_c0_guest_status(cop0) & IE_IRQ5)) { + allowed = 1; + exccode = EXCCODE_INT; + } + break; + + case MIPS_EXC_INT_IO: + if ((kvm_read_c0_guest_status(cop0) & ST0_IE) + && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) + && (kvm_read_c0_guest_status(cop0) & IE_IRQ0)) { + allowed = 1; + exccode = EXCCODE_INT; + } + break; + + case MIPS_EXC_INT_IPI_1: + if ((kvm_read_c0_guest_status(cop0) & ST0_IE) + && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) + && (kvm_read_c0_guest_status(cop0) & IE_IRQ1)) { + allowed = 1; + exccode = EXCCODE_INT; + } + break; + + case MIPS_EXC_INT_IPI_2: + if ((kvm_read_c0_guest_status(cop0) & ST0_IE) + && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) + && (kvm_read_c0_guest_status(cop0) & IE_IRQ2)) { + allowed = 1; + exccode = EXCCODE_INT; + } + break; + + default: + break; + } + + /* Are we allowed to deliver the interrupt ??? */ + if (allowed) { + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering INT @ pc %#lx\n", arch->pc); + + } else + kvm_err("Trying to deliver interrupt when EXL is already set\n"); + + kvm_change_c0_guest_cause(cop0, CAUSEF_EXCCODE, + (exccode << CAUSEB_EXCCODE)); + + /* XXXSL Set PC to the interrupt exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu); + if (kvm_read_c0_guest_cause(cop0) & CAUSEF_IV) + arch->pc += 0x200; + else + arch->pc += 0x180; + + clear_bit(priority, &vcpu->arch.pending_exceptions); + } + + return allowed; +} + +int kvm_mips_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, + u32 cause) +{ + return 1; +} + +void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, u32 cause) +{ + unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned long *pending_clr = &vcpu->arch.pending_exceptions_clr; + unsigned int priority; + + if (!(*pending) && !(*pending_clr)) + return; + + priority = __ffs(*pending_clr); + while (priority <= MIPS_EXC_MAX) { + if (kvm_mips_callbacks->irq_clear(vcpu, priority, cause)) { + if (!KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE) + break; + } + + priority = find_next_bit(pending_clr, + BITS_PER_BYTE * sizeof(*pending_clr), + priority + 1); + } + + priority = __ffs(*pending); + while (priority <= MIPS_EXC_MAX) { + if (kvm_mips_callbacks->irq_deliver(vcpu, priority, cause)) { + if (!KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE) + break; + } + + priority = find_next_bit(pending, + BITS_PER_BYTE * sizeof(*pending), + priority + 1); + } + +} + +int kvm_mips_pending_timer(struct kvm_vcpu *vcpu) +{ + return test_bit(MIPS_EXC_INT_TIMER, &vcpu->arch.pending_exceptions); +} diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h new file mode 100644 index 000000000..3bf0a4972 --- /dev/null +++ b/arch/mips/kvm/interrupt.h @@ -0,0 +1,55 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * KVM/MIPS: Interrupts + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +/* + * MIPS Exception Priorities, exceptions (including interrupts) are queued up + * for the guest in the order specified by their priorities + */ + +#define MIPS_EXC_RESET 0 +#define MIPS_EXC_SRESET 1 +#define MIPS_EXC_DEBUG_ST 2 +#define MIPS_EXC_DEBUG 3 +#define MIPS_EXC_DDB 4 +#define MIPS_EXC_NMI 5 +#define MIPS_EXC_MCHK 6 +#define MIPS_EXC_INT_TIMER 7 +#define MIPS_EXC_INT_IO 8 +#define MIPS_EXC_EXECUTE 9 +#define MIPS_EXC_INT_IPI_1 10 +#define MIPS_EXC_INT_IPI_2 11 +#define MIPS_EXC_MAX 12 +/* XXXSL More to follow */ + +#define C_TI (_ULCAST_(1) << 30) + +#ifdef CONFIG_KVM_MIPS_VZ +#define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (1) +#define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (1) +#else +#define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0) +#define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (0) +#endif + +void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority); +void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority); +int kvm_mips_pending_timer(struct kvm_vcpu *vcpu); + +void kvm_mips_queue_timer_int_cb(struct kvm_vcpu *vcpu); +void kvm_mips_dequeue_timer_int_cb(struct kvm_vcpu *vcpu); +void kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq); +void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq); +int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, + u32 cause); +int kvm_mips_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, + u32 cause); +void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, u32 cause); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c new file mode 100644 index 000000000..79485790f --- /dev/null +++ b/arch/mips/kvm/mips.c @@ -0,0 +1,1732 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: MIPS specific KVM APIs + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. 
+ * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kdebug.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/sched/signal.h> +#include <linux/fs.h> +#include <linux/bootmem.h> + +#include <asm/fpu.h> +#include <asm/page.h> +#include <asm/cacheflush.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> + +#include <linux/kvm_host.h> + +#include "interrupt.h" +#include "commpage.h" + +#define CREATE_TRACE_POINTS +#include "trace.h" + +#ifndef VECTORSPACING +#define VECTORSPACING 0x100 /* for EI/VI mode */ +#endif + +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x) +struct kvm_stats_debugfs_item debugfs_entries[] = { + { "wait", VCPU_STAT(wait_exits), KVM_STAT_VCPU }, + { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU }, + { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU }, + { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU }, + { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, + { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU }, + { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU }, + { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU }, + { "addrerr_st", VCPU_STAT(addrerr_st_exits), KVM_STAT_VCPU }, + { "addrerr_ld", VCPU_STAT(addrerr_ld_exits), KVM_STAT_VCPU }, + { "syscall", VCPU_STAT(syscall_exits), KVM_STAT_VCPU }, + { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, + { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, + { "trap_inst", VCPU_STAT(trap_inst_exits), KVM_STAT_VCPU }, + { "msa_fpe", VCPU_STAT(msa_fpe_exits), KVM_STAT_VCPU }, + { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU }, + { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU }, + { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, +#ifdef CONFIG_KVM_MIPS_VZ + { "vz_gpsi", VCPU_STAT(vz_gpsi_exits), KVM_STAT_VCPU }, + { "vz_gsfc", VCPU_STAT(vz_gsfc_exits), KVM_STAT_VCPU }, + { "vz_hc", VCPU_STAT(vz_hc_exits), KVM_STAT_VCPU }, + { "vz_grr", VCPU_STAT(vz_grr_exits), KVM_STAT_VCPU }, + { "vz_gva", VCPU_STAT(vz_gva_exits), KVM_STAT_VCPU }, + { "vz_ghfc", VCPU_STAT(vz_ghfc_exits), KVM_STAT_VCPU }, + { "vz_gpa", VCPU_STAT(vz_gpa_exits), KVM_STAT_VCPU }, + { "vz_resvd", VCPU_STAT(vz_resvd_exits), KVM_STAT_VCPU }, +#endif + { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU }, + { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU }, + { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, + {NULL} +}; + +bool kvm_trace_guest_mode_change; + +int kvm_guest_mode_change_trace_reg(void) +{ + kvm_trace_guest_mode_change = 1; + return 0; +} + +void kvm_guest_mode_change_trace_unreg(void) +{ + kvm_trace_guest_mode_change = 0; +} + +/* + * XXXKYMA: We are simulatoring a processor that has the WII bit set in + * Config7, so we are "runnable" if interrupts are pending + */ +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return !!(vcpu->arch.pending_exceptions); +} + +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return 1; +} + +int kvm_arch_hardware_enable(void) +{ + return kvm_mips_callbacks->hardware_enable(); +} + +void kvm_arch_hardware_disable(void) +{ + kvm_mips_callbacks->hardware_disable(); +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void 
kvm_arch_check_processor_compat(void *rtn) +{ + *(int *)rtn = 0; +} + +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + switch (type) { + case KVM_VM_MIPS_AUTO: + break; +#ifdef CONFIG_KVM_MIPS_VZ + case KVM_VM_MIPS_VZ: +#else + case KVM_VM_MIPS_TE: +#endif + break; + default: + /* Unsupported KVM type */ + return -EINVAL; + }; + + /* Allocate page table to map GPA -> RPA */ + kvm->arch.gpa_mm.pgd = kvm_pgd_alloc(); + if (!kvm->arch.gpa_mm.pgd) + return -ENOMEM; + + return 0; +} + +bool kvm_arch_has_vcpu_debugfs(void) +{ + return false; +} + +int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_mips_free_vcpus(struct kvm *kvm) +{ + unsigned int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + kvm_arch_vcpu_free(vcpu); + } + + mutex_lock(&kvm->lock); + + for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) + kvm->vcpus[i] = NULL; + + atomic_set(&kvm->online_vcpus, 0); + + mutex_unlock(&kvm->lock); +} + +static void kvm_mips_free_gpa_pt(struct kvm *kvm) +{ + /* It should always be safe to remove after flushing the whole range */ + WARN_ON(!kvm_mips_flush_gpa_pt(kvm, 0, ~0)); + pgd_free(NULL, kvm->arch.gpa_mm.pgd); +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kvm_mips_free_vcpus(kvm); + kvm_mips_free_gpa_pt(kvm); +} + +long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ + /* Flush whole GPA */ + kvm_mips_flush_gpa_pt(kvm, 0, ~0); + + /* Let implementation do the rest */ + kvm_mips_callbacks->flush_shadow_all(kvm); +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + /* + * The slot has been made invalid (ready for moving or deletion), so we + * need to ensure that it can no longer be accessed by any guest VCPUs. + */ + + spin_lock(&kvm->mmu_lock); + /* Flush slot from GPA */ + kvm_mips_flush_gpa_pt(kvm, slot->base_gfn, + slot->base_gfn + slot->npages - 1); + /* Let implementation do the rest */ + kvm_mips_callbacks->flush_shadow_memslot(kvm, slot); + spin_unlock(&kvm->mmu_lock); +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + return 0; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, + enum kvm_mr_change change) +{ + int needs_flush; + + kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n", + __func__, kvm, mem->slot, mem->guest_phys_addr, + mem->memory_size, mem->userspace_addr); + + /* + * If dirty page logging is enabled, write protect all pages in the slot + * ready for dirty logging. + * + * There is no need to do this in any of the following cases: + * CREATE: No dirty mappings will already exist. 
+ * MOVE/DELETE: The old mappings will already have been cleaned up by + * kvm_arch_flush_shadow_memslot() + */ + if (change == KVM_MR_FLAGS_ONLY && + (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) && + new->flags & KVM_MEM_LOG_DIRTY_PAGES)) { + spin_lock(&kvm->mmu_lock); + /* Write protect GPA page table entries */ + needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn, + new->base_gfn + new->npages - 1); + /* Let implementation do the rest */ + if (needs_flush) + kvm_mips_callbacks->flush_shadow_memslot(kvm, new); + spin_unlock(&kvm->mmu_lock); + } +} + +static inline void dump_handler(const char *symbol, void *start, void *end) +{ + u32 *p; + + pr_debug("LEAF(%s)\n", symbol); + + pr_debug("\t.set push\n"); + pr_debug("\t.set noreorder\n"); + + for (p = start; p < (u32 *)end; ++p) + pr_debug("\t.word\t0x%08x\t\t# %p\n", *p, p); + + pr_debug("\t.set\tpop\n"); + + pr_debug("\tEND(%s)\n", symbol); +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +{ + int err, size; + void *gebase, *p, *handler, *refill_start, *refill_end; + int i; + + struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); + + if (!vcpu) { + err = -ENOMEM; + goto out; + } + + err = kvm_vcpu_init(vcpu, kvm, id); + + if (err) + goto out_free_cpu; + + kvm_debug("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu); + + /* + * Allocate space for host mode exception handlers that handle + * guest mode exits + */ + if (cpu_has_veic || cpu_has_vint) + size = 0x200 + VECTORSPACING * 64; + else + size = 0x4000; + + gebase = kzalloc(ALIGN(size, PAGE_SIZE), GFP_KERNEL); + + if (!gebase) { + err = -ENOMEM; + goto out_uninit_cpu; + } + kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n", + ALIGN(size, PAGE_SIZE), gebase); + + /* + * Check new ebase actually fits in CP0_EBase. The lack of a write gate + * limits us to the low 512MB of physical address space. If the memory + * we allocate is out of range, just give up now. 
+ */ + if (!cpu_has_ebase_wg && virt_to_phys(gebase) >= 0x20000000) { + kvm_err("CP0_EBase.WG required for guest exception base %pK\n", + gebase); + err = -ENOMEM; + goto out_free_gebase; + } + + /* Save new ebase */ + vcpu->arch.guest_ebase = gebase; + + /* Build guest exception vectors dynamically in unmapped memory */ + handler = gebase + 0x2000; + + /* TLB refill (or XTLB refill on 64-bit VZ where KX=1) */ + refill_start = gebase; + if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && IS_ENABLED(CONFIG_64BIT)) + refill_start += 0x080; + refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler); + + /* General Exception Entry point */ + kvm_mips_build_exception(gebase + 0x180, handler); + + /* For vectored interrupts poke the exception code @ all offsets 0-7 */ + for (i = 0; i < 8; i++) { + kvm_debug("L1 Vectored handler @ %p\n", + gebase + 0x200 + (i * VECTORSPACING)); + kvm_mips_build_exception(gebase + 0x200 + i * VECTORSPACING, + handler); + } + + /* General exit handler */ + p = handler; + p = kvm_mips_build_exit(p); + + /* Guest entry routine */ + vcpu->arch.vcpu_run = p; + p = kvm_mips_build_vcpu_run(p); + + /* Dump the generated code */ + pr_debug("#include <asm/asm.h>\n"); + pr_debug("#include <asm/regdef.h>\n"); + pr_debug("\n"); + dump_handler("kvm_vcpu_run", vcpu->arch.vcpu_run, p); + dump_handler("kvm_tlb_refill", refill_start, refill_end); + dump_handler("kvm_gen_exc", gebase + 0x180, gebase + 0x200); + dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run); + + /* Invalidate the icache for these ranges */ + flush_icache_range((unsigned long)gebase, + (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); + + /* + * Allocate comm page for guest kernel, a TLB will be reserved for + * mapping GVA @ 0xFFFF8000 to this page + */ + vcpu->arch.kseg0_commpage = kzalloc(PAGE_SIZE << 1, GFP_KERNEL); + + if (!vcpu->arch.kseg0_commpage) { + err = -ENOMEM; + goto out_free_gebase; + } + + kvm_debug("Allocated COMM page @ %p\n", vcpu->arch.kseg0_commpage); + kvm_mips_commpage_init(vcpu); + + /* Init */ + vcpu->arch.last_sched_cpu = -1; + vcpu->arch.last_exec_cpu = -1; + + return vcpu; + +out_free_gebase: + kfree(gebase); + +out_uninit_cpu: + kvm_vcpu_uninit(vcpu); + +out_free_cpu: + kfree(vcpu); + +out: + return ERR_PTR(err); +} + +void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +{ + hrtimer_cancel(&vcpu->arch.comparecount_timer); + + kvm_vcpu_uninit(vcpu); + + kvm_mips_dump_stats(vcpu); + + kvm_mmu_free_memory_caches(vcpu); + kfree(vcpu->arch.guest_ebase); + kfree(vcpu->arch.kseg0_commpage); + kfree(vcpu); +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_arch_vcpu_free(vcpu); +} + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int r = -EINTR; + + vcpu_load(vcpu); + + kvm_sigset_activate(vcpu); + + if (vcpu->mmio_needed) { + if (!vcpu->mmio_is_write) + kvm_mips_complete_mmio_load(vcpu, run); + vcpu->mmio_needed = 0; + } + + if (run->immediate_exit) + goto out; + + lose_fpu(1); + + local_irq_disable(); + guest_enter_irqoff(); + trace_kvm_enter(vcpu); + + /* + * Make sure the read of VCPU requests in vcpu_run() callback is not + * reordered ahead of the write to vcpu->mode, or we could miss a TLB + * flush request while the requester sees the VCPU as outside of guest + * mode and not needing an IPI. 
+ */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); + + r = kvm_mips_callbacks->vcpu_run(run, vcpu); + + trace_kvm_out(vcpu); + guest_exit_irqoff(); + local_irq_enable(); + +out: + kvm_sigset_deactivate(vcpu); + + vcpu_put(vcpu); + return r; +} + +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) +{ + int intr = (int)irq->irq; + struct kvm_vcpu *dvcpu = NULL; + + if (intr == 3 || intr == -3 || intr == 4 || intr == -4) + kvm_debug("%s: CPU: %d, INTR: %d\n", __func__, irq->cpu, + (int)intr); + + if (irq->cpu == -1) + dvcpu = vcpu; + else + dvcpu = vcpu->kvm->vcpus[irq->cpu]; + + if (intr == 2 || intr == 3 || intr == 4) { + kvm_mips_callbacks->queue_io_int(dvcpu, irq); + + } else if (intr == -2 || intr == -3 || intr == -4) { + kvm_mips_callbacks->dequeue_io_int(dvcpu, irq); + } else { + kvm_err("%s: invalid interrupt ioctl (%d:%d)\n", __func__, + irq->cpu, irq->irq); + return -EINVAL; + } + + dvcpu->arch.wait = 0; + + if (swq_has_sleeper(&dvcpu->wq)) + swake_up_one(&dvcpu->wq); + + return 0; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -ENOIOCTLCMD; +} + +static u64 kvm_mips_get_one_regs[] = { + KVM_REG_MIPS_R0, + KVM_REG_MIPS_R1, + KVM_REG_MIPS_R2, + KVM_REG_MIPS_R3, + KVM_REG_MIPS_R4, + KVM_REG_MIPS_R5, + KVM_REG_MIPS_R6, + KVM_REG_MIPS_R7, + KVM_REG_MIPS_R8, + KVM_REG_MIPS_R9, + KVM_REG_MIPS_R10, + KVM_REG_MIPS_R11, + KVM_REG_MIPS_R12, + KVM_REG_MIPS_R13, + KVM_REG_MIPS_R14, + KVM_REG_MIPS_R15, + KVM_REG_MIPS_R16, + KVM_REG_MIPS_R17, + KVM_REG_MIPS_R18, + KVM_REG_MIPS_R19, + KVM_REG_MIPS_R20, + KVM_REG_MIPS_R21, + KVM_REG_MIPS_R22, + KVM_REG_MIPS_R23, + KVM_REG_MIPS_R24, + KVM_REG_MIPS_R25, + KVM_REG_MIPS_R26, + KVM_REG_MIPS_R27, + KVM_REG_MIPS_R28, + KVM_REG_MIPS_R29, + KVM_REG_MIPS_R30, + KVM_REG_MIPS_R31, + +#ifndef CONFIG_CPU_MIPSR6 + KVM_REG_MIPS_HI, + KVM_REG_MIPS_LO, +#endif + KVM_REG_MIPS_PC, +}; + +static u64 kvm_mips_get_one_regs_fpu[] = { + KVM_REG_MIPS_FCR_IR, + KVM_REG_MIPS_FCR_CSR, +}; + +static u64 kvm_mips_get_one_regs_msa[] = { + KVM_REG_MIPS_MSA_IR, + KVM_REG_MIPS_MSA_CSR, +}; + +static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu) +{ + unsigned long ret; + + ret = ARRAY_SIZE(kvm_mips_get_one_regs); + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) { + ret += ARRAY_SIZE(kvm_mips_get_one_regs_fpu) + 48; + /* odd doubles */ + if (boot_cpu_data.fpu_id & MIPS_FPIR_F64) + ret += 16; + } + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) + ret += ARRAY_SIZE(kvm_mips_get_one_regs_msa) + 32; + ret += kvm_mips_callbacks->num_regs(vcpu); + + return ret; +} + +static int kvm_mips_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) +{ + u64 index; + unsigned int i; + + if (copy_to_user(indices, kvm_mips_get_one_regs, + sizeof(kvm_mips_get_one_regs))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_mips_get_one_regs); + + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) { + if (copy_to_user(indices, kvm_mips_get_one_regs_fpu, + sizeof(kvm_mips_get_one_regs_fpu))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_mips_get_one_regs_fpu); + + for (i = 0; i < 32; ++i) { + index = KVM_REG_MIPS_FPR_32(i); + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + + /* skip odd doubles if no F64 */ + if (i & 1 && !(boot_cpu_data.fpu_id & MIPS_FPIR_F64)) + continue; + + index = KVM_REG_MIPS_FPR_64(i); + if (copy_to_user(indices, &index, 
sizeof(index))) + return -EFAULT; + ++indices; + } + } + + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) { + if (copy_to_user(indices, kvm_mips_get_one_regs_msa, + sizeof(kvm_mips_get_one_regs_msa))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_mips_get_one_regs_msa); + + for (i = 0; i < 32; ++i) { + index = KVM_REG_MIPS_VEC_128(i); + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + } + + return kvm_mips_callbacks->copy_reg_indices(vcpu, indices); +} + +static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_fpu_struct *fpu = &vcpu->arch.fpu; + int ret; + s64 v; + s64 vs[2]; + unsigned int idx; + + switch (reg->id) { + /* General purpose registers */ + case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31: + v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0]; + break; +#ifndef CONFIG_CPU_MIPSR6 + case KVM_REG_MIPS_HI: + v = (long)vcpu->arch.hi; + break; + case KVM_REG_MIPS_LO: + v = (long)vcpu->arch.lo; + break; +#endif + case KVM_REG_MIPS_PC: + v = (long)vcpu->arch.pc; + break; + + /* Floating point registers */ + case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_32(0); + /* Odd singles in top of even double when FR=0 */ + if (kvm_read_c0_guest_status(cop0) & ST0_FR) + v = get_fpr32(&fpu->fpr[idx], 0); + else + v = get_fpr32(&fpu->fpr[idx & ~1], idx & 1); + break; + case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_64(0); + /* Can't access odd doubles in FR=0 mode */ + if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + v = get_fpr64(&fpu->fpr[idx], 0); + break; + case KVM_REG_MIPS_FCR_IR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + v = boot_cpu_data.fpu_id; + break; + case KVM_REG_MIPS_FCR_CSR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + v = fpu->fcr31; + break; + + /* MIPS SIMD Architecture (MSA) registers */ + case KVM_REG_MIPS_VEC_128(0) ... 
KVM_REG_MIPS_VEC_128(31): + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + /* Can't access MSA registers in FR=0 mode */ + if (!(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_VEC_128(0); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* least significant byte first */ + vs[0] = get_fpr64(&fpu->fpr[idx], 0); + vs[1] = get_fpr64(&fpu->fpr[idx], 1); +#else + /* most significant byte first */ + vs[0] = get_fpr64(&fpu->fpr[idx], 1); + vs[1] = get_fpr64(&fpu->fpr[idx], 0); +#endif + break; + case KVM_REG_MIPS_MSA_IR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + v = boot_cpu_data.msa_id; + break; + case KVM_REG_MIPS_MSA_CSR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + v = fpu->msacsr; + break; + + /* registers to be handled specially */ + default: + ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v); + if (ret) + return ret; + break; + } + if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) { + u64 __user *uaddr64 = (u64 __user *)(long)reg->addr; + + return put_user(v, uaddr64); + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) { + u32 __user *uaddr32 = (u32 __user *)(long)reg->addr; + u32 v32 = (u32)v; + + return put_user(v32, uaddr32); + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { + void __user *uaddr = (void __user *)(long)reg->addr; + + return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0; + } else { + return -EINVAL; + } +} + +static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_fpu_struct *fpu = &vcpu->arch.fpu; + s64 v; + s64 vs[2]; + unsigned int idx; + + if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) { + u64 __user *uaddr64 = (u64 __user *)(long)reg->addr; + + if (get_user(v, uaddr64) != 0) + return -EFAULT; + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) { + u32 __user *uaddr32 = (u32 __user *)(long)reg->addr; + s32 v32; + + if (get_user(v32, uaddr32) != 0) + return -EFAULT; + v = (s64)v32; + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { + void __user *uaddr = (void __user *)(long)reg->addr; + + return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0; + } else { + return -EINVAL; + } + + switch (reg->id) { + /* General purpose registers */ + case KVM_REG_MIPS_R0: + /* Silently ignore requests to set $0 */ + break; + case KVM_REG_MIPS_R1 ... KVM_REG_MIPS_R31: + vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0] = v; + break; +#ifndef CONFIG_CPU_MIPSR6 + case KVM_REG_MIPS_HI: + vcpu->arch.hi = v; + break; + case KVM_REG_MIPS_LO: + vcpu->arch.lo = v; + break; +#endif + case KVM_REG_MIPS_PC: + vcpu->arch.pc = v; + break; + + /* Floating point registers */ + case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_32(0); + /* Odd singles in top of even double when FR=0 */ + if (kvm_read_c0_guest_status(cop0) & ST0_FR) + set_fpr32(&fpu->fpr[idx], 0, v); + else + set_fpr32(&fpu->fpr[idx & ~1], idx & 1, v); + break; + case KVM_REG_MIPS_FPR_64(0) ... 
KVM_REG_MIPS_FPR_64(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_64(0); + /* Can't access odd doubles in FR=0 mode */ + if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + set_fpr64(&fpu->fpr[idx], 0, v); + break; + case KVM_REG_MIPS_FCR_IR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + /* Read-only */ + break; + case KVM_REG_MIPS_FCR_CSR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + fpu->fcr31 = v; + break; + + /* MIPS SIMD Architecture (MSA) registers */ + case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31): + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_VEC_128(0); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* least significant byte first */ + set_fpr64(&fpu->fpr[idx], 0, vs[0]); + set_fpr64(&fpu->fpr[idx], 1, vs[1]); +#else + /* most significant byte first */ + set_fpr64(&fpu->fpr[idx], 1, vs[0]); + set_fpr64(&fpu->fpr[idx], 0, vs[1]); +#endif + break; + case KVM_REG_MIPS_MSA_IR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + /* Read-only */ + break; + case KVM_REG_MIPS_MSA_CSR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + fpu->msacsr = v; + break; + + /* registers to be handled specially */ + default: + return kvm_mips_callbacks->set_one_reg(vcpu, reg, v); + } + return 0; +} + +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + int r = 0; + + if (!kvm_vm_ioctl_check_extension(vcpu->kvm, cap->cap)) + return -EINVAL; + if (cap->flags) + return -EINVAL; + if (cap->args[0]) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_MIPS_FPU: + vcpu->arch.fpu_enabled = true; + break; + case KVM_CAP_MIPS_MSA: + vcpu->arch.msa_enabled = true; + break; + default: + r = -EINVAL; + break; + } + + return r; +} + +long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + + if (ioctl == KVM_INTERRUPT) { + struct kvm_mips_interrupt irq; + + if (copy_from_user(&irq, argp, sizeof(irq))) + return -EFAULT; + kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__, + irq.irq); + + return kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } + + return -ENOIOCTLCMD; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + long r; + + vcpu_load(vcpu); + + switch (ioctl) { + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + break; + if (ioctl == KVM_SET_ONE_REG) + r = kvm_mips_set_reg(vcpu, ®); + else + r = kvm_mips_get_reg(vcpu, ®); + break; + } + case KVM_GET_REG_LIST: { + struct kvm_reg_list __user *user_list = argp; + struct kvm_reg_list reg_list; + unsigned n; + + r = -EFAULT; + if (copy_from_user(®_list, user_list, sizeof(reg_list))) + break; + n = reg_list.n; + reg_list.n = kvm_mips_num_regs(vcpu); + if (copy_to_user(user_list, ®_list, sizeof(reg_list))) + break; + r = -E2BIG; + if (n < reg_list.n) + break; + r = kvm_mips_copy_reg_indices(vcpu, user_list->reg); + break; + } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + break; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } + default: + r = -ENOIOCTLCMD; + } + + vcpu_put(vcpu); + return r; +} + +/** + * 
kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot + * @kvm: kvm instance + * @log: slot id and address to which we copy the log + * + * Steps 1-4 below provide general overview of dirty page logging. See + * kvm_get_dirty_log_protect() function description for additional details. + * + * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we + * always flush the TLB (step 4) even if previous step failed and the dirty + * bitmap may be corrupt. Regardless of previous outcome the KVM logging API + * does not preclude user space subsequent dirty log read. Flushing TLB ensures + * writes will be marked dirty for next log read. + * + * 1. Take a snapshot of the bit and clear it if needed. + * 2. Write protect the corresponding page. + * 3. Copy the snapshot to the userspace. + * 4. Flush TLB's if needed. + */ +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + bool is_dirty = false; + int r; + + mutex_lock(&kvm->slots_lock); + + r = kvm_get_dirty_log_protect(kvm, log, &is_dirty); + + if (is_dirty) { + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); + + /* Let implementation handle TLB/GVA invalidation */ + kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot); + } + + mutex_unlock(&kvm->slots_lock); + return r; +} + +long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +{ + long r; + + switch (ioctl) { + default: + r = -ENOIOCTLCMD; + } + + return r; +} + +int kvm_arch_init(void *opaque) +{ + if (kvm_mips_callbacks) { + kvm_err("kvm: module already exists\n"); + return -EEXIST; + } + + return kvm_mips_emulation_init(&kvm_mips_callbacks); +} + +void kvm_arch_exit(void) +{ + kvm_mips_callbacks = NULL; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -ENOIOCTLCMD; +} + +void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOIOCTLCMD; +} + +vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_ONE_REG: + case KVM_CAP_ENABLE_CAP: + case KVM_CAP_READONLY_MEM: + case KVM_CAP_SYNC_MMU: + case KVM_CAP_IMMEDIATE_EXIT: + r = 1; + break; + case KVM_CAP_NR_VCPUS: + r = num_online_cpus(); + break; + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; + case KVM_CAP_MAX_VCPU_ID: + r = KVM_MAX_VCPU_ID; + break; + case KVM_CAP_MIPS_FPU: + /* We don't handle systems with inconsistent cpu_has_fpu */ + r = !!raw_cpu_has_fpu; + break; + case KVM_CAP_MIPS_MSA: + /* + * We don't support MSA vector partitioning yet: + * 1) It would require explicit support which can't be tested + * yet due to lack of support in current hardware. + * 2) It extends the state that would need to be saved/restored + * by e.g. QEMU for migration. + * + * When vector partitioning hardware becomes available, support + * could be added by requiring a flag when enabling + * KVM_CAP_MIPS_MSA capability to indicate that userland knows + * to save/restore the appropriate extra state. 
+ */ + r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF); + break; + default: + r = kvm_mips_callbacks->check_extension(kvm, ext); + break; + } + return r; +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return kvm_mips_pending_timer(vcpu) || + kvm_read_c0_guest_cause(vcpu->arch.cop0) & C_TI; +} + +int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) +{ + int i; + struct mips_coproc *cop0; + + if (!vcpu) + return -1; + + kvm_debug("VCPU Register Dump:\n"); + kvm_debug("\tpc = 0x%08lx\n", vcpu->arch.pc); + kvm_debug("\texceptions: %08lx\n", vcpu->arch.pending_exceptions); + + for (i = 0; i < 32; i += 4) { + kvm_debug("\tgpr%02d: %08lx %08lx %08lx %08lx\n", i, + vcpu->arch.gprs[i], + vcpu->arch.gprs[i + 1], + vcpu->arch.gprs[i + 2], vcpu->arch.gprs[i + 3]); + } + kvm_debug("\thi: 0x%08lx\n", vcpu->arch.hi); + kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo); + + cop0 = vcpu->arch.cop0; + kvm_debug("\tStatus: 0x%08x, Cause: 0x%08x\n", + kvm_read_c0_guest_status(cop0), + kvm_read_c0_guest_cause(cop0)); + + kvm_debug("\tEPC: 0x%08lx\n", kvm_read_c0_guest_epc(cop0)); + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + vcpu_load(vcpu); + + for (i = 1; i < ARRAY_SIZE(vcpu->arch.gprs); i++) + vcpu->arch.gprs[i] = regs->gpr[i]; + vcpu->arch.gprs[0] = 0; /* zero is special, and cannot be set. */ + vcpu->arch.hi = regs->hi; + vcpu->arch.lo = regs->lo; + vcpu->arch.pc = regs->pc; + + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + vcpu_load(vcpu); + + for (i = 0; i < ARRAY_SIZE(vcpu->arch.gprs); i++) + regs->gpr[i] = vcpu->arch.gprs[i]; + + regs->hi = vcpu->arch.hi; + regs->lo = vcpu->arch.lo; + regs->pc = vcpu->arch.pc; + + vcpu_put(vcpu); + return 0; +} + +static void kvm_mips_comparecount_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + kvm_mips_callbacks->queue_timer_int(vcpu); + + vcpu->arch.wait = 0; + if (swq_has_sleeper(&vcpu->wq)) + swake_up_one(&vcpu->wq); +} + +/* low level hrtimer wake routine */ +static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) +{ + struct kvm_vcpu *vcpu; + + vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer); + kvm_mips_comparecount_func((unsigned long) vcpu); + return kvm_mips_count_timeout(vcpu); +} + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + int err; + + err = kvm_mips_callbacks->vcpu_init(vcpu); + if (err) + return err; + + hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup; + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + kvm_mips_callbacks->vcpu_uninit(vcpu); +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return 0; +} + +/* Initial guest state */ +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return kvm_mips_callbacks->vcpu_setup(vcpu); +} + +static void kvm_mips_set_c0_status(void) +{ + u32 status = read_c0_status(); + + if (cpu_has_dsp) + status |= (ST0_MX); + + write_c0_status(status); + ehb(); +} + +/* + * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) + */ +int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + u32 cause = vcpu->arch.host_cp0_cause; + u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + unsigned long badvaddr = 
vcpu->arch.host_cp0_badvaddr; + enum emulation_result er = EMULATE_DONE; + u32 inst; + int ret = RESUME_GUEST; + + vcpu->mode = OUTSIDE_GUEST_MODE; + + /* re-enable HTW before enabling interrupts */ + if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) + htw_start(); + + /* Set a default exit reason */ + run->exit_reason = KVM_EXIT_UNKNOWN; + run->ready_for_interrupt_injection = 1; + + /* + * Set the appropriate status bits based on host CPU features, + * before we hit the scheduler + */ + kvm_mips_set_c0_status(); + + local_irq_enable(); + + kvm_debug("kvm_mips_handle_exit: cause: %#x, PC: %p, kvm_run: %p, kvm_vcpu: %p\n", + cause, opc, run, vcpu); + trace_kvm_exit(vcpu, exccode); + + if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { + /* + * Do a privilege check, if in UM most of these exit conditions + * end up causing an exception to be delivered to the Guest + * Kernel + */ + er = kvm_mips_check_privilege(cause, opc, run, vcpu); + if (er == EMULATE_PRIV_FAIL) { + goto skip_emul; + } else if (er == EMULATE_FAIL) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + goto skip_emul; + } + } + + switch (exccode) { + case EXCCODE_INT: + kvm_debug("[%d]EXCCODE_INT @ %p\n", vcpu->vcpu_id, opc); + + ++vcpu->stat.int_exits; + + if (need_resched()) + cond_resched(); + + ret = RESUME_GUEST; + break; + + case EXCCODE_CPU: + kvm_debug("EXCCODE_CPU: @ PC: %p\n", opc); + + ++vcpu->stat.cop_unusable_exits; + ret = kvm_mips_callbacks->handle_cop_unusable(vcpu); + /* XXXKYMA: Might need to return to user space */ + if (run->exit_reason == KVM_EXIT_IRQ_WINDOW_OPEN) + ret = RESUME_HOST; + break; + + case EXCCODE_MOD: + ++vcpu->stat.tlbmod_exits; + ret = kvm_mips_callbacks->handle_tlb_mod(vcpu); + break; + + case EXCCODE_TLBS: + kvm_debug("TLB ST fault: cause %#x, status %#x, PC: %p, BadVaddr: %#lx\n", + cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc, + badvaddr); + + ++vcpu->stat.tlbmiss_st_exits; + ret = kvm_mips_callbacks->handle_tlb_st_miss(vcpu); + break; + + case EXCCODE_TLBL: + kvm_debug("TLB LD fault: cause %#x, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); + + ++vcpu->stat.tlbmiss_ld_exits; + ret = kvm_mips_callbacks->handle_tlb_ld_miss(vcpu); + break; + + case EXCCODE_ADES: + ++vcpu->stat.addrerr_st_exits; + ret = kvm_mips_callbacks->handle_addr_err_st(vcpu); + break; + + case EXCCODE_ADEL: + ++vcpu->stat.addrerr_ld_exits; + ret = kvm_mips_callbacks->handle_addr_err_ld(vcpu); + break; + + case EXCCODE_SYS: + ++vcpu->stat.syscall_exits; + ret = kvm_mips_callbacks->handle_syscall(vcpu); + break; + + case EXCCODE_RI: + ++vcpu->stat.resvd_inst_exits; + ret = kvm_mips_callbacks->handle_res_inst(vcpu); + break; + + case EXCCODE_BP: + ++vcpu->stat.break_inst_exits; + ret = kvm_mips_callbacks->handle_break(vcpu); + break; + + case EXCCODE_TR: + ++vcpu->stat.trap_inst_exits; + ret = kvm_mips_callbacks->handle_trap(vcpu); + break; + + case EXCCODE_MSAFPE: + ++vcpu->stat.msa_fpe_exits; + ret = kvm_mips_callbacks->handle_msa_fpe(vcpu); + break; + + case EXCCODE_FPE: + ++vcpu->stat.fpe_exits; + ret = kvm_mips_callbacks->handle_fpe(vcpu); + break; + + case EXCCODE_MSADIS: + ++vcpu->stat.msa_disabled_exits; + ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); + break; + + case EXCCODE_GE: + /* defer exit accounting to handler */ + ret = kvm_mips_callbacks->handle_guest_exit(vcpu); + break; + + default: + if (cause & CAUSEF_BD) + opc += 1; + inst = 0; + kvm_get_badinstr(opc, vcpu, &inst); + kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", + exccode, opc, 
inst, badvaddr, + kvm_read_c0_guest_status(vcpu->arch.cop0)); + kvm_arch_vcpu_dump_regs(vcpu); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + break; + + } + +skip_emul: + local_irq_disable(); + + if (ret == RESUME_GUEST) + kvm_vz_acquire_htimer(vcpu); + + if (er == EMULATE_DONE && !(ret & RESUME_HOST)) + kvm_mips_deliver_interrupts(vcpu, cause); + + if (!(ret & RESUME_HOST)) { + /* Only check for signals if not already exiting to userspace */ + if (signal_pending(current)) { + run->exit_reason = KVM_EXIT_INTR; + ret = (-EINTR << 2) | RESUME_HOST; + ++vcpu->stat.signal_exits; + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL); + } + } + + if (ret == RESUME_GUEST) { + trace_kvm_reenter(vcpu); + + /* + * Make sure the read of VCPU requests in vcpu_reenter() + * callback is not reordered ahead of the write to vcpu->mode, + * or we could miss a TLB flush request while the requester sees + * the VCPU as outside of guest mode and not needing an IPI. + */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); + + kvm_mips_callbacks->vcpu_reenter(run, vcpu); + + /* + * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context + * is live), restore FCR31 / MSACSR. + * + * This should be before returning to the guest exception + * vector, as it may well cause an [MSA] FP exception if there + * are pending exception bits unmasked. (see + * kvm_mips_csr_die_notifier() for how that is handled). + */ + if (kvm_mips_guest_has_fpu(&vcpu->arch) && + read_c0_status() & ST0_CU1) + __kvm_restore_fcsr(&vcpu->arch); + + if (kvm_mips_guest_has_msa(&vcpu->arch) && + read_c0_config5() & MIPS_CONF5_MSAEN) + __kvm_restore_msacsr(&vcpu->arch); + } + + /* Disable HTW before returning to guest or host */ + if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) + htw_stop(); + + return ret; +} + +/* Enable FPU for guest and restore context */ +void kvm_own_fpu(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned int sr, cfg5; + + preempt_disable(); + + sr = kvm_read_c0_guest_status(cop0); + + /* + * If MSA state is already live, it is undefined how it interacts with + * FR=0 FPU state, and we don't want to hit reserved instruction + * exceptions trying to save the MSA state later when CU=1 && FR=1, so + * play it safe and save it first. + * + * In theory we shouldn't ever hit this case since kvm_lose_fpu() should + * get called when guest CU1 is set, however we can't trust the guest + * not to clobber the status register directly via the commpage. + */ + if (cpu_has_msa && sr & ST0_CU1 && !(sr & ST0_FR) && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) + kvm_lose_fpu(vcpu); + + /* + * Enable FPU for guest + * We set FR and FRE according to guest context + */ + change_c0_status(ST0_CU1 | ST0_FR, sr); + if (cpu_has_fre) { + cfg5 = kvm_read_c0_guest_config5(cop0); + change_c0_config5(MIPS_CONF5_FRE, cfg5); + } + enable_fpu_hazard(); + + /* If guest FPU state not active, restore it now */ + if (!(vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)) { + __kvm_restore_fpu(&vcpu->arch); + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_FPU; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU); + } else { + trace_kvm_aux(vcpu, KVM_TRACE_AUX_ENABLE, KVM_TRACE_AUX_FPU); + } + + preempt_enable(); +} + +#ifdef CONFIG_CPU_HAS_MSA +/* Enable MSA for guest and restore context */ +void kvm_own_msa(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned int sr, cfg5; + + preempt_disable(); + + /* + * Enable FPU if enabled in guest, since we're restoring FPU context + * anyway. 
We set FR and FRE according to guest context. + */ + if (kvm_mips_guest_has_fpu(&vcpu->arch)) { + sr = kvm_read_c0_guest_status(cop0); + + /* + * If FR=0 FPU state is already live, it is undefined how it + * interacts with MSA state, so play it safe and save it first. + */ + if (!(sr & ST0_FR) && + (vcpu->arch.aux_inuse & (KVM_MIPS_AUX_FPU | + KVM_MIPS_AUX_MSA)) == KVM_MIPS_AUX_FPU) + kvm_lose_fpu(vcpu); + + change_c0_status(ST0_CU1 | ST0_FR, sr); + if (sr & ST0_CU1 && cpu_has_fre) { + cfg5 = kvm_read_c0_guest_config5(cop0); + change_c0_config5(MIPS_CONF5_FRE, cfg5); + } + } + + /* Enable MSA for guest */ + set_c0_config5(MIPS_CONF5_MSAEN); + enable_fpu_hazard(); + + switch (vcpu->arch.aux_inuse & (KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA)) { + case KVM_MIPS_AUX_FPU: + /* + * Guest FPU state already loaded, only restore upper MSA state + */ + __kvm_restore_msa_upper(&vcpu->arch); + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_MSA; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_MSA); + break; + case 0: + /* Neither FPU or MSA already active, restore full MSA state */ + __kvm_restore_msa(&vcpu->arch); + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_MSA; + if (kvm_mips_guest_has_fpu(&vcpu->arch)) + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_FPU; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, + KVM_TRACE_AUX_FPU_MSA); + break; + default: + trace_kvm_aux(vcpu, KVM_TRACE_AUX_ENABLE, KVM_TRACE_AUX_MSA); + break; + } + + preempt_enable(); +} +#endif + +/* Drop FPU & MSA without saving it */ +void kvm_drop_fpu(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { + disable_msa(); + trace_kvm_aux(vcpu, KVM_TRACE_AUX_DISCARD, KVM_TRACE_AUX_MSA); + vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_MSA; + } + if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { + clear_c0_status(ST0_CU1 | ST0_FR); + trace_kvm_aux(vcpu, KVM_TRACE_AUX_DISCARD, KVM_TRACE_AUX_FPU); + vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU; + } + preempt_enable(); +} + +/* Save and disable FPU & MSA */ +void kvm_lose_fpu(struct kvm_vcpu *vcpu) +{ + /* + * With T&E, FPU & MSA get disabled in root context (hardware) when it + * is disabled in guest context (software), but the register state in + * the hardware may still be in use. + * This is why we explicitly re-enable the hardware before saving. + */ + + preempt_disable(); + if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { + if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { + set_c0_config5(MIPS_CONF5_MSAEN); + enable_fpu_hazard(); + } + + __kvm_save_msa(&vcpu->arch); + trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_MSA); + + /* Disable MSA & FPU */ + disable_msa(); + if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { + clear_c0_status(ST0_CU1 | ST0_FR); + disable_fpu_hazard(); + } + vcpu->arch.aux_inuse &= ~(KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA); + } else if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { + if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { + set_c0_status(ST0_CU1); + enable_fpu_hazard(); + } + + __kvm_save_fpu(&vcpu->arch); + vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU); + + /* Disable FPU */ + clear_c0_status(ST0_CU1 | ST0_FR); + disable_fpu_hazard(); + } + preempt_enable(); +} + +/* + * Step over a specific ctc1 to FCSR and a specific ctcmsa to MSACSR which are + * used to restore guest FCSR/MSACSR state and may trigger a "harmless" FP/MSAFP + * exception if cause bits are set in the value being written. 
+ */ +static int kvm_mips_csr_die_notify(struct notifier_block *self, + unsigned long cmd, void *ptr) +{ + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + unsigned long pc; + + /* Only interested in FPE and MSAFPE */ + if (cmd != DIE_FP && cmd != DIE_MSAFP) + return NOTIFY_DONE; + + /* Return immediately if guest context isn't active */ + if (!(current->flags & PF_VCPU)) + return NOTIFY_DONE; + + /* Should never get here from user mode */ + BUG_ON(user_mode(regs)); + + pc = instruction_pointer(regs); + switch (cmd) { + case DIE_FP: + /* match 2nd instruction in __kvm_restore_fcsr */ + if (pc != (unsigned long)&__kvm_restore_fcsr + 4) + return NOTIFY_DONE; + break; + case DIE_MSAFP: + /* match 2nd/3rd instruction in __kvm_restore_msacsr */ + if (!cpu_has_msa || + pc < (unsigned long)&__kvm_restore_msacsr + 4 || + pc > (unsigned long)&__kvm_restore_msacsr + 8) + return NOTIFY_DONE; + break; + } + + /* Move PC forward a little and continue executing */ + instruction_pointer(regs) += 4; + + return NOTIFY_STOP; +} + +static struct notifier_block kvm_mips_csr_die_notifier = { + .notifier_call = kvm_mips_csr_die_notify, +}; + +static int __init kvm_mips_init(void) +{ + int ret; + + ret = kvm_mips_entry_setup(); + if (ret) + return ret; + + ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + + if (ret) + return ret; + + register_die_notifier(&kvm_mips_csr_die_notifier); + + return 0; +} + +static void __exit kvm_mips_exit(void) +{ + kvm_exit(); + + unregister_die_notifier(&kvm_mips_csr_die_notifier); +} + +module_init(kvm_mips_init); +module_exit(kvm_mips_exit); + +EXPORT_TRACEPOINT_SYMBOL(kvm_exit); diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c new file mode 100644 index 000000000..098a7afd4 --- /dev/null +++ b/arch/mips/kvm/mmu.c @@ -0,0 +1,1261 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS MMU handling in the KVM module. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/highmem.h> +#include <linux/kvm_host.h> +#include <linux/uaccess.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> + +/* + * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels + * for which pages need to be cached. + */ +#if defined(__PAGETABLE_PMD_FOLDED) +#define KVM_MMU_CACHE_MIN_PAGES 1 +#else +#define KVM_MMU_CACHE_MIN_PAGES 2 +#endif + +static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, + int min, int max) +{ + void *page; + + BUG_ON(max > KVM_NR_MEM_OBJS); + if (cache->nobjs >= min) + return 0; + while (cache->nobjs < max) { + page = (void *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + cache->objects[cache->nobjs++] = page; + } + return 0; +} + +static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) +{ + while (mc->nobjs) + free_page((unsigned long)mc->objects[--mc->nobjs]); +} + +static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) +{ + void *p; + + BUG_ON(!mc || !mc->nobjs); + p = mc->objects[--mc->nobjs]; + return p; +} + +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) +{ + mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); +} + +/** + * kvm_pgd_init() - Initialise KVM GPA page directory. + * @page: Pointer to page directory (PGD) for KVM GPA. 
+ * + * Initialise a KVM GPA page directory with pointers to the invalid table, i.e. + * representing no mappings. This is similar to pgd_init(), however it + * initialises all the page directory pointers, not just the ones corresponding + * to the userland address space (since it is for the guest physical address + * space rather than a virtual address space). + */ +static void kvm_pgd_init(void *page) +{ + unsigned long *p, *end; + unsigned long entry; + +#ifdef __PAGETABLE_PMD_FOLDED + entry = (unsigned long)invalid_pte_table; +#else + entry = (unsigned long)invalid_pmd_table; +#endif + + p = (unsigned long *)page; + end = p + PTRS_PER_PGD; + + do { + p[0] = entry; + p[1] = entry; + p[2] = entry; + p[3] = entry; + p[4] = entry; + p += 8; + p[-3] = entry; + p[-2] = entry; + p[-1] = entry; + } while (p != end); +} + +/** + * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory. + * + * Allocate a blank KVM GPA page directory (PGD) for representing guest physical + * to host physical page mappings. + * + * Returns: Pointer to new KVM GPA page directory. + * NULL on allocation failure. + */ +pgd_t *kvm_pgd_alloc(void) +{ + pgd_t *ret; + + ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_ORDER); + if (ret) + kvm_pgd_init(ret); + + return ret; +} + +/** + * kvm_mips_walk_pgd() - Walk page table with optional allocation. + * @pgd: Page directory pointer. + * @addr: Address to index page table using. + * @cache: MMU page cache to allocate new page tables from, or NULL. + * + * Walk the page tables pointed to by @pgd to find the PTE corresponding to the + * address @addr. If page tables don't exist for @addr, they will be created + * from the MMU cache if @cache is not NULL. + * + * Returns: Pointer to pte_t corresponding to @addr. + * NULL if a page table doesn't exist for @addr and !@cache. + * NULL if a page table allocation failed. + */ +static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, + unsigned long addr) +{ + pud_t *pud; + pmd_t *pmd; + + pgd += pgd_index(addr); + if (pgd_none(*pgd)) { + /* Not used on MIPS yet */ + BUG(); + return NULL; + } + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { + pmd_t *new_pmd; + + if (!cache) + return NULL; + new_pmd = mmu_memory_cache_alloc(cache); + pmd_init((unsigned long)new_pmd, + (unsigned long)invalid_pte_table); + pud_populate(NULL, pud, new_pmd); + } + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t *new_pte; + + if (!cache) + return NULL; + new_pte = mmu_memory_cache_alloc(cache); + clear_page(new_pte); + pmd_populate_kernel(NULL, pmd, new_pte); + } + return pte_offset(pmd, addr); +} + +/* Caller must hold kvm->mm_lock */ +static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm, + struct kvm_mmu_memory_cache *cache, + unsigned long addr) +{ + return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr); +} + +/* + * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}. + * Flush a range of guest physical address space from the VM's GPA page tables. 
+ */ + +static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa, + unsigned long end_gpa) +{ + int i_min = __pte_offset(start_gpa); + int i_max = __pte_offset(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1); + int i; + + for (i = i_min; i <= i_max; ++i) { + if (!pte_present(pte[i])) + continue; + + set_pte(pte + i, __pte(0)); + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa, + unsigned long end_gpa) +{ + pte_t *pte; + unsigned long end = ~0ul; + int i_min = __pmd_offset(start_gpa); + int i_max = __pmd_offset(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pmd_present(pmd[i])) + continue; + + pte = pte_offset(pmd + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) { + pmd_clear(pmd + i); + pte_free_kernel(NULL, pte); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa, + unsigned long end_gpa) +{ + pmd_t *pmd; + unsigned long end = ~0ul; + int i_min = __pud_offset(start_gpa); + int i_max = __pud_offset(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pud_present(pud[i])) + continue; + + pmd = pmd_offset(pud + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) { + pud_clear(pud + i); + pmd_free(NULL, pmd); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa, + unsigned long end_gpa) +{ + pud_t *pud; + unsigned long end = ~0ul; + int i_min = pgd_index(start_gpa); + int i_max = pgd_index(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pgd_present(pgd[i])) + continue; + + pud = pud_offset(pgd + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) { + pgd_clear(pgd + i); + pud_free(NULL, pud); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +/** + * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses. + * @kvm: KVM pointer. + * @start_gfn: Guest frame number of first page in GPA range to flush. + * @end_gfn: Guest frame number of last page in GPA range to flush. + * + * Flushes a range of GPA mappings from the GPA page tables. + * + * The caller must hold the @kvm->mmu_lock spinlock. + * + * Returns: Whether its safe to remove the top level page directory because + * all lower levels have been removed. 
+ */ +bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn) +{ + return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd, + start_gfn << PAGE_SHIFT, + end_gfn << PAGE_SHIFT); +} + +#define BUILD_PTE_RANGE_OP(name, op) \ +static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + int i_min = __pte_offset(start); \ + int i_max = __pte_offset(end); \ + int i; \ + pte_t old, new; \ + \ + for (i = i_min; i <= i_max; ++i) { \ + if (!pte_present(pte[i])) \ + continue; \ + \ + old = pte[i]; \ + new = op(old); \ + if (pte_val(new) == pte_val(old)) \ + continue; \ + set_pte(pte + i, new); \ + ret = 1; \ + } \ + return ret; \ +} \ + \ +/* returns true if anything was done */ \ +static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + pte_t *pte; \ + unsigned long cur_end = ~0ul; \ + int i_min = __pmd_offset(start); \ + int i_max = __pmd_offset(end); \ + int i; \ + \ + for (i = i_min; i <= i_max; ++i, start = 0) { \ + if (!pmd_present(pmd[i])) \ + continue; \ + \ + pte = pte_offset(pmd + i, 0); \ + if (i == i_max) \ + cur_end = end; \ + \ + ret |= kvm_mips_##name##_pte(pte, start, cur_end); \ + } \ + return ret; \ +} \ + \ +static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + pmd_t *pmd; \ + unsigned long cur_end = ~0ul; \ + int i_min = __pud_offset(start); \ + int i_max = __pud_offset(end); \ + int i; \ + \ + for (i = i_min; i <= i_max; ++i, start = 0) { \ + if (!pud_present(pud[i])) \ + continue; \ + \ + pmd = pmd_offset(pud + i, 0); \ + if (i == i_max) \ + cur_end = end; \ + \ + ret |= kvm_mips_##name##_pmd(pmd, start, cur_end); \ + } \ + return ret; \ +} \ + \ +static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + pud_t *pud; \ + unsigned long cur_end = ~0ul; \ + int i_min = pgd_index(start); \ + int i_max = pgd_index(end); \ + int i; \ + \ + for (i = i_min; i <= i_max; ++i, start = 0) { \ + if (!pgd_present(pgd[i])) \ + continue; \ + \ + pud = pud_offset(pgd + i, 0); \ + if (i == i_max) \ + cur_end = end; \ + \ + ret |= kvm_mips_##name##_pud(pud, start, cur_end); \ + } \ + return ret; \ +} + +/* + * kvm_mips_mkclean_gpa_pt. + * Mark a range of guest physical address space clean (writes fault) in the VM's + * GPA page table to allow dirty page tracking. + */ + +BUILD_PTE_RANGE_OP(mkclean, pte_mkclean) + +/** + * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean. + * @kvm: KVM pointer. + * @start_gfn: Guest frame number of first page in GPA range to flush. + * @end_gfn: Guest frame number of last page in GPA range to flush. + * + * Make a range of GPA mappings clean so that guest writes will fault and + * trigger dirty page logging. + * + * The caller must hold the @kvm->mmu_lock spinlock. + * + * Returns: Whether any GPA mappings were modified, which would require + * derived mappings (GVA page tables & TLB enties) to be + * invalidated. 
+ */
+int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
+{
+        return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
+                                    start_gfn << PAGE_SHIFT,
+                                    end_gfn << PAGE_SHIFT);
+}
+
+/**
+ * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
+ * @kvm:        The KVM pointer
+ * @slot:       The memory slot associated with mask
+ * @gfn_offset: The gfn offset in memory slot
+ * @mask:       The mask of dirty pages at offset 'gfn_offset' in this memory
+ *              slot to be write protected
+ *
+ * Walks the bits set in @mask and write protects the corresponding PTEs.
+ * The caller must acquire @kvm->mmu_lock.
+ */
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+                struct kvm_memory_slot *slot,
+                gfn_t gfn_offset, unsigned long mask)
+{
+        gfn_t base_gfn = slot->base_gfn + gfn_offset;
+        gfn_t start = base_gfn + __ffs(mask);
+        gfn_t end = base_gfn + __fls(mask);
+
+        kvm_mips_mkclean_gpa_pt(kvm, start, end);
+}
+
+/*
+ * kvm_mips_mkold_gpa_pt.
+ * Mark a range of guest physical address space old (all accesses fault) in the
+ * VM's GPA page table to allow detection of commonly used pages.
+ */
+
+BUILD_PTE_RANGE_OP(mkold, pte_mkold)
+
+static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
+                                 gfn_t end_gfn)
+{
+        return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
+                                  start_gfn << PAGE_SHIFT,
+                                  end_gfn << PAGE_SHIFT);
+}
+
+static int handle_hva_to_gpa(struct kvm *kvm,
+                             unsigned long start,
+                             unsigned long end,
+                             int (*handler)(struct kvm *kvm, gfn_t gfn,
+                                            gpa_t gfn_end,
+                                            struct kvm_memory_slot *memslot,
+                                            void *data),
+                             void *data)
+{
+        struct kvm_memslots *slots;
+        struct kvm_memory_slot *memslot;
+        int ret = 0;
+
+        slots = kvm_memslots(kvm);
+
+        /* we only care about the pages that the guest sees */
+        kvm_for_each_memslot(memslot, slots) {
+                unsigned long hva_start, hva_end;
+                gfn_t gfn, gfn_end;
+
+                hva_start = max(start, memslot->userspace_addr);
+                hva_end = min(end, memslot->userspace_addr +
+                                        (memslot->npages << PAGE_SHIFT));
+                if (hva_start >= hva_end)
+                        continue;
+
+                /*
+                 * {gfn(page) | page intersects with [hva_start, hva_end)} =
+                 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
+ */ + gfn = hva_to_gfn_memslot(hva_start, memslot); + gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); + + ret |= handler(kvm, gfn, gfn_end, memslot, data); + } + + return ret; +} + + +static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + kvm_mips_flush_gpa_pt(kvm, gfn, gfn_end); + return 1; +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + bool blockable) +{ + handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); + + kvm_mips_callbacks->flush_shadow_all(kvm); + return 0; +} + +static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + gpa_t gpa = gfn << PAGE_SHIFT; + pte_t hva_pte = *(pte_t *)data; + pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); + pte_t old_pte; + + if (!gpa_pte) + return 0; + + /* Mapping may need adjusting depending on memslot flags */ + old_pte = *gpa_pte; + if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte)) + hva_pte = pte_mkclean(hva_pte); + else if (memslot->flags & KVM_MEM_READONLY) + hva_pte = pte_wrprotect(hva_pte); + + set_pte(gpa_pte, hva_pte); + + /* Replacing an absent or old page doesn't need flushes */ + if (!pte_present(old_pte) || !pte_young(old_pte)) + return 0; + + /* Pages swapped, aged, moved, or cleaned require flushes */ + return !pte_present(hva_pte) || + !pte_young(hva_pte) || + pte_pfn(old_pte) != pte_pfn(hva_pte) || + (pte_dirty(old_pte) && !pte_dirty(hva_pte)); +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + unsigned long end = hva + PAGE_SIZE; + int ret; + + ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte); + if (ret) + kvm_mips_callbacks->flush_shadow_all(kvm); +} + +static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + return kvm_mips_mkold_gpa_pt(kvm, gfn, gfn_end); +} + +static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + gpa_t gpa = gfn << PAGE_SHIFT; + pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); + + if (!gpa_pte) + return 0; + return pte_young(*gpa_pte); +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); +} + +/** + * _kvm_mips_map_page_fast() - Fast path GPA fault handler. + * @vcpu: VCPU pointer. + * @gpa: Guest physical address of fault. + * @write_fault: Whether the fault was due to a write. + * @out_entry: New PTE for @gpa (written on success unless NULL). + * @out_buddy: New PTE for @gpa's buddy (written on success unless + * NULL). + * + * Perform fast path GPA fault handling, doing all that can be done without + * calling into KVM. This handles marking old pages young (for idle page + * tracking), and dirtying of clean pages (for dirty page logging). + * + * Returns: 0 on success, in which case we can update derived mappings and + * resume guest execution. + * -EFAULT on failure due to absent GPA mapping or write to + * read-only page, in which case KVM must be consulted. 
+ */ +static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, + bool write_fault, + pte_t *out_entry, pte_t *out_buddy) +{ + struct kvm *kvm = vcpu->kvm; + gfn_t gfn = gpa >> PAGE_SHIFT; + pte_t *ptep; + kvm_pfn_t pfn = 0; /* silence bogus GCC warning */ + bool pfn_valid = false; + int ret = 0; + + spin_lock(&kvm->mmu_lock); + + /* Fast path - just check GPA page table for an existing entry */ + ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa); + if (!ptep || !pte_present(*ptep)) { + ret = -EFAULT; + goto out; + } + + /* Track access to pages marked old */ + if (!pte_young(*ptep)) { + set_pte(ptep, pte_mkyoung(*ptep)); + pfn = pte_pfn(*ptep); + pfn_valid = true; + /* call kvm_set_pfn_accessed() after unlock */ + } + if (write_fault && !pte_dirty(*ptep)) { + if (!pte_write(*ptep)) { + ret = -EFAULT; + goto out; + } + + /* Track dirtying of writeable pages */ + set_pte(ptep, pte_mkdirty(*ptep)); + pfn = pte_pfn(*ptep); + mark_page_dirty(kvm, gfn); + kvm_set_pfn_dirty(pfn); + } + + if (out_entry) + *out_entry = *ptep; + if (out_buddy) + *out_buddy = *ptep_buddy(ptep); + +out: + spin_unlock(&kvm->mmu_lock); + if (pfn_valid) + kvm_set_pfn_accessed(pfn); + return ret; +} + +/** + * kvm_mips_map_page() - Map a guest physical page. + * @vcpu: VCPU pointer. + * @gpa: Guest physical address of fault. + * @write_fault: Whether the fault was due to a write. + * @out_entry: New PTE for @gpa (written on success unless NULL). + * @out_buddy: New PTE for @gpa's buddy (written on success unless + * NULL). + * + * Handle GPA faults by creating a new GPA mapping (or updating an existing + * one). + * + * This takes care of marking pages young or dirty (idle/dirty page tracking), + * asking KVM for the corresponding PFN, and creating a mapping in the GPA page + * tables. Derived mappings (GVA page tables and TLBs) must be handled by the + * caller. + * + * Returns: 0 on success, in which case the caller may use the @out_entry + * and @out_buddy PTEs to update derived mappings and resume guest + * execution. + * -EFAULT if there is no memory region at @gpa or a write was + * attempted to a read-only memory region. This is usually handled + * as an MMIO access. + */ +static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, + bool write_fault, + pte_t *out_entry, pte_t *out_buddy) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + gfn_t gfn = gpa >> PAGE_SHIFT; + int srcu_idx, err; + kvm_pfn_t pfn; + pte_t *ptep, entry, old_pte; + bool writeable; + unsigned long prot_bits; + unsigned long mmu_seq; + + /* Try the fast path to handle old / clean pages */ + srcu_idx = srcu_read_lock(&kvm->srcu); + err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry, + out_buddy); + if (!err) + goto out; + + /* We need a minimum of cached pages ready for page table creation */ + err = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, + KVM_NR_MEM_OBJS); + if (err) + goto out; + +retry: + /* + * Used to check for invalidations in progress, of the pfn that is + * returned by pfn_to_pfn_prot below. + */ + mmu_seq = kvm->mmu_notifier_seq; + /* + * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in + * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't + * risk the page we get a reference to getting unmapped before we have a + * chance to grab the mmu_lock without mmu_notifier_retry() noticing. 
+ * + * This smp_rmb() pairs with the effective smp_wmb() of the combination + * of the pte_unmap_unlock() after the PTE is zapped, and the + * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before + * mmu_notifier_seq is incremented. + */ + smp_rmb(); + + /* Slow path - ask KVM core whether we can access this GPA */ + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable); + if (is_error_noslot_pfn(pfn)) { + err = -EFAULT; + goto out; + } + + spin_lock(&kvm->mmu_lock); + /* Check if an invalidation has taken place since we got pfn */ + if (mmu_notifier_retry(kvm, mmu_seq)) { + /* + * This can happen when mappings are changed asynchronously, but + * also synchronously if a COW is triggered by + * gfn_to_pfn_prot(). + */ + spin_unlock(&kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + goto retry; + } + + /* Ensure page tables are allocated */ + ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa); + + /* Set up the PTE */ + prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default; + if (writeable) { + prot_bits |= _PAGE_WRITE; + if (write_fault) { + prot_bits |= __WRITEABLE; + mark_page_dirty(kvm, gfn); + kvm_set_pfn_dirty(pfn); + } + } + entry = pfn_pte(pfn, __pgprot(prot_bits)); + + /* Write the PTE */ + old_pte = *ptep; + set_pte(ptep, entry); + + err = 0; + if (out_entry) + *out_entry = *ptep; + if (out_buddy) + *out_buddy = *ptep_buddy(ptep); + + spin_unlock(&kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + kvm_set_pfn_accessed(pfn); +out: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return err; +} + +static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu *vcpu, + unsigned long addr) +{ + struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + pgd_t *pgdp; + int ret; + + /* We need a minimum of cached pages ready for page table creation */ + ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, + KVM_NR_MEM_OBJS); + if (ret) + return NULL; + + if (KVM_GUEST_KERNEL_MODE(vcpu)) + pgdp = vcpu->arch.guest_kernel_mm.pgd; + else + pgdp = vcpu->arch.guest_user_mm.pgd; + + return kvm_mips_walk_pgd(pgdp, memcache, addr); +} + +void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr, + bool user) +{ + pgd_t *pgdp; + pte_t *ptep; + + addr &= PAGE_MASK << 1; + + pgdp = vcpu->arch.guest_kernel_mm.pgd; + ptep = kvm_mips_walk_pgd(pgdp, NULL, addr); + if (ptep) { + ptep[0] = pfn_pte(0, __pgprot(0)); + ptep[1] = pfn_pte(0, __pgprot(0)); + } + + if (user) { + pgdp = vcpu->arch.guest_user_mm.pgd; + ptep = kvm_mips_walk_pgd(pgdp, NULL, addr); + if (ptep) { + ptep[0] = pfn_pte(0, __pgprot(0)); + ptep[1] = pfn_pte(0, __pgprot(0)); + } + } +} + +/* + * kvm_mips_flush_gva_{pte,pmd,pud,pgd,pt}. + * Flush a range of guest physical address space from the VM's GPA page tables. + */ + +static bool kvm_mips_flush_gva_pte(pte_t *pte, unsigned long start_gva, + unsigned long end_gva) +{ + int i_min = __pte_offset(start_gva); + int i_max = __pte_offset(end_gva); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1); + int i; + + /* + * There's no freeing to do, so there's no point clearing individual + * entries unless only part of the last level page table needs flushing. 
+ */ + if (safe_to_remove) + return true; + + for (i = i_min; i <= i_max; ++i) { + if (!pte_present(pte[i])) + continue; + + set_pte(pte + i, __pte(0)); + } + return false; +} + +static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva, + unsigned long end_gva) +{ + pte_t *pte; + unsigned long end = ~0ul; + int i_min = __pmd_offset(start_gva); + int i_max = __pmd_offset(end_gva); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gva = 0) { + if (!pmd_present(pmd[i])) + continue; + + pte = pte_offset(pmd + i, 0); + if (i == i_max) + end = end_gva; + + if (kvm_mips_flush_gva_pte(pte, start_gva, end)) { + pmd_clear(pmd + i); + pte_free_kernel(NULL, pte); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva, + unsigned long end_gva) +{ + pmd_t *pmd; + unsigned long end = ~0ul; + int i_min = __pud_offset(start_gva); + int i_max = __pud_offset(end_gva); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gva = 0) { + if (!pud_present(pud[i])) + continue; + + pmd = pmd_offset(pud + i, 0); + if (i == i_max) + end = end_gva; + + if (kvm_mips_flush_gva_pmd(pmd, start_gva, end)) { + pud_clear(pud + i); + pmd_free(NULL, pmd); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva, + unsigned long end_gva) +{ + pud_t *pud; + unsigned long end = ~0ul; + int i_min = pgd_index(start_gva); + int i_max = pgd_index(end_gva); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gva = 0) { + if (!pgd_present(pgd[i])) + continue; + + pud = pud_offset(pgd + i, 0); + if (i == i_max) + end = end_gva; + + if (kvm_mips_flush_gva_pud(pud, start_gva, end)) { + pgd_clear(pgd + i); + pud_free(NULL, pud); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags) +{ + if (flags & KMF_GPA) { + /* all of guest virtual address space could be affected */ + if (flags & KMF_KERN) + /* useg, kseg0, seg2/3 */ + kvm_mips_flush_gva_pgd(pgd, 0, 0x7fffffff); + else + /* useg */ + kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff); + } else { + /* useg */ + kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff); + + /* kseg2/3 */ + if (flags & KMF_KERN) + kvm_mips_flush_gva_pgd(pgd, 0x60000000, 0x7fffffff); + } +} + +static pte_t kvm_mips_gpa_pte_to_gva_unmapped(pte_t pte) +{ + /* + * Don't leak writeable but clean entries from GPA page tables. We don't + * want the normal Linux tlbmod handler to handle dirtying when KVM + * accesses guest memory. 
+ */ + if (!pte_dirty(pte)) + pte = pte_wrprotect(pte); + + return pte; +} + +static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo) +{ + /* Guest EntryLo overrides host EntryLo */ + if (!(entrylo & ENTRYLO_D)) + pte = pte_mkclean(pte); + + return kvm_mips_gpa_pte_to_gva_unmapped(pte); +} + +#ifdef CONFIG_KVM_MIPS_VZ +int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr, + struct kvm_vcpu *vcpu, + bool write_fault) +{ + int ret; + + ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL); + if (ret) + return ret; + + /* Invalidate this entry in the TLB */ + return kvm_vz_host_tlb_inv(vcpu, badvaddr); +} +#endif + +/* XXXKYMA: Must be called with interrupts disabled */ +int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, + struct kvm_vcpu *vcpu, + bool write_fault) +{ + unsigned long gpa; + pte_t pte_gpa[2], *ptep_gva; + int idx; + + if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) { + kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr); + kvm_mips_dump_host_tlbs(); + return -1; + } + + /* Get the GPA page table entry */ + gpa = KVM_GUEST_CPHYSADDR(badvaddr); + idx = (badvaddr >> PAGE_SHIFT) & 1; + if (kvm_mips_map_page(vcpu, gpa, write_fault, &pte_gpa[idx], + &pte_gpa[!idx]) < 0) + return -1; + + /* Get the GVA page table entry */ + ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, badvaddr & ~PAGE_SIZE); + if (!ptep_gva) { + kvm_err("No ptep for gva %lx\n", badvaddr); + return -1; + } + + /* Copy a pair of entries from GPA page table to GVA page table */ + ptep_gva[0] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[0]); + ptep_gva[1] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[1]); + + /* Invalidate this entry in the TLB, guest kernel ASID only */ + kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true); + return 0; +} + +int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, + struct kvm_mips_tlb *tlb, + unsigned long gva, + bool write_fault) +{ + struct kvm *kvm = vcpu->kvm; + long tlb_lo[2]; + pte_t pte_gpa[2], *ptep_buddy, *ptep_gva; + unsigned int idx = TLB_LO_IDX(*tlb, gva); + bool kernel = KVM_GUEST_KERNEL_MODE(vcpu); + + tlb_lo[0] = tlb->tlb_lo[0]; + tlb_lo[1] = tlb->tlb_lo[1]; + + /* + * The commpage address must not be mapped to anything else if the guest + * TLB contains entries nearby, or commpage accesses will break. 
+ */ + if (!((gva ^ KVM_GUEST_COMMPAGE_ADDR) & VPN2_MASK & (PAGE_MASK << 1))) + tlb_lo[TLB_LO_IDX(*tlb, KVM_GUEST_COMMPAGE_ADDR)] = 0; + + /* Get the GPA page table entry */ + if (kvm_mips_map_page(vcpu, mips3_tlbpfn_to_paddr(tlb_lo[idx]), + write_fault, &pte_gpa[idx], NULL) < 0) + return -1; + + /* And its GVA buddy's GPA page table entry if it also exists */ + pte_gpa[!idx] = pfn_pte(0, __pgprot(0)); + if (tlb_lo[!idx] & ENTRYLO_V) { + spin_lock(&kvm->mmu_lock); + ptep_buddy = kvm_mips_pte_for_gpa(kvm, NULL, + mips3_tlbpfn_to_paddr(tlb_lo[!idx])); + if (ptep_buddy) + pte_gpa[!idx] = *ptep_buddy; + spin_unlock(&kvm->mmu_lock); + } + + /* Get the GVA page table entry pair */ + ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, gva & ~PAGE_SIZE); + if (!ptep_gva) { + kvm_err("No ptep for gva %lx\n", gva); + return -1; + } + + /* Copy a pair of entries from GPA page table to GVA page table */ + ptep_gva[0] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[0], tlb_lo[0]); + ptep_gva[1] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[1], tlb_lo[1]); + + /* Invalidate this entry in the TLB, current guest mode ASID only */ + kvm_mips_host_tlb_inv(vcpu, gva, !kernel, kernel); + + kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, + tlb->tlb_lo[0], tlb->tlb_lo[1]); + + return 0; +} + +int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, + struct kvm_vcpu *vcpu) +{ + kvm_pfn_t pfn; + pte_t *ptep; + + ptep = kvm_trap_emul_pte_for_gva(vcpu, badvaddr); + if (!ptep) { + kvm_err("No ptep for commpage %lx\n", badvaddr); + return -1; + } + + pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage)); + /* Also set valid and dirty, so refill handler doesn't have to */ + *ptep = pte_mkyoung(pte_mkdirty(pfn_pte(pfn, PAGE_SHARED))); + + /* Invalidate this entry in the TLB, guest kernel ASID only */ + kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true); + return 0; +} + +/** + * kvm_mips_migrate_count() - Migrate timer. + * @vcpu: Virtual CPU. + * + * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it + * if it was running prior to being cancelled. + * + * Must be called when the VCPU is migrated to a different CPU to ensure that + * timer expiry during guest execution interrupts the guest and causes the + * interrupt to be delivered in a timely manner. + */ +static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu) +{ + if (hrtimer_cancel(&vcpu->arch.comparecount_timer)) + hrtimer_restart(&vcpu->arch.comparecount_timer); +} + +/* Restore ASID once we are scheduled back after preemption */ +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + unsigned long flags; + + kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu); + + local_irq_save(flags); + + vcpu->cpu = cpu; + if (vcpu->arch.last_sched_cpu != cpu) { + kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", + vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); + /* + * Migrate the timer interrupt to the current CPU so that it + * always interrupts the guest and synchronously triggers a + * guest timer interrupt. 
+ */ + kvm_mips_migrate_count(vcpu); + } + + /* restore guest state to registers */ + kvm_mips_callbacks->vcpu_load(vcpu, cpu); + + local_irq_restore(flags); +} + +/* ASID can change if another task is scheduled during preemption */ +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + int cpu; + + local_irq_save(flags); + + cpu = smp_processor_id(); + vcpu->arch.last_sched_cpu = cpu; + vcpu->cpu = -1; + + /* save guest state in registers */ + kvm_mips_callbacks->vcpu_put(vcpu, cpu); + + local_irq_restore(flags); +} + +/** + * kvm_trap_emul_gva_fault() - Safely attempt to handle a GVA access fault. + * @vcpu: Virtual CPU. + * @gva: Guest virtual address to be accessed. + * @write: True if write attempted (must be dirtied and made writable). + * + * Safely attempt to handle a GVA fault, mapping GVA pages if necessary, and + * dirtying the page if @write so that guest instructions can be modified. + * + * Returns: KVM_MIPS_MAPPED on success. + * KVM_MIPS_GVA if bad guest virtual address. + * KVM_MIPS_GPA if bad guest physical address. + * KVM_MIPS_TLB if guest TLB not present. + * KVM_MIPS_TLBINV if guest TLB present but not valid. + * KVM_MIPS_TLBMOD if guest TLB read only. + */ +enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu, + unsigned long gva, + bool write) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_mips_tlb *tlb; + int index; + + if (KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG0) { + if (kvm_mips_handle_kseg0_tlb_fault(gva, vcpu, write) < 0) + return KVM_MIPS_GPA; + } else if ((KVM_GUEST_KSEGX(gva) < KVM_GUEST_KSEG0) || + KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG23) { + /* Address should be in the guest TLB */ + index = kvm_mips_guest_tlb_lookup(vcpu, (gva & VPN2_MASK) | + (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID)); + if (index < 0) + return KVM_MIPS_TLB; + tlb = &vcpu->arch.guest_tlb[index]; + + /* Entry should be valid, and dirty for writes */ + if (!TLB_IS_VALID(*tlb, gva)) + return KVM_MIPS_TLBINV; + if (write && !TLB_IS_DIRTY(*tlb, gva)) + return KVM_MIPS_TLBMOD; + + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, gva, write)) + return KVM_MIPS_GPA; + } else { + return KVM_MIPS_GVA; + } + + return KVM_MIPS_MAPPED; +} + +int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) +{ + int err; + + if (WARN(IS_ENABLED(CONFIG_KVM_MIPS_VZ), + "Expect BadInstr/BadInstrP registers to be used with VZ\n")) + return -EINVAL; + +retry: + kvm_trap_emul_gva_lockless_begin(vcpu); + err = get_user(*out, opc); + kvm_trap_emul_gva_lockless_end(vcpu); + + if (unlikely(err)) { + /* + * Try to handle the fault, maybe we just raced with a GVA + * invalidation. + */ + err = kvm_trap_emul_gva_fault(vcpu, (unsigned long)opc, + false); + if (unlikely(err)) { + kvm_err("%s: illegal address: %p\n", + __func__, opc); + return -EFAULT; + } + + /* Hopefully it'll work now */ + goto retry; + } + return 0; +} diff --git a/arch/mips/kvm/msa.S b/arch/mips/kvm/msa.S new file mode 100644 index 000000000..d02f0c6cc --- /dev/null +++ b/arch/mips/kvm/msa.S @@ -0,0 +1,161 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * MIPS SIMD Architecture (MSA) context handling code for KVM. + * + * Copyright (C) 2015 Imagination Technologies Ltd. 
+ */ + +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/asmmacro.h> +#include <asm/regdef.h> + + .set noreorder + .set noat + +LEAF(__kvm_save_msa) + st_d 0, VCPU_FPR0, a0 + st_d 1, VCPU_FPR1, a0 + st_d 2, VCPU_FPR2, a0 + st_d 3, VCPU_FPR3, a0 + st_d 4, VCPU_FPR4, a0 + st_d 5, VCPU_FPR5, a0 + st_d 6, VCPU_FPR6, a0 + st_d 7, VCPU_FPR7, a0 + st_d 8, VCPU_FPR8, a0 + st_d 9, VCPU_FPR9, a0 + st_d 10, VCPU_FPR10, a0 + st_d 11, VCPU_FPR11, a0 + st_d 12, VCPU_FPR12, a0 + st_d 13, VCPU_FPR13, a0 + st_d 14, VCPU_FPR14, a0 + st_d 15, VCPU_FPR15, a0 + st_d 16, VCPU_FPR16, a0 + st_d 17, VCPU_FPR17, a0 + st_d 18, VCPU_FPR18, a0 + st_d 19, VCPU_FPR19, a0 + st_d 20, VCPU_FPR20, a0 + st_d 21, VCPU_FPR21, a0 + st_d 22, VCPU_FPR22, a0 + st_d 23, VCPU_FPR23, a0 + st_d 24, VCPU_FPR24, a0 + st_d 25, VCPU_FPR25, a0 + st_d 26, VCPU_FPR26, a0 + st_d 27, VCPU_FPR27, a0 + st_d 28, VCPU_FPR28, a0 + st_d 29, VCPU_FPR29, a0 + st_d 30, VCPU_FPR30, a0 + st_d 31, VCPU_FPR31, a0 + jr ra + nop + END(__kvm_save_msa) + +LEAF(__kvm_restore_msa) + ld_d 0, VCPU_FPR0, a0 + ld_d 1, VCPU_FPR1, a0 + ld_d 2, VCPU_FPR2, a0 + ld_d 3, VCPU_FPR3, a0 + ld_d 4, VCPU_FPR4, a0 + ld_d 5, VCPU_FPR5, a0 + ld_d 6, VCPU_FPR6, a0 + ld_d 7, VCPU_FPR7, a0 + ld_d 8, VCPU_FPR8, a0 + ld_d 9, VCPU_FPR9, a0 + ld_d 10, VCPU_FPR10, a0 + ld_d 11, VCPU_FPR11, a0 + ld_d 12, VCPU_FPR12, a0 + ld_d 13, VCPU_FPR13, a0 + ld_d 14, VCPU_FPR14, a0 + ld_d 15, VCPU_FPR15, a0 + ld_d 16, VCPU_FPR16, a0 + ld_d 17, VCPU_FPR17, a0 + ld_d 18, VCPU_FPR18, a0 + ld_d 19, VCPU_FPR19, a0 + ld_d 20, VCPU_FPR20, a0 + ld_d 21, VCPU_FPR21, a0 + ld_d 22, VCPU_FPR22, a0 + ld_d 23, VCPU_FPR23, a0 + ld_d 24, VCPU_FPR24, a0 + ld_d 25, VCPU_FPR25, a0 + ld_d 26, VCPU_FPR26, a0 + ld_d 27, VCPU_FPR27, a0 + ld_d 28, VCPU_FPR28, a0 + ld_d 29, VCPU_FPR29, a0 + ld_d 30, VCPU_FPR30, a0 + ld_d 31, VCPU_FPR31, a0 + jr ra + nop + END(__kvm_restore_msa) + + .macro kvm_restore_msa_upper wr, off, base + .set push + .set noat +#ifdef CONFIG_64BIT + ld $1, \off(\base) + insert_d \wr, 1 +#elif defined(CONFIG_CPU_LITTLE_ENDIAN) + lw $1, \off(\base) + insert_w \wr, 2 + lw $1, (\off+4)(\base) + insert_w \wr, 3 +#else /* CONFIG_CPU_BIG_ENDIAN */ + lw $1, (\off+4)(\base) + insert_w \wr, 2 + lw $1, \off(\base) + insert_w \wr, 3 +#endif + .set pop + .endm + +LEAF(__kvm_restore_msa_upper) + kvm_restore_msa_upper 0, VCPU_FPR0 +8, a0 + kvm_restore_msa_upper 1, VCPU_FPR1 +8, a0 + kvm_restore_msa_upper 2, VCPU_FPR2 +8, a0 + kvm_restore_msa_upper 3, VCPU_FPR3 +8, a0 + kvm_restore_msa_upper 4, VCPU_FPR4 +8, a0 + kvm_restore_msa_upper 5, VCPU_FPR5 +8, a0 + kvm_restore_msa_upper 6, VCPU_FPR6 +8, a0 + kvm_restore_msa_upper 7, VCPU_FPR7 +8, a0 + kvm_restore_msa_upper 8, VCPU_FPR8 +8, a0 + kvm_restore_msa_upper 9, VCPU_FPR9 +8, a0 + kvm_restore_msa_upper 10, VCPU_FPR10+8, a0 + kvm_restore_msa_upper 11, VCPU_FPR11+8, a0 + kvm_restore_msa_upper 12, VCPU_FPR12+8, a0 + kvm_restore_msa_upper 13, VCPU_FPR13+8, a0 + kvm_restore_msa_upper 14, VCPU_FPR14+8, a0 + kvm_restore_msa_upper 15, VCPU_FPR15+8, a0 + kvm_restore_msa_upper 16, VCPU_FPR16+8, a0 + kvm_restore_msa_upper 17, VCPU_FPR17+8, a0 + kvm_restore_msa_upper 18, VCPU_FPR18+8, a0 + kvm_restore_msa_upper 19, VCPU_FPR19+8, a0 + kvm_restore_msa_upper 20, VCPU_FPR20+8, a0 + kvm_restore_msa_upper 21, VCPU_FPR21+8, a0 + kvm_restore_msa_upper 22, VCPU_FPR22+8, a0 + kvm_restore_msa_upper 23, VCPU_FPR23+8, a0 + kvm_restore_msa_upper 24, VCPU_FPR24+8, a0 + kvm_restore_msa_upper 25, VCPU_FPR25+8, a0 + kvm_restore_msa_upper 26, VCPU_FPR26+8, a0 + kvm_restore_msa_upper 
27, VCPU_FPR27+8, a0 + kvm_restore_msa_upper 28, VCPU_FPR28+8, a0 + kvm_restore_msa_upper 29, VCPU_FPR29+8, a0 + kvm_restore_msa_upper 30, VCPU_FPR30+8, a0 + kvm_restore_msa_upper 31, VCPU_FPR31+8, a0 + jr ra + nop + END(__kvm_restore_msa_upper) + +LEAF(__kvm_restore_msacsr) + lw t0, VCPU_MSA_CSR(a0) + /* + * The ctcmsa must stay at this offset in __kvm_restore_msacsr. + * See kvm_mips_csr_die_notify() which handles t0 containing a value + * which triggers an MSA FP Exception, which must be stepped over and + * ignored since the set cause bits must remain there for the guest. + */ + _ctcmsa MSA_CSR, t0 + jr ra + nop + END(__kvm_restore_msacsr) diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c new file mode 100644 index 000000000..53f851a61 --- /dev/null +++ b/arch/mips/kvm/stats.c @@ -0,0 +1,63 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: COP0 access histogram + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/kvm_host.h> + +char *kvm_cop0_str[N_MIPS_COPROC_REGS] = { + "Index", + "Random", + "EntryLo0", + "EntryLo1", + "Context", + "PG Mask", + "Wired", + "HWREna", + "BadVAddr", + "Count", + "EntryHI", + "Compare", + "Status", + "Cause", + "EXC PC", + "PRID", + "Config", + "LLAddr", + "Watch Lo", + "Watch Hi", + "X Context", + "Reserved", + "Impl Dep", + "Debug", + "DEPC", + "PerfCnt", + "ErrCtl", + "CacheErr", + "TagLo", + "TagHi", + "ErrorEPC", + "DESAVE" +}; + +void kvm_mips_dump_stats(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS + int i, j; + + kvm_info("\nKVM VCPU[%d] COP0 Access Profile:\n", vcpu->vcpu_id); + for (i = 0; i < N_MIPS_COPROC_REGS; i++) { + for (j = 0; j < N_MIPS_COPROC_SEL; j++) { + if (vcpu->arch.cop0->stat[i][j]) + kvm_info("%s[%d]: %lu\n", kvm_cop0_str[i], j, + vcpu->arch.cop0->stat[i][j]); + } + } +#endif +} diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c new file mode 100644 index 000000000..7cd92166a --- /dev/null +++ b/arch/mips/kvm/tlb.c @@ -0,0 +1,660 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS TLB handling, this file is part of the Linux host kernel so that + * TLB handlers run from KSEG0 + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. 
+ * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <linux/kvm_host.h> +#include <linux/srcu.h> + +#include <asm/cpu.h> +#include <asm/bootinfo.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/cacheflush.h> +#include <asm/tlb.h> +#include <asm/tlbdebug.h> + +#undef CONFIG_MIPS_MT +#include <asm/r4kcache.h> +#define CONFIG_MIPS_MT + +#define KVM_GUEST_PC_TLB 0 +#define KVM_GUEST_SP_TLB 1 + +#ifdef CONFIG_KVM_MIPS_VZ +unsigned long GUESTID_MASK; +EXPORT_SYMBOL_GPL(GUESTID_MASK); +unsigned long GUESTID_FIRST_VERSION; +EXPORT_SYMBOL_GPL(GUESTID_FIRST_VERSION); +unsigned long GUESTID_VERSION_MASK; +EXPORT_SYMBOL_GPL(GUESTID_VERSION_MASK); + +static u32 kvm_mips_get_root_asid(struct kvm_vcpu *vcpu) +{ + struct mm_struct *gpa_mm = &vcpu->kvm->arch.gpa_mm; + + if (cpu_has_guestid) + return 0; + else + return cpu_asid(smp_processor_id(), gpa_mm); +} +#endif + +static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) +{ + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + int cpu = smp_processor_id(); + + return cpu_asid(cpu, kern_mm); +} + +static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) +{ + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; + int cpu = smp_processor_id(); + + return cpu_asid(cpu, user_mm); +} + +/* Structure defining an tlb entry data set. */ + +void kvm_mips_dump_host_tlbs(void) +{ + unsigned long flags; + + local_irq_save(flags); + + kvm_info("HOST TLBs:\n"); + dump_tlb_regs(); + pr_info("\n"); + dump_tlb_all(); + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(kvm_mips_dump_host_tlbs); + +void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_mips_tlb tlb; + int i; + + kvm_info("Guest TLBs:\n"); + kvm_info("Guest EntryHi: %#lx\n", kvm_read_c0_guest_entryhi(cop0)); + + for (i = 0; i < KVM_MIPS_GUEST_TLB_SIZE; i++) { + tlb = vcpu->arch.guest_tlb[i]; + kvm_info("TLB%c%3d Hi 0x%08lx ", + (tlb.tlb_lo[0] | tlb.tlb_lo[1]) & ENTRYLO_V + ? ' ' : '*', + i, tlb.tlb_hi); + kvm_info("Lo0=0x%09llx %c%c attr %lx ", + (u64) mips3_tlbpfn_to_paddr(tlb.tlb_lo[0]), + (tlb.tlb_lo[0] & ENTRYLO_D) ? 'D' : ' ', + (tlb.tlb_lo[0] & ENTRYLO_G) ? 'G' : ' ', + (tlb.tlb_lo[0] & ENTRYLO_C) >> ENTRYLO_C_SHIFT); + kvm_info("Lo1=0x%09llx %c%c attr %lx sz=%lx\n", + (u64) mips3_tlbpfn_to_paddr(tlb.tlb_lo[1]), + (tlb.tlb_lo[1] & ENTRYLO_D) ? 'D' : ' ', + (tlb.tlb_lo[1] & ENTRYLO_G) ? 
'G' : ' ', + (tlb.tlb_lo[1] & ENTRYLO_C) >> ENTRYLO_C_SHIFT, + tlb.tlb_mask); + } +} +EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs); + +int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) +{ + int i; + int index = -1; + struct kvm_mips_tlb *tlb = vcpu->arch.guest_tlb; + + for (i = 0; i < KVM_MIPS_GUEST_TLB_SIZE; i++) { + if (TLB_HI_VPN2_HIT(tlb[i], entryhi) && + TLB_HI_ASID_HIT(tlb[i], entryhi)) { + index = i; + break; + } + } + + kvm_debug("%s: entryhi: %#lx, index: %d lo0: %#lx, lo1: %#lx\n", + __func__, entryhi, index, tlb[i].tlb_lo[0], tlb[i].tlb_lo[1]); + + return index; +} +EXPORT_SYMBOL_GPL(kvm_mips_guest_tlb_lookup); + +static int _kvm_mips_host_tlb_inv(unsigned long entryhi) +{ + int idx; + + write_c0_entryhi(entryhi); + mtc0_tlbw_hazard(); + + tlb_probe(); + tlb_probe_hazard(); + idx = read_c0_index(); + + if (idx >= current_cpu_data.tlbsize) + BUG(); + + if (idx >= 0) { + write_c0_entryhi(UNIQUE_ENTRYHI(idx)); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + mtc0_tlbw_hazard(); + + tlb_write_indexed(); + tlbw_use_hazard(); + } + + return idx; +} + +int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, + bool user, bool kernel) +{ + /* + * Initialize idx_user and idx_kernel to workaround bogus + * maybe-initialized warning when using GCC 6. + */ + int idx_user = 0, idx_kernel = 0; + unsigned long flags, old_entryhi; + + local_irq_save(flags); + + old_entryhi = read_c0_entryhi(); + + if (user) + idx_user = _kvm_mips_host_tlb_inv((va & VPN2_MASK) | + kvm_mips_get_user_asid(vcpu)); + if (kernel) + idx_kernel = _kvm_mips_host_tlb_inv((va & VPN2_MASK) | + kvm_mips_get_kernel_asid(vcpu)); + + write_c0_entryhi(old_entryhi); + mtc0_tlbw_hazard(); + + local_irq_restore(flags); + + /* + * We don't want to get reserved instruction exceptions for missing tlb + * entries. + */ + if (cpu_has_vtag_icache) + flush_icache_all(); + + if (user && idx_user >= 0) + kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n", + __func__, (va & VPN2_MASK) | + kvm_mips_get_user_asid(vcpu), idx_user); + if (kernel && idx_kernel >= 0) + kvm_debug("%s: Invalidated guest kernel entryhi %#lx @ idx %d\n", + __func__, (va & VPN2_MASK) | + kvm_mips_get_kernel_asid(vcpu), idx_kernel); + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv); + +#ifdef CONFIG_KVM_MIPS_VZ + +/* GuestID management */ + +/** + * clear_root_gid() - Set GuestCtl1.RID for normal root operation. + */ +static inline void clear_root_gid(void) +{ + if (cpu_has_guestid) { + clear_c0_guestctl1(MIPS_GCTL1_RID); + mtc0_tlbw_hazard(); + } +} + +/** + * set_root_gid_to_guest_gid() - Set GuestCtl1.RID to match GuestCtl1.ID. + * + * Sets the root GuestID to match the current guest GuestID, for TLB operation + * on the GPA->RPA mappings in the root TLB. + * + * The caller must be sure to disable HTW while the root GID is set, and + * possibly longer if TLB registers are modified. 
+ */
+static inline void set_root_gid_to_guest_gid(void)
+{
+        unsigned int guestctl1;
+
+        if (cpu_has_guestid) {
+                back_to_back_c0_hazard();
+                guestctl1 = read_c0_guestctl1();
+                guestctl1 = (guestctl1 & ~MIPS_GCTL1_RID) |
+                        ((guestctl1 & MIPS_GCTL1_ID) >> MIPS_GCTL1_ID_SHIFT)
+                                << MIPS_GCTL1_RID_SHIFT;
+                write_c0_guestctl1(guestctl1);
+                mtc0_tlbw_hazard();
+        }
+}
+
+int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
+{
+        int idx;
+        unsigned long flags, old_entryhi;
+
+        local_irq_save(flags);
+        htw_stop();
+
+        /* Set root GuestID for root probe and write of guest TLB entry */
+        set_root_gid_to_guest_gid();
+
+        old_entryhi = read_c0_entryhi();
+
+        idx = _kvm_mips_host_tlb_inv((va & VPN2_MASK) |
+                                     kvm_mips_get_root_asid(vcpu));
+
+        write_c0_entryhi(old_entryhi);
+        clear_root_gid();
+        mtc0_tlbw_hazard();
+
+        htw_start();
+        local_irq_restore(flags);
+
+        /*
+         * We don't want to get reserved instruction exceptions for missing tlb
+         * entries.
+         */
+        if (cpu_has_vtag_icache)
+                flush_icache_all();
+
+        if (idx > 0)
+                kvm_debug("%s: Invalidated root entryhi %#lx @ idx %d\n",
+                          __func__, (va & VPN2_MASK) |
+                                    kvm_mips_get_root_asid(vcpu), idx);
+
+        return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vz_host_tlb_inv);
+
+/**
+ * kvm_vz_guest_tlb_lookup() - Lookup a guest VZ TLB mapping.
+ * @vcpu:       KVM VCPU pointer.
+ * @gva:        Guest virtual address in a TLB mapped guest segment.
+ * @gpa:        Pointer to output guest physical address it maps to.
+ *
+ * Converts a guest virtual address in a guest TLB mapped segment to a guest
+ * physical address, by probing the guest TLB.
+ *
+ * Returns:     0 if guest TLB mapping exists for @gva. *@gpa will have been
+ *              written.
+ *              -EFAULT if no guest TLB mapping exists for @gva. *@gpa may not
+ *              have been written.
+ */
+int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva,
+                            unsigned long *gpa)
+{
+        unsigned long o_entryhi, o_entrylo[2], o_pagemask;
+        unsigned int o_index;
+        unsigned long entrylo[2], pagemask, pagemaskbit, pa;
+        unsigned long flags;
+        int index;
+
+        /* Probe the guest TLB for a mapping */
+        local_irq_save(flags);
+        /* Set root GuestID for root probe of guest TLB entry */
+        htw_stop();
+        set_root_gid_to_guest_gid();
+
+        o_entryhi = read_gc0_entryhi();
+        o_index = read_gc0_index();
+
+        write_gc0_entryhi((o_entryhi & 0x3ff) | (gva & ~0xfffl));
+        mtc0_tlbw_hazard();
+        guest_tlb_probe();
+        tlb_probe_hazard();
+
+        index = read_gc0_index();
+        if (index < 0) {
+                /* No match, fail */
+                write_gc0_entryhi(o_entryhi);
+                write_gc0_index(o_index);
+
+                clear_root_gid();
+                htw_start();
+                local_irq_restore(flags);
+                return -EFAULT;
+        }
+
+        /* Match! Read the TLB entry */
+        o_entrylo[0] = read_gc0_entrylo0();
+        o_entrylo[1] = read_gc0_entrylo1();
+        o_pagemask = read_gc0_pagemask();
+
+        mtc0_tlbr_hazard();
+        guest_tlb_read();
+        tlb_read_hazard();
+
+        entrylo[0] = read_gc0_entrylo0();
+        entrylo[1] = read_gc0_entrylo1();
+        pagemask = ~read_gc0_pagemask() & ~0x1fffl;
+
+        write_gc0_entryhi(o_entryhi);
+        write_gc0_index(o_index);
+        write_gc0_entrylo0(o_entrylo[0]);
+        write_gc0_entrylo1(o_entrylo[1]);
+        write_gc0_pagemask(o_pagemask);
+
+        clear_root_gid();
+        htw_start();
+        local_irq_restore(flags);
+
+        /* Select one of the EntryLo values and interpret the GPA */
+        pagemaskbit = (pagemask ^ (pagemask & (pagemask - 1))) >> 1;
+        pa = entrylo[!!(gva & pagemaskbit)];
+
+        /*
+         * TLB entry may have become invalid since TLB probe if physical FTLB
+         * entries are shared between threads (e.g. I6400).
+ */ + if (!(pa & ENTRYLO_V)) + return -EFAULT; + + /* + * Note, this doesn't take guest MIPS32 XPA into account, where PFN is + * split with XI/RI in the middle. + */ + pa = (pa << 6) & ~0xfffl; + pa |= gva & ~(pagemask | pagemaskbit); + + *gpa = pa; + return 0; +} +EXPORT_SYMBOL_GPL(kvm_vz_guest_tlb_lookup); + +/** + * kvm_vz_local_flush_roottlb_all_guests() - Flush all root TLB entries for + * guests. + * + * Invalidate all entries in root tlb which are GPA mappings. + */ +void kvm_vz_local_flush_roottlb_all_guests(void) +{ + unsigned long flags; + unsigned long old_entryhi, old_pagemask, old_guestctl1; + int entry; + + if (WARN_ON(!cpu_has_guestid)) + return; + + local_irq_save(flags); + htw_stop(); + + /* TLBR may clobber EntryHi.ASID, PageMask, and GuestCtl1.RID */ + old_entryhi = read_c0_entryhi(); + old_pagemask = read_c0_pagemask(); + old_guestctl1 = read_c0_guestctl1(); + + /* + * Invalidate guest entries in root TLB while leaving root entries + * intact when possible. + */ + for (entry = 0; entry < current_cpu_data.tlbsize; entry++) { + write_c0_index(entry); + mtc0_tlbw_hazard(); + tlb_read(); + tlb_read_hazard(); + + /* Don't invalidate non-guest (RVA) mappings in the root TLB */ + if (!(read_c0_guestctl1() & MIPS_GCTL1_RID)) + continue; + + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(entry)); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + write_c0_guestctl1(0); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + } + + write_c0_entryhi(old_entryhi); + write_c0_pagemask(old_pagemask); + write_c0_guestctl1(old_guestctl1); + tlbw_use_hazard(); + + htw_start(); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(kvm_vz_local_flush_roottlb_all_guests); + +/** + * kvm_vz_local_flush_guesttlb_all() - Flush all guest TLB entries. + * + * Invalidate all entries in guest tlb irrespective of guestid. + */ +void kvm_vz_local_flush_guesttlb_all(void) +{ + unsigned long flags; + unsigned long old_index; + unsigned long old_entryhi; + unsigned long old_entrylo[2]; + unsigned long old_pagemask; + int entry; + u64 cvmmemctl2 = 0; + + local_irq_save(flags); + + /* Preserve all clobbered guest registers */ + old_index = read_gc0_index(); + old_entryhi = read_gc0_entryhi(); + old_entrylo[0] = read_gc0_entrylo0(); + old_entrylo[1] = read_gc0_entrylo1(); + old_pagemask = read_gc0_pagemask(); + + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Inhibit machine check due to multiple matching TLB entries */ + cvmmemctl2 = read_c0_cvmmemctl2(); + cvmmemctl2 |= CVMMEMCTL2_INHIBITTS; + write_c0_cvmmemctl2(cvmmemctl2); + break; + }; + + /* Invalidate guest entries in guest TLB */ + write_gc0_entrylo0(0); + write_gc0_entrylo1(0); + write_gc0_pagemask(0); + for (entry = 0; entry < current_cpu_data.guest.tlbsize; entry++) { + /* Make sure all entries differ. */ + write_gc0_index(entry); + write_gc0_entryhi(UNIQUE_GUEST_ENTRYHI(entry)); + mtc0_tlbw_hazard(); + guest_tlb_write_indexed(); + } + + if (cvmmemctl2) { + cvmmemctl2 &= ~CVMMEMCTL2_INHIBITTS; + write_c0_cvmmemctl2(cvmmemctl2); + }; + + write_gc0_index(old_index); + write_gc0_entryhi(old_entryhi); + write_gc0_entrylo0(old_entrylo[0]); + write_gc0_entrylo1(old_entrylo[1]); + write_gc0_pagemask(old_pagemask); + tlbw_use_hazard(); + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(kvm_vz_local_flush_guesttlb_all); + +/** + * kvm_vz_save_guesttlb() - Save a range of guest TLB entries. + * @buf: Buffer to write TLB entries into. + * @index: Start index. + * @count: Number of entries to save. 
+ *
+ * Save a range of guest TLB entries. The caller must ensure interrupts are
+ * disabled.
+ */
+void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index,
+                          unsigned int count)
+{
+        unsigned int end = index + count;
+        unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask;
+        unsigned int guestctl1 = 0;
+        int old_index, i;
+
+        /* Save registers we're about to clobber */
+        old_index = read_gc0_index();
+        old_entryhi = read_gc0_entryhi();
+        old_entrylo0 = read_gc0_entrylo0();
+        old_entrylo1 = read_gc0_entrylo1();
+        old_pagemask = read_gc0_pagemask();
+
+        /* Set root GuestID for root probe */
+        htw_stop();
+        set_root_gid_to_guest_gid();
+        if (cpu_has_guestid)
+                guestctl1 = read_c0_guestctl1();
+
+        /* Read each entry from guest TLB */
+        for (i = index; i < end; ++i, ++buf) {
+                write_gc0_index(i);
+
+                mtc0_tlbr_hazard();
+                guest_tlb_read();
+                tlb_read_hazard();
+
+                if (cpu_has_guestid &&
+                    (read_c0_guestctl1() ^ guestctl1) & MIPS_GCTL1_RID) {
+                        /* Entry invalid or belongs to another guest */
+                        buf->tlb_hi = UNIQUE_GUEST_ENTRYHI(i);
+                        buf->tlb_lo[0] = 0;
+                        buf->tlb_lo[1] = 0;
+                        buf->tlb_mask = 0;
+                } else {
+                        /* Entry belongs to the right guest */
+                        buf->tlb_hi = read_gc0_entryhi();
+                        buf->tlb_lo[0] = read_gc0_entrylo0();
+                        buf->tlb_lo[1] = read_gc0_entrylo1();
+                        buf->tlb_mask = read_gc0_pagemask();
+                }
+        }
+
+        /* Clear root GuestID again */
+        clear_root_gid();
+        htw_start();
+
+        /* Restore clobbered registers */
+        write_gc0_index(old_index);
+        write_gc0_entryhi(old_entryhi);
+        write_gc0_entrylo0(old_entrylo0);
+        write_gc0_entrylo1(old_entrylo1);
+        write_gc0_pagemask(old_pagemask);
+
+        tlbw_use_hazard();
+}
+EXPORT_SYMBOL_GPL(kvm_vz_save_guesttlb);
+
+/**
+ * kvm_vz_load_guesttlb() - Load a range of guest TLB entries.
+ * @buf:        Buffer to read TLB entries from.
+ * @index:      Start index.
+ * @count:      Number of entries to load.
+ *
+ * Load a range of guest TLB entries. The caller must ensure interrupts are
+ * disabled.
+ */
+void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index,
+                          unsigned int count)
+{
+        unsigned int end = index + count;
+        unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask;
+        int old_index, i;
+
+        /* Save registers we're about to clobber */
+        old_index = read_gc0_index();
+        old_entryhi = read_gc0_entryhi();
+        old_entrylo0 = read_gc0_entrylo0();
+        old_entrylo1 = read_gc0_entrylo1();
+        old_pagemask = read_gc0_pagemask();
+
+        /* Set root GuestID for root probe */
+        htw_stop();
+        set_root_gid_to_guest_gid();
+
+        /* Write each entry to guest TLB */
+        for (i = index; i < end; ++i, ++buf) {
+                write_gc0_index(i);
+                write_gc0_entryhi(buf->tlb_hi);
+                write_gc0_entrylo0(buf->tlb_lo[0]);
+                write_gc0_entrylo1(buf->tlb_lo[1]);
+                write_gc0_pagemask(buf->tlb_mask);
+
+                mtc0_tlbw_hazard();
+                guest_tlb_write_indexed();
+        }
+
+        /* Clear root GuestID again */
+        clear_root_gid();
+        htw_start();
+
+        /* Restore clobbered registers */
+        write_gc0_index(old_index);
+        write_gc0_entryhi(old_entryhi);
+        write_gc0_entrylo0(old_entrylo0);
+        write_gc0_entrylo1(old_entrylo1);
+        write_gc0_pagemask(old_pagemask);
+
+        tlbw_use_hazard();
+}
+EXPORT_SYMBOL_GPL(kvm_vz_load_guesttlb);
+
+#endif
+
+/**
+ * kvm_mips_suspend_mm() - Suspend the active mm.
+ * @cpu:        The CPU we're running on.
+ *
+ * Suspend the active_mm, ready for a switch to a KVM guest virtual address
+ * space. This is left active for the duration of guest context, including time
+ * with interrupts enabled, so we need to be careful not to confuse e.g. cache
+ * management IPIs.
+ * + * kvm_mips_resume_mm() should be called before context switching to a different + * process so we don't need to worry about reference counting. + * + * This needs to be in static kernel code to avoid exporting init_mm. + */ +void kvm_mips_suspend_mm(int cpu) +{ + cpumask_clear_cpu(cpu, mm_cpumask(current->active_mm)); + current->active_mm = &init_mm; +} +EXPORT_SYMBOL_GPL(kvm_mips_suspend_mm); + +/** + * kvm_mips_resume_mm() - Resume the current process mm. + * @cpu The CPU we're running on. + * + * Resume the mm of the current process, after a switch back from a KVM guest + * virtual address space (see kvm_mips_suspend_mm()). + */ +void kvm_mips_resume_mm(int cpu) +{ + cpumask_set_cpu(cpu, mm_cpumask(current->mm)); + current->active_mm = current->mm; +} +EXPORT_SYMBOL_GPL(kvm_mips_resume_mm); diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h new file mode 100644 index 000000000..a8c7fd7bf --- /dev/null +++ b/arch/mips/kvm/trace.h @@ -0,0 +1,346 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace + +/* + * arch/mips/kvm/mips.c + */ +extern bool kvm_trace_guest_mode_change; +int kvm_guest_mode_change_trace_reg(void); +void kvm_guest_mode_change_trace_unreg(void); + +/* + * Tracepoints for VM enters + */ +DECLARE_EVENT_CLASS(kvm_transition, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + TP_STRUCT__entry( + __field(unsigned long, pc) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + ), + + TP_printk("PC: 0x%08lx", + __entry->pc) +); + +DEFINE_EVENT(kvm_transition, kvm_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu)); + +DEFINE_EVENT(kvm_transition, kvm_reenter, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu)); + +DEFINE_EVENT(kvm_transition, kvm_out, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu)); + +/* The first 32 exit reasons correspond to Cause.ExcCode */ +#define KVM_TRACE_EXIT_INT 0 +#define KVM_TRACE_EXIT_TLBMOD 1 +#define KVM_TRACE_EXIT_TLBMISS_LD 2 +#define KVM_TRACE_EXIT_TLBMISS_ST 3 +#define KVM_TRACE_EXIT_ADDRERR_LD 4 +#define KVM_TRACE_EXIT_ADDRERR_ST 5 +#define KVM_TRACE_EXIT_SYSCALL 8 +#define KVM_TRACE_EXIT_BREAK_INST 9 +#define KVM_TRACE_EXIT_RESVD_INST 10 +#define KVM_TRACE_EXIT_COP_UNUSABLE 11 +#define KVM_TRACE_EXIT_TRAP_INST 13 +#define KVM_TRACE_EXIT_MSA_FPE 14 +#define KVM_TRACE_EXIT_FPE 15 +#define KVM_TRACE_EXIT_MSA_DISABLED 21 +#define KVM_TRACE_EXIT_GUEST_EXIT 27 +/* Further exit reasons */ +#define KVM_TRACE_EXIT_WAIT 32 +#define KVM_TRACE_EXIT_CACHE 33 +#define KVM_TRACE_EXIT_SIGNAL 34 +/* 32 exit reasons correspond to GuestCtl0.GExcCode (VZ) */ +#define KVM_TRACE_EXIT_GEXCCODE_BASE 64 +#define KVM_TRACE_EXIT_GPSI 64 /* 0 */ +#define KVM_TRACE_EXIT_GSFC 65 /* 1 */ +#define KVM_TRACE_EXIT_HC 66 /* 2 */ +#define KVM_TRACE_EXIT_GRR 67 /* 3 */ +#define KVM_TRACE_EXIT_GVA 72 /* 8 */ +#define KVM_TRACE_EXIT_GHFC 73 /* 9 */ +#define KVM_TRACE_EXIT_GPA 74 /* 10 */ + +/* Tracepoints for VM exits */ +#define kvm_trace_symbol_exit_types \ + { KVM_TRACE_EXIT_INT, "Interrupt" }, \ + { KVM_TRACE_EXIT_TLBMOD, "TLB Mod" }, \ + { KVM_TRACE_EXIT_TLBMISS_LD, 
"TLB Miss (LD)" }, \ + { KVM_TRACE_EXIT_TLBMISS_ST, "TLB Miss (ST)" }, \ + { KVM_TRACE_EXIT_ADDRERR_LD, "Address Error (LD)" }, \ + { KVM_TRACE_EXIT_ADDRERR_ST, "Address Err (ST)" }, \ + { KVM_TRACE_EXIT_SYSCALL, "System Call" }, \ + { KVM_TRACE_EXIT_BREAK_INST, "Break Inst" }, \ + { KVM_TRACE_EXIT_RESVD_INST, "Reserved Inst" }, \ + { KVM_TRACE_EXIT_COP_UNUSABLE, "COP0/1 Unusable" }, \ + { KVM_TRACE_EXIT_TRAP_INST, "Trap Inst" }, \ + { KVM_TRACE_EXIT_MSA_FPE, "MSA FPE" }, \ + { KVM_TRACE_EXIT_FPE, "FPE" }, \ + { KVM_TRACE_EXIT_MSA_DISABLED, "MSA Disabled" }, \ + { KVM_TRACE_EXIT_GUEST_EXIT, "Guest Exit" }, \ + { KVM_TRACE_EXIT_WAIT, "WAIT" }, \ + { KVM_TRACE_EXIT_CACHE, "CACHE" }, \ + { KVM_TRACE_EXIT_SIGNAL, "Signal" }, \ + { KVM_TRACE_EXIT_GPSI, "GPSI" }, \ + { KVM_TRACE_EXIT_GSFC, "GSFC" }, \ + { KVM_TRACE_EXIT_HC, "HC" }, \ + { KVM_TRACE_EXIT_GRR, "GRR" }, \ + { KVM_TRACE_EXIT_GVA, "GVA" }, \ + { KVM_TRACE_EXIT_GHFC, "GHFC" }, \ + { KVM_TRACE_EXIT_GPA, "GPA" } + +TRACE_EVENT(kvm_exit, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), + TP_ARGS(vcpu, reason), + TP_STRUCT__entry( + __field(unsigned long, pc) + __field(unsigned int, reason) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + __entry->reason = reason; + ), + + TP_printk("[%s]PC: 0x%08lx", + __print_symbolic(__entry->reason, + kvm_trace_symbol_exit_types), + __entry->pc) +); + +#define KVM_TRACE_MFC0 0 +#define KVM_TRACE_MTC0 1 +#define KVM_TRACE_DMFC0 2 +#define KVM_TRACE_DMTC0 3 +#define KVM_TRACE_RDHWR 4 + +#define KVM_TRACE_HWR_COP0 0 +#define KVM_TRACE_HWR_HWR 1 + +#define KVM_TRACE_COP0(REG, SEL) ((KVM_TRACE_HWR_COP0 << 8) | \ + ((REG) << 3) | (SEL)) +#define KVM_TRACE_HWR(REG, SEL) ((KVM_TRACE_HWR_HWR << 8) | \ + ((REG) << 3) | (SEL)) + +#define kvm_trace_symbol_hwr_ops \ + { KVM_TRACE_MFC0, "MFC0" }, \ + { KVM_TRACE_MTC0, "MTC0" }, \ + { KVM_TRACE_DMFC0, "DMFC0" }, \ + { KVM_TRACE_DMTC0, "DMTC0" }, \ + { KVM_TRACE_RDHWR, "RDHWR" } + +#define kvm_trace_symbol_hwr_cop \ + { KVM_TRACE_HWR_COP0, "COP0" }, \ + { KVM_TRACE_HWR_HWR, "HWR" } + +#define kvm_trace_symbol_hwr_regs \ + { KVM_TRACE_COP0( 0, 0), "Index" }, \ + { KVM_TRACE_COP0( 2, 0), "EntryLo0" }, \ + { KVM_TRACE_COP0( 3, 0), "EntryLo1" }, \ + { KVM_TRACE_COP0( 4, 0), "Context" }, \ + { KVM_TRACE_COP0( 4, 2), "UserLocal" }, \ + { KVM_TRACE_COP0( 5, 0), "PageMask" }, \ + { KVM_TRACE_COP0( 6, 0), "Wired" }, \ + { KVM_TRACE_COP0( 7, 0), "HWREna" }, \ + { KVM_TRACE_COP0( 8, 0), "BadVAddr" }, \ + { KVM_TRACE_COP0( 9, 0), "Count" }, \ + { KVM_TRACE_COP0(10, 0), "EntryHi" }, \ + { KVM_TRACE_COP0(11, 0), "Compare" }, \ + { KVM_TRACE_COP0(12, 0), "Status" }, \ + { KVM_TRACE_COP0(12, 1), "IntCtl" }, \ + { KVM_TRACE_COP0(12, 2), "SRSCtl" }, \ + { KVM_TRACE_COP0(13, 0), "Cause" }, \ + { KVM_TRACE_COP0(14, 0), "EPC" }, \ + { KVM_TRACE_COP0(15, 0), "PRId" }, \ + { KVM_TRACE_COP0(15, 1), "EBase" }, \ + { KVM_TRACE_COP0(16, 0), "Config" }, \ + { KVM_TRACE_COP0(16, 1), "Config1" }, \ + { KVM_TRACE_COP0(16, 2), "Config2" }, \ + { KVM_TRACE_COP0(16, 3), "Config3" }, \ + { KVM_TRACE_COP0(16, 4), "Config4" }, \ + { KVM_TRACE_COP0(16, 5), "Config5" }, \ + { KVM_TRACE_COP0(16, 7), "Config7" }, \ + { KVM_TRACE_COP0(17, 1), "MAAR" }, \ + { KVM_TRACE_COP0(17, 2), "MAARI" }, \ + { KVM_TRACE_COP0(26, 0), "ECC" }, \ + { KVM_TRACE_COP0(30, 0), "ErrorEPC" }, \ + { KVM_TRACE_COP0(31, 2), "KScratch1" }, \ + { KVM_TRACE_COP0(31, 3), "KScratch2" }, \ + { KVM_TRACE_COP0(31, 4), "KScratch3" }, \ + { KVM_TRACE_COP0(31, 5), "KScratch4" }, \ + { KVM_TRACE_COP0(31, 6), "KScratch5" }, \ + { 
KVM_TRACE_COP0(31, 7), "KScratch6" }, \ + { KVM_TRACE_HWR( 0, 0), "CPUNum" }, \ + { KVM_TRACE_HWR( 1, 0), "SYNCI_Step" }, \ + { KVM_TRACE_HWR( 2, 0), "CC" }, \ + { KVM_TRACE_HWR( 3, 0), "CCRes" }, \ + { KVM_TRACE_HWR(29, 0), "ULR" } + +TRACE_EVENT(kvm_hwr, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op, unsigned int reg, + unsigned long val), + TP_ARGS(vcpu, op, reg, val), + TP_STRUCT__entry( + __field(unsigned long, val) + __field(u16, reg) + __field(u8, op) + ), + + TP_fast_assign( + __entry->val = val; + __entry->reg = reg; + __entry->op = op; + ), + + TP_printk("%s %s (%s:%u:%u) 0x%08lx", + __print_symbolic(__entry->op, + kvm_trace_symbol_hwr_ops), + __print_symbolic(__entry->reg, + kvm_trace_symbol_hwr_regs), + __print_symbolic(__entry->reg >> 8, + kvm_trace_symbol_hwr_cop), + (__entry->reg >> 3) & 0x1f, + __entry->reg & 0x7, + __entry->val) +); + +#define KVM_TRACE_AUX_RESTORE 0 +#define KVM_TRACE_AUX_SAVE 1 +#define KVM_TRACE_AUX_ENABLE 2 +#define KVM_TRACE_AUX_DISABLE 3 +#define KVM_TRACE_AUX_DISCARD 4 + +#define KVM_TRACE_AUX_FPU 1 +#define KVM_TRACE_AUX_MSA 2 +#define KVM_TRACE_AUX_FPU_MSA 3 + +#define kvm_trace_symbol_aux_op \ + { KVM_TRACE_AUX_RESTORE, "restore" }, \ + { KVM_TRACE_AUX_SAVE, "save" }, \ + { KVM_TRACE_AUX_ENABLE, "enable" }, \ + { KVM_TRACE_AUX_DISABLE, "disable" }, \ + { KVM_TRACE_AUX_DISCARD, "discard" } + +#define kvm_trace_symbol_aux_state \ + { KVM_TRACE_AUX_FPU, "FPU" }, \ + { KVM_TRACE_AUX_MSA, "MSA" }, \ + { KVM_TRACE_AUX_FPU_MSA, "FPU & MSA" } + +TRACE_EVENT(kvm_aux, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op, + unsigned int state), + TP_ARGS(vcpu, op, state), + TP_STRUCT__entry( + __field(unsigned long, pc) + __field(u8, op) + __field(u8, state) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + __entry->op = op; + __entry->state = state; + ), + + TP_printk("%s %s PC: 0x%08lx", + __print_symbolic(__entry->op, + kvm_trace_symbol_aux_op), + __print_symbolic(__entry->state, + kvm_trace_symbol_aux_state), + __entry->pc) +); + +TRACE_EVENT(kvm_asid_change, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int old_asid, + unsigned int new_asid), + TP_ARGS(vcpu, old_asid, new_asid), + TP_STRUCT__entry( + __field(unsigned long, pc) + __field(u8, old_asid) + __field(u8, new_asid) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + __entry->old_asid = old_asid; + __entry->new_asid = new_asid; + ), + + TP_printk("PC: 0x%08lx old: 0x%02x new: 0x%02x", + __entry->pc, + __entry->old_asid, + __entry->new_asid) +); + +TRACE_EVENT(kvm_guestid_change, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int guestid), + TP_ARGS(vcpu, guestid), + TP_STRUCT__entry( + __field(unsigned int, guestid) + ), + + TP_fast_assign( + __entry->guestid = guestid; + ), + + TP_printk("GuestID: 0x%02x", + __entry->guestid) +); + +TRACE_EVENT_FN(kvm_guest_mode_change, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + TP_STRUCT__entry( + __field(unsigned long, epc) + __field(unsigned long, pc) + __field(unsigned long, badvaddr) + __field(unsigned int, status) + __field(unsigned int, cause) + ), + + TP_fast_assign( + __entry->epc = kvm_read_c0_guest_epc(vcpu->arch.cop0); + __entry->pc = vcpu->arch.pc; + __entry->badvaddr = kvm_read_c0_guest_badvaddr(vcpu->arch.cop0); + __entry->status = kvm_read_c0_guest_status(vcpu->arch.cop0); + __entry->cause = kvm_read_c0_guest_cause(vcpu->arch.cop0); + ), + + TP_printk("EPC: 0x%08lx PC: 0x%08lx Status: 0x%08x Cause: 0x%08x BadVAddr: 0x%08lx", + __entry->epc, + __entry->pc, + __entry->status, + __entry->cause, + __entry->badvaddr), + + 
kvm_guest_mode_change_trace_reg, + kvm_guest_mode_change_trace_unreg +); + +#endif /* _TRACE_KVM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c new file mode 100644 index 000000000..6a0d7040d --- /dev/null +++ b/arch/mips/kvm/trap_emul.c @@ -0,0 +1,1329 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Deliver/Emulate exceptions to the guest kernel + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/log2.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> + +#include "interrupt.h" + +static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) +{ + gpa_t gpa; + gva_t kseg = KSEGX(gva); + gva_t gkseg = KVM_GUEST_KSEGX(gva); + + if ((kseg == CKSEG0) || (kseg == CKSEG1)) + gpa = CPHYSADDR(gva); + else if (gkseg == KVM_GUEST_KSEG0) + gpa = KVM_GUEST_CPHYSADDR(gva); + else { + kvm_err("%s: cannot find GPA for GVA: %#lx\n", __func__, gva); + kvm_mips_dump_host_tlbs(); + gpa = KVM_INVALID_ADDR; + } + + kvm_debug("%s: gva %#lx, gpa: %#llx\n", __func__, gva, gpa); + + return gpa; +} + +static int kvm_trap_emul_no_handler(struct kvm_vcpu *vcpu) +{ + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; + unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; + u32 inst = 0; + + /* + * Fetch the instruction. 
+ */ + if (cause & CAUSEF_BD) + opc += 1; + kvm_get_badinstr(opc, vcpu, &inst); + + kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", + exccode, opc, inst, badvaddr, + kvm_read_c0_guest_status(vcpu->arch.cop0)); + kvm_arch_vcpu_dump_regs(vcpu); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; +} + +static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) { + /* FPU Unusable */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch) || + (kvm_read_c0_guest_status(cop0) & ST0_CU1) == 0) { + /* + * Unusable/no FPU in guest: + * deliver guest COP1 Unusable Exception + */ + er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu); + } else { + /* Restore FPU state */ + kvm_own_fpu(vcpu); + er = EMULATE_DONE; + } + } else { + er = kvm_mips_emulate_inst(cause, opc, run, vcpu); + } + + switch (er) { + case EMULATE_DONE: + ret = RESUME_GUEST; + break; + + case EMULATE_FAIL: + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + break; + + case EMULATE_WAIT: + run->exit_reason = KVM_EXIT_INTR; + ret = RESUME_HOST; + break; + + case EMULATE_HYPERCALL: + ret = kvm_mips_handle_hypcall(vcpu); + break; + + default: + BUG(); + } + return ret; +} + +static int kvm_mips_bad_load(u32 cause, u32 *opc, struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + union mips_instruction inst; + int err; + + /* A code fetch fault doesn't count as an MMIO */ + if (kvm_is_ifetch_fault(&vcpu->arch)) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Fetch the instruction. */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Emulate the load */ + er = kvm_mips_emulate_load(inst, cause, run, vcpu); + if (er == EMULATE_FAIL) { + kvm_err("Emulate load from MMIO space failed\n"); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } else { + run->exit_reason = KVM_EXIT_MMIO; + } + return RESUME_HOST; +} + +static int kvm_mips_bad_store(u32 cause, u32 *opc, struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + union mips_instruction inst; + int err; + + /* Fetch the instruction. 
*/ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Emulate the store */ + er = kvm_mips_emulate_store(inst, cause, run, vcpu); + if (er == EMULATE_FAIL) { + kvm_err("Emulate store to MMIO space failed\n"); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } else { + run->exit_reason = KVM_EXIT_MMIO; + } + return RESUME_HOST; +} + +static int kvm_mips_bad_access(u32 cause, u32 *opc, struct kvm_run *run, + struct kvm_vcpu *vcpu, bool store) +{ + if (store) + return kvm_mips_bad_store(cause, opc, run, vcpu); + else + return kvm_mips_bad_load(cause, opc, run, vcpu); +} + +static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; + u32 cause = vcpu->arch.host_cp0_cause; + struct kvm_mips_tlb *tlb; + unsigned long entryhi; + int index; + + if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 + || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { + /* + * First find the mapping in the guest TLB. If the failure to + * write was due to the guest TLB, it should be up to the guest + * to handle it. + */ + entryhi = (badvaddr & VPN2_MASK) | + (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID); + index = kvm_mips_guest_tlb_lookup(vcpu, entryhi); + + /* + * These should never happen. + * They would indicate stale host TLB entries. + */ + if (unlikely(index < 0)) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + tlb = vcpu->arch.guest_tlb + index; + if (unlikely(!TLB_IS_VALID(*tlb, badvaddr))) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* + * Guest entry not dirty? That would explain the TLB modified + * exception. Relay that on to the guest so it can handle it. + */ + if (!TLB_IS_DIRTY(*tlb, badvaddr)) { + kvm_mips_emulate_tlbmod(cause, opc, run, vcpu); + return RESUME_GUEST; + } + + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, badvaddr, + true)) + /* Not writable, needs handling as MMIO */ + return kvm_mips_bad_store(cause, opc, run, vcpu); + return RESUME_GUEST; + } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { + if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, true) < 0) + /* Not writable, needs handling as MMIO */ + return kvm_mips_bad_store(cause, opc, run, vcpu); + return RESUME_GUEST; + } else { + /* host kernel addresses are all handled as MMIO */ + return kvm_mips_bad_store(cause, opc, run, vcpu); + } +} + +static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store) +{ + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + if (((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) + && KVM_GUEST_KERNEL_MODE(vcpu)) { + if (kvm_mips_handle_commpage_tlb_fault(badvaddr, vcpu) < 0) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + } else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 + || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { + kvm_debug("USER ADDR TLB %s fault: cause %#x, PC: %p, BadVaddr: %#lx\n", + store ? 
"ST" : "LD", cause, opc, badvaddr); + + /* + * User Address (UA) fault, this could happen if + * (1) TLB entry not present/valid in both Guest and shadow host + * TLBs, in this case we pass on the fault to the guest + * kernel and let it handle it. + * (2) TLB entry is present in the Guest TLB but not in the + * shadow, in this case we inject the TLB from the Guest TLB + * into the shadow host TLB + */ + + er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu, store); + if (er == EMULATE_DONE) + ret = RESUME_GUEST; + else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { + /* + * All KSEG0 faults are handled by KVM, as the guest kernel does + * not expect to ever get them + */ + if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, store) < 0) + ret = kvm_mips_bad_access(cause, opc, run, vcpu, store); + } else if (KVM_GUEST_KERNEL_MODE(vcpu) + && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) { + /* + * With EVA we may get a TLB exception instead of an address + * error when the guest performs MMIO to KSeg1 addresses. + */ + ret = kvm_mips_bad_access(cause, opc, run, vcpu, store); + } else { + kvm_err("Illegal TLB %s fault address , cause %#x, PC: %p, BadVaddr: %#lx\n", + store ? "ST" : "LD", cause, opc, badvaddr); + kvm_mips_dump_host_tlbs(); + kvm_arch_vcpu_dump_regs(vcpu); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu) +{ + return kvm_trap_emul_handle_tlb_miss(vcpu, true); +} + +static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) +{ + return kvm_trap_emul_handle_tlb_miss(vcpu, false); +} + +static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; + u32 cause = vcpu->arch.host_cp0_cause; + int ret = RESUME_GUEST; + + if (KVM_GUEST_KERNEL_MODE(vcpu) + && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) { + ret = kvm_mips_bad_store(cause, opc, run, vcpu); + } else { + kvm_err("Address Error (STORE): cause %#x, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; + u32 cause = vcpu->arch.host_cp0_cause; + int ret = RESUME_GUEST; + + if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) { + ret = kvm_mips_bad_load(cause, opc, run, vcpu); + } else { + kvm_err("Address Error (LOAD): cause %#x, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_emul_handle_syscall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_emulate_syscall(cause, opc, run, vcpu); + if (er == EMULATE_DONE) + ret = RESUME_GUEST; + else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_emul_handle_res_inst(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 
__user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_handle_ri(cause, opc, run, vcpu); + if (er == EMULATE_DONE) + ret = RESUME_GUEST; + else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_emulate_bp_exc(cause, opc, run, vcpu); + if (er == EMULATE_DONE) + ret = RESUME_GUEST; + else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 __user *)vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_emulate_trap_exc(cause, opc, run, vcpu); + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 __user *)vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_emulate_msafpe_exc(cause, opc, run, vcpu); + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 __user *)vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_emulate_fpe_exc(cause, opc, run, vcpu); + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +/** + * kvm_trap_emul_handle_msa_disabled() - Guest used MSA while disabled in root. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use MSA when it is disabled. 
+ */ +static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_run *run = vcpu->run; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + if (!kvm_mips_guest_has_msa(&vcpu->arch) || + (kvm_read_c0_guest_status(cop0) & (ST0_CU1 | ST0_FR)) == ST0_CU1) { + /* + * No MSA in guest, or FPU enabled and not in FR=1 mode, + * guest reserved instruction exception + */ + er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); + } else if (!(kvm_read_c0_guest_config5(cop0) & MIPS_CONF5_MSAEN)) { + /* MSA disabled by guest, guest MSA disabled exception */ + er = kvm_mips_emulate_msadis_exc(cause, opc, run, vcpu); + } else { + /* Restore MSA/FPU state */ + kvm_own_msa(vcpu); + er = EMULATE_DONE; + } + + switch (er) { + case EMULATE_DONE: + ret = RESUME_GUEST; + break; + + case EMULATE_FAIL: + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + break; + + default: + BUG(); + } + return ret; +} + +static int kvm_trap_emul_hardware_enable(void) +{ + return 0; +} + +static void kvm_trap_emul_hardware_disable(void) +{ +} + +static int kvm_trap_emul_check_extension(struct kvm *kvm, long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_MIPS_TE: + r = 1; + break; + default: + r = 0; + break; + } + + return r; +} + +static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; + + /* + * Allocate GVA -> HPA page tables. + * MIPS doesn't use the mm_struct pointer argument. + */ + kern_mm->pgd = pgd_alloc(kern_mm); + if (!kern_mm->pgd) + return -ENOMEM; + + user_mm->pgd = pgd_alloc(user_mm); + if (!user_mm->pgd) { + pgd_free(kern_mm, kern_mm->pgd); + return -ENOMEM; + } + + return 0; +} + +static void kvm_mips_emul_free_gva_pt(pgd_t *pgd) +{ + /* Don't free host kernel page tables copied from init_mm.pgd */ + const unsigned long end = 0x80000000; + unsigned long pgd_va, pud_va, pmd_va; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int i, j, k; + + for (i = 0; i < USER_PTRS_PER_PGD; i++) { + if (pgd_none(pgd[i])) + continue; + + pgd_va = (unsigned long)i << PGDIR_SHIFT; + if (pgd_va >= end) + break; + pud = pud_offset(pgd + i, 0); + for (j = 0; j < PTRS_PER_PUD; j++) { + if (pud_none(pud[j])) + continue; + + pud_va = pgd_va | ((unsigned long)j << PUD_SHIFT); + if (pud_va >= end) + break; + pmd = pmd_offset(pud + j, 0); + for (k = 0; k < PTRS_PER_PMD; k++) { + if (pmd_none(pmd[k])) + continue; + + pmd_va = pud_va | (k << PMD_SHIFT); + if (pmd_va >= end) + break; + pte = pte_offset(pmd + k, 0); + pte_free_kernel(NULL, pte); + } + pmd_free(NULL, pmd); + } + pud_free(NULL, pud); + } + pgd_free(NULL, pgd); +} + +static void kvm_trap_emul_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + kvm_mips_emul_free_gva_pt(vcpu->arch.guest_kernel_mm.pgd); + kvm_mips_emul_free_gva_pt(vcpu->arch.guest_user_mm.pgd); +} + +static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + u32 config, config1; + int vcpu_id = vcpu->vcpu_id; + + /* Start off the timer at 100 MHz */ + kvm_mips_init_count(vcpu, 100*1000*1000); + + /* + * Arch specific stuff, set up config registers properly so that the + * guest will come up as expected + */ +#ifndef CONFIG_CPU_MIPSR6 + /* r2-r5, simulate a MIPS 24kc */ + kvm_write_c0_guest_prid(cop0, 0x00019300); +#else + /* r6+, simulate a generic QEMU machine */ + 
kvm_write_c0_guest_prid(cop0, 0x00010000); +#endif + /* + * Have config1, Cacheable, noncoherent, write-back, write allocate. + * Endianness, arch revision & virtually tagged icache should match + * host. + */ + config = read_c0_config() & MIPS_CONF_AR; + config |= MIPS_CONF_M | CONF_CM_CACHABLE_NONCOHERENT | MIPS_CONF_MT_TLB; +#ifdef CONFIG_CPU_BIG_ENDIAN + config |= CONF_BE; +#endif + if (cpu_has_vtag_icache) + config |= MIPS_CONF_VI; + kvm_write_c0_guest_config(cop0, config); + + /* Read the cache characteristics from the host Config1 Register */ + config1 = (read_c0_config1() & ~0x7f); + + /* DCache line size not correctly reported in Config1 on Octeon CPUs */ + if (cpu_dcache_line_size()) { + config1 &= ~MIPS_CONF1_DL; + config1 |= ((ilog2(cpu_dcache_line_size()) - 1) << + MIPS_CONF1_DL_SHF) & MIPS_CONF1_DL; + } + + /* Set up MMU size */ + config1 &= ~(0x3f << 25); + config1 |= ((KVM_MIPS_GUEST_TLB_SIZE - 1) << 25); + + /* We unset some bits that we aren't emulating */ + config1 &= ~(MIPS_CONF1_C2 | MIPS_CONF1_MD | MIPS_CONF1_PC | + MIPS_CONF1_WR | MIPS_CONF1_CA); + kvm_write_c0_guest_config1(cop0, config1); + + /* Have config3, no tertiary/secondary caches implemented */ + kvm_write_c0_guest_config2(cop0, MIPS_CONF_M); + /* MIPS_CONF_M | (read_c0_config2() & 0xfff) */ + + /* Have config4, UserLocal */ + kvm_write_c0_guest_config3(cop0, MIPS_CONF_M | MIPS_CONF3_ULRI); + + /* Have config5 */ + kvm_write_c0_guest_config4(cop0, MIPS_CONF_M); + + /* No config6 */ + kvm_write_c0_guest_config5(cop0, 0); + + /* Set Wait IE/IXMT Ignore in Config7, IAR, AR */ + kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10)); + + /* Status */ + kvm_write_c0_guest_status(cop0, ST0_BEV | ST0_ERL); + + /* + * Setup IntCtl defaults, compatibility mode for timer interrupts (HW5) + */ + kvm_write_c0_guest_intctl(cop0, 0xFC000000); + + /* Put in vcpu id as CPUNum into Ebase Reg to handle SMP Guests */ + kvm_write_c0_guest_ebase(cop0, KVM_GUEST_KSEG0 | + (vcpu_id & MIPS_EBASE_CPUNUM)); + + /* Put PC at guest reset vector */ + vcpu->arch.pc = KVM_GUEST_CKSEG1ADDR(0x1fc00000); + + return 0; +} + +static void kvm_trap_emul_flush_shadow_all(struct kvm *kvm) +{ + /* Flush GVA page tables and invalidate GVA ASIDs on all VCPUs */ + kvm_flush_remote_tlbs(kvm); +} + +static void kvm_trap_emul_flush_shadow_memslot(struct kvm *kvm, + const struct kvm_memory_slot *slot) +{ + kvm_trap_emul_flush_shadow_all(kvm); +} + +static u64 kvm_trap_emul_get_one_regs[] = { + KVM_REG_MIPS_CP0_INDEX, + KVM_REG_MIPS_CP0_ENTRYLO0, + KVM_REG_MIPS_CP0_ENTRYLO1, + KVM_REG_MIPS_CP0_CONTEXT, + KVM_REG_MIPS_CP0_USERLOCAL, + KVM_REG_MIPS_CP0_PAGEMASK, + KVM_REG_MIPS_CP0_WIRED, + KVM_REG_MIPS_CP0_HWRENA, + KVM_REG_MIPS_CP0_BADVADDR, + KVM_REG_MIPS_CP0_COUNT, + KVM_REG_MIPS_CP0_ENTRYHI, + KVM_REG_MIPS_CP0_COMPARE, + KVM_REG_MIPS_CP0_STATUS, + KVM_REG_MIPS_CP0_INTCTL, + KVM_REG_MIPS_CP0_CAUSE, + KVM_REG_MIPS_CP0_EPC, + KVM_REG_MIPS_CP0_PRID, + KVM_REG_MIPS_CP0_EBASE, + KVM_REG_MIPS_CP0_CONFIG, + KVM_REG_MIPS_CP0_CONFIG1, + KVM_REG_MIPS_CP0_CONFIG2, + KVM_REG_MIPS_CP0_CONFIG3, + KVM_REG_MIPS_CP0_CONFIG4, + KVM_REG_MIPS_CP0_CONFIG5, + KVM_REG_MIPS_CP0_CONFIG7, + KVM_REG_MIPS_CP0_ERROREPC, + KVM_REG_MIPS_CP0_KSCRATCH1, + KVM_REG_MIPS_CP0_KSCRATCH2, + KVM_REG_MIPS_CP0_KSCRATCH3, + KVM_REG_MIPS_CP0_KSCRATCH4, + KVM_REG_MIPS_CP0_KSCRATCH5, + KVM_REG_MIPS_CP0_KSCRATCH6, + + KVM_REG_MIPS_COUNT_CTL, + KVM_REG_MIPS_COUNT_RESUME, + KVM_REG_MIPS_COUNT_HZ, +}; + +static unsigned long kvm_trap_emul_num_regs(struct kvm_vcpu *vcpu) +{ + return 
ARRAY_SIZE(kvm_trap_emul_get_one_regs); +} + +static int kvm_trap_emul_copy_reg_indices(struct kvm_vcpu *vcpu, + u64 __user *indices) +{ + if (copy_to_user(indices, kvm_trap_emul_get_one_regs, + sizeof(kvm_trap_emul_get_one_regs))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_trap_emul_get_one_regs); + + return 0; +} + +static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + s64 *v) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + switch (reg->id) { + case KVM_REG_MIPS_CP0_INDEX: + *v = (long)kvm_read_c0_guest_index(cop0); + break; + case KVM_REG_MIPS_CP0_ENTRYLO0: + *v = kvm_read_c0_guest_entrylo0(cop0); + break; + case KVM_REG_MIPS_CP0_ENTRYLO1: + *v = kvm_read_c0_guest_entrylo1(cop0); + break; + case KVM_REG_MIPS_CP0_CONTEXT: + *v = (long)kvm_read_c0_guest_context(cop0); + break; + case KVM_REG_MIPS_CP0_USERLOCAL: + *v = (long)kvm_read_c0_guest_userlocal(cop0); + break; + case KVM_REG_MIPS_CP0_PAGEMASK: + *v = (long)kvm_read_c0_guest_pagemask(cop0); + break; + case KVM_REG_MIPS_CP0_WIRED: + *v = (long)kvm_read_c0_guest_wired(cop0); + break; + case KVM_REG_MIPS_CP0_HWRENA: + *v = (long)kvm_read_c0_guest_hwrena(cop0); + break; + case KVM_REG_MIPS_CP0_BADVADDR: + *v = (long)kvm_read_c0_guest_badvaddr(cop0); + break; + case KVM_REG_MIPS_CP0_ENTRYHI: + *v = (long)kvm_read_c0_guest_entryhi(cop0); + break; + case KVM_REG_MIPS_CP0_COMPARE: + *v = (long)kvm_read_c0_guest_compare(cop0); + break; + case KVM_REG_MIPS_CP0_STATUS: + *v = (long)kvm_read_c0_guest_status(cop0); + break; + case KVM_REG_MIPS_CP0_INTCTL: + *v = (long)kvm_read_c0_guest_intctl(cop0); + break; + case KVM_REG_MIPS_CP0_CAUSE: + *v = (long)kvm_read_c0_guest_cause(cop0); + break; + case KVM_REG_MIPS_CP0_EPC: + *v = (long)kvm_read_c0_guest_epc(cop0); + break; + case KVM_REG_MIPS_CP0_PRID: + *v = (long)kvm_read_c0_guest_prid(cop0); + break; + case KVM_REG_MIPS_CP0_EBASE: + *v = (long)kvm_read_c0_guest_ebase(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG: + *v = (long)kvm_read_c0_guest_config(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG1: + *v = (long)kvm_read_c0_guest_config1(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG2: + *v = (long)kvm_read_c0_guest_config2(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG3: + *v = (long)kvm_read_c0_guest_config3(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG4: + *v = (long)kvm_read_c0_guest_config4(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG5: + *v = (long)kvm_read_c0_guest_config5(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG7: + *v = (long)kvm_read_c0_guest_config7(cop0); + break; + case KVM_REG_MIPS_CP0_COUNT: + *v = kvm_mips_read_count(vcpu); + break; + case KVM_REG_MIPS_COUNT_CTL: + *v = vcpu->arch.count_ctl; + break; + case KVM_REG_MIPS_COUNT_RESUME: + *v = ktime_to_ns(vcpu->arch.count_resume); + break; + case KVM_REG_MIPS_COUNT_HZ: + *v = vcpu->arch.count_hz; + break; + case KVM_REG_MIPS_CP0_ERROREPC: + *v = (long)kvm_read_c0_guest_errorepc(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH1: + *v = (long)kvm_read_c0_guest_kscratch1(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH2: + *v = (long)kvm_read_c0_guest_kscratch2(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH3: + *v = (long)kvm_read_c0_guest_kscratch3(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH4: + *v = (long)kvm_read_c0_guest_kscratch4(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH5: + *v = (long)kvm_read_c0_guest_kscratch5(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH6: + *v = (long)kvm_read_c0_guest_kscratch6(cop0); + break; + default: + return -EINVAL; + } + return 
0; +} + +static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + s64 v) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + int ret = 0; + unsigned int cur, change; + + switch (reg->id) { + case KVM_REG_MIPS_CP0_INDEX: + kvm_write_c0_guest_index(cop0, v); + break; + case KVM_REG_MIPS_CP0_ENTRYLO0: + kvm_write_c0_guest_entrylo0(cop0, v); + break; + case KVM_REG_MIPS_CP0_ENTRYLO1: + kvm_write_c0_guest_entrylo1(cop0, v); + break; + case KVM_REG_MIPS_CP0_CONTEXT: + kvm_write_c0_guest_context(cop0, v); + break; + case KVM_REG_MIPS_CP0_USERLOCAL: + kvm_write_c0_guest_userlocal(cop0, v); + break; + case KVM_REG_MIPS_CP0_PAGEMASK: + kvm_write_c0_guest_pagemask(cop0, v); + break; + case KVM_REG_MIPS_CP0_WIRED: + kvm_write_c0_guest_wired(cop0, v); + break; + case KVM_REG_MIPS_CP0_HWRENA: + kvm_write_c0_guest_hwrena(cop0, v); + break; + case KVM_REG_MIPS_CP0_BADVADDR: + kvm_write_c0_guest_badvaddr(cop0, v); + break; + case KVM_REG_MIPS_CP0_ENTRYHI: + kvm_write_c0_guest_entryhi(cop0, v); + break; + case KVM_REG_MIPS_CP0_STATUS: + kvm_write_c0_guest_status(cop0, v); + break; + case KVM_REG_MIPS_CP0_INTCTL: + /* No VInt, so no VS, read-only for now */ + break; + case KVM_REG_MIPS_CP0_EPC: + kvm_write_c0_guest_epc(cop0, v); + break; + case KVM_REG_MIPS_CP0_PRID: + kvm_write_c0_guest_prid(cop0, v); + break; + case KVM_REG_MIPS_CP0_EBASE: + /* + * Allow core number to be written, but the exception base must + * remain in guest KSeg0. + */ + kvm_change_c0_guest_ebase(cop0, 0x1ffff000 | MIPS_EBASE_CPUNUM, + v); + break; + case KVM_REG_MIPS_CP0_COUNT: + kvm_mips_write_count(vcpu, v); + break; + case KVM_REG_MIPS_CP0_COMPARE: + kvm_mips_write_compare(vcpu, v, false); + break; + case KVM_REG_MIPS_CP0_CAUSE: + /* + * If the timer is stopped or started (DC bit) it must look + * atomic with changes to the interrupt pending bits (TI, IRQ5). + * A timer interrupt should not happen in between. 
+ */ + if ((kvm_read_c0_guest_cause(cop0) ^ v) & CAUSEF_DC) { + if (v & CAUSEF_DC) { + /* disable timer first */ + kvm_mips_count_disable_cause(vcpu); + kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC, + v); + } else { + /* enable timer last */ + kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC, + v); + kvm_mips_count_enable_cause(vcpu); + } + } else { + kvm_write_c0_guest_cause(cop0, v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG: + /* read-only for now */ + break; + case KVM_REG_MIPS_CP0_CONFIG1: + cur = kvm_read_c0_guest_config1(cop0); + change = (cur ^ v) & kvm_mips_config1_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config1(cop0, v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG2: + /* read-only for now */ + break; + case KVM_REG_MIPS_CP0_CONFIG3: + cur = kvm_read_c0_guest_config3(cop0); + change = (cur ^ v) & kvm_mips_config3_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config3(cop0, v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG4: + cur = kvm_read_c0_guest_config4(cop0); + change = (cur ^ v) & kvm_mips_config4_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config4(cop0, v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG5: + cur = kvm_read_c0_guest_config5(cop0); + change = (cur ^ v) & kvm_mips_config5_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config5(cop0, v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG7: + /* writes ignored */ + break; + case KVM_REG_MIPS_COUNT_CTL: + ret = kvm_mips_set_count_ctl(vcpu, v); + break; + case KVM_REG_MIPS_COUNT_RESUME: + ret = kvm_mips_set_count_resume(vcpu, v); + break; + case KVM_REG_MIPS_COUNT_HZ: + ret = kvm_mips_set_count_hz(vcpu, v); + break; + case KVM_REG_MIPS_CP0_ERROREPC: + kvm_write_c0_guest_errorepc(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH1: + kvm_write_c0_guest_kscratch1(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH2: + kvm_write_c0_guest_kscratch2(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH3: + kvm_write_c0_guest_kscratch3(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH4: + kvm_write_c0_guest_kscratch4(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH5: + kvm_write_c0_guest_kscratch5(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH6: + kvm_write_c0_guest_kscratch6(cop0, v); + break; + default: + return -EINVAL; + } + return ret; +} + +static int kvm_trap_emul_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; + struct mm_struct *mm; + + /* + * Were we in guest context? If so, restore the appropriate ASID based + * on the mode of the Guest (Kernel/User). + */ + if (current->flags & PF_VCPU) { + mm = KVM_GUEST_KERNEL_MODE(vcpu) ? 
kern_mm : user_mm; + if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) & + asid_version_mask(cpu)) + get_new_mmu_context(mm, cpu); + write_c0_entryhi(cpu_asid(cpu, mm)); + TLBMISS_HANDLER_SETUP_PGD(mm->pgd); + kvm_mips_suspend_mm(cpu); + ehb(); + } + + return 0; +} + +static int kvm_trap_emul_vcpu_put(struct kvm_vcpu *vcpu, int cpu) +{ + kvm_lose_fpu(vcpu); + + if (current->flags & PF_VCPU) { + /* Restore normal Linux process memory map */ + if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & + asid_version_mask(cpu))) + get_new_mmu_context(current->mm, cpu); + write_c0_entryhi(cpu_asid(cpu, current->mm)); + TLBMISS_HANDLER_SETUP_PGD(current->mm->pgd); + kvm_mips_resume_mm(cpu); + ehb(); + } + + return 0; +} + +static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu, + bool reload_asid) +{ + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; + struct mm_struct *mm; + int i; + + if (likely(!kvm_request_pending(vcpu))) + return; + + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + /* + * Both kernel & user GVA mappings must be invalidated. The + * caller is just about to check whether the ASID is stale + * anyway so no need to reload it here. + */ + kvm_mips_flush_gva_pt(kern_mm->pgd, KMF_GPA | KMF_KERN); + kvm_mips_flush_gva_pt(user_mm->pgd, KMF_GPA | KMF_USER); + for_each_possible_cpu(i) { + cpu_context(i, kern_mm) = 0; + cpu_context(i, user_mm) = 0; + } + + /* Generate new ASID for current mode */ + if (reload_asid) { + mm = KVM_GUEST_KERNEL_MODE(vcpu) ? kern_mm : user_mm; + get_new_mmu_context(mm, cpu); + htw_stop(); + write_c0_entryhi(cpu_asid(cpu, mm)); + TLBMISS_HANDLER_SETUP_PGD(mm->pgd); + htw_start(); + } + } +} + +/** + * kvm_trap_emul_gva_lockless_begin() - Begin lockless access to GVA space. + * @vcpu: VCPU pointer. + * + * Call before a GVA space access outside of guest mode, to ensure that + * asynchronous TLB flush requests are handled or delayed until completion of + * the GVA access (as indicated by a matching kvm_trap_emul_gva_lockless_end()). + * + * Should be called with IRQs already enabled. + */ +void kvm_trap_emul_gva_lockless_begin(struct kvm_vcpu *vcpu) +{ + /* We re-enable IRQs in kvm_trap_emul_gva_lockless_end() */ + WARN_ON_ONCE(irqs_disabled()); + + /* + * The caller is about to access the GVA space, so we set the mode to + * force TLB flush requests to send an IPI, and also disable IRQs to + * delay IPI handling until kvm_trap_emul_gva_lockless_end(). + */ + local_irq_disable(); + + /* + * Make sure the read of VCPU requests is not reordered ahead of the + * write to vcpu->mode, or we could miss a TLB flush request while + * the requester sees the VCPU as outside of guest mode and not needing + * an IPI. + */ + smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); + + /* + * If a TLB flush has been requested (potentially while + * OUTSIDE_GUEST_MODE and assumed immediately effective), perform it + * before accessing the GVA space, and be sure to reload the ASID if + * necessary as it'll be immediately used. + * + * TLB flush requests after this check will trigger an IPI due to the + * mode change above, which will be delayed due to IRQs disabled. + */ + kvm_trap_emul_check_requests(vcpu, smp_processor_id(), true); +} + +/** + * kvm_trap_emul_gva_lockless_end() - End lockless access to GVA space. + * @vcpu: VCPU pointer. + * + * Called after a GVA space access outside of guest mode. Should have a matching + * call to kvm_trap_emul_gva_lockless_begin(). 
+ */ +void kvm_trap_emul_gva_lockless_end(struct kvm_vcpu *vcpu) +{ + /* + * Make sure the write to vcpu->mode is not reordered in front of GVA + * accesses, or a TLB flush requester may not think it necessary to send + * an IPI. + */ + smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); + + /* + * Now that the access to GVA space is complete, its safe for pending + * TLB flush request IPIs to be handled (which indicates completion). + */ + local_irq_enable(); +} + +static void kvm_trap_emul_vcpu_reenter(struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; + struct mm_struct *mm; + struct mips_coproc *cop0 = vcpu->arch.cop0; + int i, cpu = smp_processor_id(); + unsigned int gasid; + + /* + * No need to reload ASID, IRQs are disabled already so there's no rush, + * and we'll check if we need to regenerate below anyway before + * re-entering the guest. + */ + kvm_trap_emul_check_requests(vcpu, cpu, false); + + if (KVM_GUEST_KERNEL_MODE(vcpu)) { + mm = kern_mm; + } else { + mm = user_mm; + + /* + * Lazy host ASID regeneration / PT flush for guest user mode. + * If the guest ASID has changed since the last guest usermode + * execution, invalidate the stale TLB entries and flush GVA PT + * entries too. + */ + gasid = kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID; + if (gasid != vcpu->arch.last_user_gasid) { + kvm_mips_flush_gva_pt(user_mm->pgd, KMF_USER); + for_each_possible_cpu(i) + cpu_context(i, user_mm) = 0; + vcpu->arch.last_user_gasid = gasid; + } + } + + /* + * Check if ASID is stale. This may happen due to a TLB flush request or + * a lazy user MM invalidation. + */ + if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) & + asid_version_mask(cpu)) + get_new_mmu_context(mm, cpu); +} + +static int kvm_trap_emul_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + int r; + + /* Check if we have any exceptions/interrupts pending */ + kvm_mips_deliver_interrupts(vcpu, + kvm_read_c0_guest_cause(vcpu->arch.cop0)); + + kvm_trap_emul_vcpu_reenter(run, vcpu); + + /* + * We use user accessors to access guest memory, but we don't want to + * invoke Linux page faulting. + */ + pagefault_disable(); + + /* Disable hardware page table walking while in guest */ + htw_stop(); + + /* + * While in guest context we're in the guest's address space, not the + * host process address space, so we need to be careful not to confuse + * e.g. cache management IPIs. 
+ */ + kvm_mips_suspend_mm(cpu); + + r = vcpu->arch.vcpu_run(run, vcpu); + + /* We may have migrated while handling guest exits */ + cpu = smp_processor_id(); + + /* Restore normal Linux process memory map */ + if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & + asid_version_mask(cpu))) + get_new_mmu_context(current->mm, cpu); + write_c0_entryhi(cpu_asid(cpu, current->mm)); + TLBMISS_HANDLER_SETUP_PGD(current->mm->pgd); + kvm_mips_resume_mm(cpu); + + htw_start(); + + pagefault_enable(); + + return r; +} + +static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { + /* exit handlers */ + .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable, + .handle_tlb_mod = kvm_trap_emul_handle_tlb_mod, + .handle_tlb_st_miss = kvm_trap_emul_handle_tlb_st_miss, + .handle_tlb_ld_miss = kvm_trap_emul_handle_tlb_ld_miss, + .handle_addr_err_st = kvm_trap_emul_handle_addr_err_st, + .handle_addr_err_ld = kvm_trap_emul_handle_addr_err_ld, + .handle_syscall = kvm_trap_emul_handle_syscall, + .handle_res_inst = kvm_trap_emul_handle_res_inst, + .handle_break = kvm_trap_emul_handle_break, + .handle_trap = kvm_trap_emul_handle_trap, + .handle_msa_fpe = kvm_trap_emul_handle_msa_fpe, + .handle_fpe = kvm_trap_emul_handle_fpe, + .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, + .handle_guest_exit = kvm_trap_emul_no_handler, + + .hardware_enable = kvm_trap_emul_hardware_enable, + .hardware_disable = kvm_trap_emul_hardware_disable, + .check_extension = kvm_trap_emul_check_extension, + .vcpu_init = kvm_trap_emul_vcpu_init, + .vcpu_uninit = kvm_trap_emul_vcpu_uninit, + .vcpu_setup = kvm_trap_emul_vcpu_setup, + .flush_shadow_all = kvm_trap_emul_flush_shadow_all, + .flush_shadow_memslot = kvm_trap_emul_flush_shadow_memslot, + .gva_to_gpa = kvm_trap_emul_gva_to_gpa_cb, + .queue_timer_int = kvm_mips_queue_timer_int_cb, + .dequeue_timer_int = kvm_mips_dequeue_timer_int_cb, + .queue_io_int = kvm_mips_queue_io_int_cb, + .dequeue_io_int = kvm_mips_dequeue_io_int_cb, + .irq_deliver = kvm_mips_irq_deliver_cb, + .irq_clear = kvm_mips_irq_clear_cb, + .num_regs = kvm_trap_emul_num_regs, + .copy_reg_indices = kvm_trap_emul_copy_reg_indices, + .get_one_reg = kvm_trap_emul_get_one_reg, + .set_one_reg = kvm_trap_emul_set_one_reg, + .vcpu_load = kvm_trap_emul_vcpu_load, + .vcpu_put = kvm_trap_emul_vcpu_put, + .vcpu_run = kvm_trap_emul_vcpu_run, + .vcpu_reenter = kvm_trap_emul_vcpu_reenter, +}; + +int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) +{ + *install_callbacks = &kvm_trap_emul_callbacks; + return 0; +} diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c new file mode 100644 index 000000000..74805035e --- /dev/null +++ b/arch/mips/kvm/vz.c @@ -0,0 +1,3223 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Support for hardware virtualization extensions + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. 
+ * Authors: Yann Le Du <ledu@kymasys.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/preempt.h> +#include <linux/vmalloc.h> +#include <asm/cacheflush.h> +#include <asm/cacheops.h> +#include <asm/cmpxchg.h> +#include <asm/fpu.h> +#include <asm/hazards.h> +#include <asm/inst.h> +#include <asm/mmu_context.h> +#include <asm/r4kcache.h> +#include <asm/time.h> +#include <asm/tlb.h> +#include <asm/tlbex.h> + +#include <linux/kvm_host.h> + +#include "interrupt.h" + +#include "trace.h" + +/* Pointers to last VCPU loaded on each physical CPU */ +static struct kvm_vcpu *last_vcpu[NR_CPUS]; +/* Pointers to last VCPU executed on each physical CPU */ +static struct kvm_vcpu *last_exec_vcpu[NR_CPUS]; + +/* + * Number of guest VTLB entries to use, so we can catch inconsistency between + * CPUs. + */ +static unsigned int kvm_vz_guest_vtlb_size; + +static inline long kvm_vz_read_gc0_ebase(void) +{ + if (sizeof(long) == 8 && cpu_has_ebase_wg) + return read_gc0_ebase_64(); + else + return read_gc0_ebase(); +} + +static inline void kvm_vz_write_gc0_ebase(long v) +{ + /* + * First write with WG=1 to write upper bits, then write again in case + * WG should be left at 0. + * write_gc0_ebase_64() is no longer UNDEFINED since R6. + */ + if (sizeof(long) == 8 && + (cpu_has_mips64r6 || cpu_has_ebase_wg)) { + write_gc0_ebase_64(v | MIPS_EBASE_WG); + write_gc0_ebase_64(v); + } else { + write_gc0_ebase(v | MIPS_EBASE_WG); + write_gc0_ebase(v); + } +} + +/* + * These Config bits may be writable by the guest: + * Config: [K23, KU] (!TLB), K0 + * Config1: (none) + * Config2: [TU, SU] (impl) + * Config3: ISAOnExc + * Config4: FTLBPageSize + * Config5: K, CV, MSAEn, UFE, FRE, SBRI, UFR + */ + +static inline unsigned int kvm_vz_config_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return CONF_CM_CMASK; +} + +static inline unsigned int kvm_vz_config1_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline unsigned int kvm_vz_config2_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline unsigned int kvm_vz_config3_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return MIPS_CONF3_ISA_OE; +} + +static inline unsigned int kvm_vz_config4_guest_wrmask(struct kvm_vcpu *vcpu) +{ + /* no need to be exact */ + return MIPS_CONF4_VFTLBPAGESIZE; +} + +static inline unsigned int kvm_vz_config5_guest_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = MIPS_CONF5_K | MIPS_CONF5_CV | MIPS_CONF5_SBRI; + + /* Permit MSAEn changes if MSA supported and enabled */ + if (kvm_mips_guest_has_msa(&vcpu->arch)) + mask |= MIPS_CONF5_MSAEN; + + /* + * Permit guest FPU mode changes if FPU is enabled and the relevant + * feature exists according to FIR register. 
+ */ + if (kvm_mips_guest_has_fpu(&vcpu->arch)) { + if (cpu_has_ufr) + mask |= MIPS_CONF5_UFR; + if (cpu_has_fre) + mask |= MIPS_CONF5_FRE | MIPS_CONF5_UFE; + } + + return mask; +} + +/* + * VZ optionally allows these additional Config bits to be written by root: + * Config: M, [MT] + * Config1: M, [MMUSize-1, C2, MD, PC, WR, CA], FP + * Config2: M + * Config3: M, MSAP, [BPG], ULRI, [DSP2P, DSPP], CTXTC, [ITL, LPA, VEIC, + * VInt, SP, CDMM, MT, SM, TL] + * Config4: M, [VTLBSizeExt, MMUSizeExt] + * Config5: MRP + */ + +static inline unsigned int kvm_vz_config_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config_guest_wrmask(vcpu) | MIPS_CONF_M; +} + +static inline unsigned int kvm_vz_config1_user_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = kvm_vz_config1_guest_wrmask(vcpu) | MIPS_CONF_M; + + /* Permit FPU to be present if FPU is supported */ + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) + mask |= MIPS_CONF1_FP; + + return mask; +} + +static inline unsigned int kvm_vz_config2_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config2_guest_wrmask(vcpu) | MIPS_CONF_M; +} + +static inline unsigned int kvm_vz_config3_user_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = kvm_vz_config3_guest_wrmask(vcpu) | MIPS_CONF_M | + MIPS_CONF3_ULRI | MIPS_CONF3_CTXTC; + + /* Permit MSA to be present if MSA is supported */ + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) + mask |= MIPS_CONF3_MSA; + + return mask; +} + +static inline unsigned int kvm_vz_config4_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config4_guest_wrmask(vcpu) | MIPS_CONF_M; +} + +static inline unsigned int kvm_vz_config5_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config5_guest_wrmask(vcpu) | MIPS_CONF5_MRP; +} + +static gpa_t kvm_vz_gva_to_gpa_cb(gva_t gva) +{ + /* VZ guest has already converted gva to gpa */ + return gva; +} + +static void kvm_vz_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority) +{ + set_bit(priority, &vcpu->arch.pending_exceptions); + clear_bit(priority, &vcpu->arch.pending_exceptions_clr); +} + +static void kvm_vz_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority) +{ + clear_bit(priority, &vcpu->arch.pending_exceptions); + set_bit(priority, &vcpu->arch.pending_exceptions_clr); +} + +static void kvm_vz_queue_timer_int_cb(struct kvm_vcpu *vcpu) +{ + /* + * timer expiry is asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); +} + +static void kvm_vz_dequeue_timer_int_cb(struct kvm_vcpu *vcpu) +{ + /* + * timer expiry is asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_TIMER); +} + +static void kvm_vz_queue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) +{ + int intr = (int)irq->irq; + + /* + * interrupts are asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + switch (intr) { + case 2: + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IO); + break; + + case 3: + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_1); + break; + + case 4: + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_2); + break; + + default: + break; + } + +} + +static void kvm_vz_dequeue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) +{ + int intr = (int)irq->irq; + + /* + * interrupts are asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + switch (intr) { + case -2: + kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IO); + break; + + case -3: + kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_1); + break; + + case 
-4: + kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_2); + break; + + default: + break; + } + +} + +static u32 kvm_vz_priority_to_irq[MIPS_EXC_MAX] = { + [MIPS_EXC_INT_TIMER] = C_IRQ5, + [MIPS_EXC_INT_IO] = C_IRQ0, + [MIPS_EXC_INT_IPI_1] = C_IRQ1, + [MIPS_EXC_INT_IPI_2] = C_IRQ2, +}; + +static int kvm_vz_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, + u32 cause) +{ + u32 irq = (priority < MIPS_EXC_MAX) ? + kvm_vz_priority_to_irq[priority] : 0; + + switch (priority) { + case MIPS_EXC_INT_TIMER: + set_gc0_cause(C_TI); + break; + + case MIPS_EXC_INT_IO: + case MIPS_EXC_INT_IPI_1: + case MIPS_EXC_INT_IPI_2: + if (cpu_has_guestctl2) + set_c0_guestctl2(irq); + else + set_gc0_cause(irq); + break; + + default: + break; + } + + clear_bit(priority, &vcpu->arch.pending_exceptions); + return 1; +} + +static int kvm_vz_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, + u32 cause) +{ + u32 irq = (priority < MIPS_EXC_MAX) ? + kvm_vz_priority_to_irq[priority] : 0; + + switch (priority) { + case MIPS_EXC_INT_TIMER: + /* + * Call to kvm_write_c0_guest_compare() clears Cause.TI in + * kvm_mips_emulate_CP0(). Explicitly clear irq associated with + * Cause.IP[IPTI] if GuestCtl2 virtual interrupt register not + * supported or if not using GuestCtl2 Hardware Clear. + */ + if (cpu_has_guestctl2) { + if (!(read_c0_guestctl2() & (irq << 14))) + clear_c0_guestctl2(irq); + } else { + clear_gc0_cause(irq); + } + break; + + case MIPS_EXC_INT_IO: + case MIPS_EXC_INT_IPI_1: + case MIPS_EXC_INT_IPI_2: + /* Clear GuestCtl2.VIP irq if not using Hardware Clear */ + if (cpu_has_guestctl2) { + if (!(read_c0_guestctl2() & (irq << 14))) + clear_c0_guestctl2(irq); + } else { + clear_gc0_cause(irq); + } + break; + + default: + break; + } + + clear_bit(priority, &vcpu->arch.pending_exceptions_clr); + return 1; +} + +/* + * VZ guest timer handling. + */ + +/** + * kvm_vz_should_use_htimer() - Find whether to use the VZ hard guest timer. + * @vcpu: Virtual CPU. + * + * Returns: true if the VZ GTOffset & real guest CP0_Count should be used + * instead of software emulation of guest timer. + * false otherwise. + */ +static bool kvm_vz_should_use_htimer(struct kvm_vcpu *vcpu) +{ + if (kvm_mips_count_disabled(vcpu)) + return false; + + /* Chosen frequency must match real frequency */ + if (mips_hpt_frequency != vcpu->arch.count_hz) + return false; + + /* We don't support a CP0_GTOffset with fewer bits than CP0_Count */ + if (current_cpu_data.gtoffset_mask != 0xffffffff) + return false; + + return true; +} + +/** + * _kvm_vz_restore_stimer() - Restore soft timer state. + * @vcpu: Virtual CPU. + * @compare: CP0_Compare register value, restored by caller. + * @cause: CP0_Cause register to restore. + * + * Restore VZ state relating to the soft timer. The hard timer can be enabled + * later. + */ +static void _kvm_vz_restore_stimer(struct kvm_vcpu *vcpu, u32 compare, + u32 cause) +{ + /* + * Avoid spurious counter interrupts by setting Guest CP0_Count to just + * after Guest CP0_Compare. + */ + write_c0_gtoffset(compare - read_c0_count()); + + back_to_back_c0_hazard(); + write_gc0_cause(cause); +} + +/** + * _kvm_vz_restore_htimer() - Restore hard timer state. + * @vcpu: Virtual CPU. + * @compare: CP0_Compare register value, restored by caller. + * @cause: CP0_Cause register to restore. + * + * Restore hard timer Guest.Count & Guest.Cause taking care to preserve the + * value of Guest.CP0_Cause.TI while restoring Guest.CP0_Cause. 
+ */ +static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu, + u32 compare, u32 cause) +{ + u32 start_count, after_count; + ktime_t freeze_time; + unsigned long flags; + + /* + * Freeze the soft-timer and sync the guest CP0_Count with it. We do + * this with interrupts disabled to avoid latency. + */ + local_irq_save(flags); + freeze_time = kvm_mips_freeze_hrtimer(vcpu, &start_count); + write_c0_gtoffset(start_count - read_c0_count()); + local_irq_restore(flags); + + /* restore guest CP0_Cause, as TI may already be set */ + back_to_back_c0_hazard(); + write_gc0_cause(cause); + + /* + * The above sequence isn't atomic and would result in lost timer + * interrupts if we're not careful. Detect if a timer interrupt is due + * and assert it. + */ + back_to_back_c0_hazard(); + after_count = read_gc0_count(); + if (after_count - start_count > compare - start_count - 1) + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); +} + +/** + * kvm_vz_restore_timer() - Restore timer state. + * @vcpu: Virtual CPU. + * + * Restore soft timer state from saved context. + */ +static void kvm_vz_restore_timer(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + u32 cause, compare; + + compare = kvm_read_sw_gc0_compare(cop0); + cause = kvm_read_sw_gc0_cause(cop0); + + write_gc0_compare(compare); + _kvm_vz_restore_stimer(vcpu, compare, cause); +} + +/** + * kvm_vz_acquire_htimer() - Switch to hard timer state. + * @vcpu: Virtual CPU. + * + * Restore hard timer state on top of existing soft timer state if possible. + * + * Since hard timer won't remain active over preemption, preemption should be + * disabled by the caller. + */ +void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) +{ + u32 gctl0; + + gctl0 = read_c0_guestctl0(); + if (!(gctl0 & MIPS_GCTL0_GT) && kvm_vz_should_use_htimer(vcpu)) { + /* enable guest access to hard timer */ + write_c0_guestctl0(gctl0 | MIPS_GCTL0_GT); + + _kvm_vz_restore_htimer(vcpu, read_gc0_compare(), + read_gc0_cause()); + } +} + +/** + * _kvm_vz_save_htimer() - Switch to software emulation of guest timer. + * @vcpu: Virtual CPU. + * @compare: Pointer to write compare value to. + * @cause: Pointer to write cause value to. + * + * Save VZ guest timer state and switch to software emulation of guest CP0 + * timer. The hard timer must already be in use, so preemption should be + * disabled. + */ +static void _kvm_vz_save_htimer(struct kvm_vcpu *vcpu, + u32 *out_compare, u32 *out_cause) +{ + u32 cause, compare, before_count, end_count; + ktime_t before_time; + + compare = read_gc0_compare(); + *out_compare = compare; + + before_time = ktime_get(); + + /* + * Record the CP0_Count *prior* to saving CP0_Cause, so we have a time + * at which no pending timer interrupt is missing. + */ + before_count = read_gc0_count(); + back_to_back_c0_hazard(); + cause = read_gc0_cause(); + *out_cause = cause; + + /* + * Record a final CP0_Count which we will transfer to the soft-timer. + * This is recorded *after* saving CP0_Cause, so we don't get any timer + * interrupts from just after the final CP0_Count point. + */ + back_to_back_c0_hazard(); + end_count = read_gc0_count(); + + /* + * The above sequence isn't atomic, so we could miss a timer interrupt + * between reading CP0_Cause and end_count. Detect and record any timer + * interrupt due between before_count and end_count. 
+ */ + if (end_count - before_count > compare - before_count - 1) + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); + + /* + * Restore soft-timer, ignoring a small amount of negative drift due to + * delay between freeze_hrtimer and setting CP0_GTOffset. + */ + kvm_mips_restore_hrtimer(vcpu, before_time, end_count, -0x10000); +} + +/** + * kvm_vz_save_timer() - Save guest timer state. + * @vcpu: Virtual CPU. + * + * Save VZ guest timer state and switch to soft guest timer if hard timer was in + * use. + */ +static void kvm_vz_save_timer(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + u32 gctl0, compare, cause; + + gctl0 = read_c0_guestctl0(); + if (gctl0 & MIPS_GCTL0_GT) { + /* disable guest use of hard timer */ + write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT); + + /* save hard timer state */ + _kvm_vz_save_htimer(vcpu, &compare, &cause); + } else { + compare = read_gc0_compare(); + cause = read_gc0_cause(); + } + + /* save timer-related state to VCPU context */ + kvm_write_sw_gc0_cause(cop0, cause); + kvm_write_sw_gc0_compare(cop0, compare); +} + +/** + * kvm_vz_lose_htimer() - Ensure hard guest timer is not in use. + * @vcpu: Virtual CPU. + * + * Transfers the state of the hard guest timer to the soft guest timer, leaving + * guest state intact so it can continue to be used with the soft timer. + */ +void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu) +{ + u32 gctl0, compare, cause; + + preempt_disable(); + gctl0 = read_c0_guestctl0(); + if (gctl0 & MIPS_GCTL0_GT) { + /* disable guest use of timer */ + write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT); + + /* switch to soft timer */ + _kvm_vz_save_htimer(vcpu, &compare, &cause); + + /* leave soft timer in usable state */ + _kvm_vz_restore_stimer(vcpu, compare, cause); + } + preempt_enable(); +} + +/** + * is_eva_access() - Find whether an instruction is an EVA memory accessor. + * @inst: 32-bit instruction encoding. + * + * Finds whether @inst encodes an EVA memory access instruction, which would + * indicate that emulation of it should access the user mode address space + * instead of the kernel mode address space. This matters for MUSUK segments + * which are TLB mapped for user mode but unmapped for kernel mode. + * + * Returns: Whether @inst encodes an EVA accessor instruction. + */ +static bool is_eva_access(union mips_instruction inst) +{ + if (inst.spec3_format.opcode != spec3_op) + return false; + + switch (inst.spec3_format.func) { + case lwle_op: + case lwre_op: + case cachee_op: + case sbe_op: + case she_op: + case sce_op: + case swe_op: + case swle_op: + case swre_op: + case prefe_op: + case lbue_op: + case lhue_op: + case lbe_op: + case lhe_op: + case lle_op: + case lwe_op: + return true; + default: + return false; + } +} + +/** + * is_eva_am_mapped() - Find whether an access mode is mapped. + * @vcpu: KVM VCPU state. + * @am: 3-bit encoded access mode. + * @eu: Segment becomes unmapped and uncached when Status.ERL=1. + * + * Decode @am to find whether it encodes a mapped segment for the current VCPU + * state. Where necessary @eu and the actual instruction causing the fault are + * taken into account to make the decision. + * + * Returns: Whether the VCPU faulted on a TLB mapped address. + */ +static bool is_eva_am_mapped(struct kvm_vcpu *vcpu, unsigned int am, bool eu) +{ + u32 am_lookup; + int err; + + /* + * Interpret access control mode. 
We assume address errors will already + * have been caught by the guest, leaving us with: + * AM UM SM KM 31..24 23..16 + * UK 0 000 Unm 0 0 + * MK 1 001 TLB 1 + * MSK 2 010 TLB TLB 1 + * MUSK 3 011 TLB TLB TLB 1 + * MUSUK 4 100 TLB TLB Unm 0 1 + * USK 5 101 Unm Unm 0 0 + * - 6 110 0 0 + * UUSK 7 111 Unm Unm Unm 0 0 + * + * We shift a magic value by AM across the sign bit to find if always + * TLB mapped, and if not shift by 8 again to find if it depends on KM. + */ + am_lookup = 0x70080000 << am; + if ((s32)am_lookup < 0) { + /* + * MK, MSK, MUSK + * Always TLB mapped, unless SegCtl.EU && ERL + */ + if (!eu || !(read_gc0_status() & ST0_ERL)) + return true; + } else { + am_lookup <<= 8; + if ((s32)am_lookup < 0) { + union mips_instruction inst; + unsigned int status; + u32 *opc; + + /* + * MUSUK + * TLB mapped if not in kernel mode + */ + status = read_gc0_status(); + if (!(status & (ST0_EXL | ST0_ERL)) && + (status & ST0_KSU)) + return true; + /* + * EVA access instructions in kernel + * mode access user address space. + */ + opc = (u32 *)vcpu->arch.pc; + if (vcpu->arch.host_cp0_cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (!err && is_eva_access(inst)) + return true; + } + } + + return false; +} + +/** + * kvm_vz_gva_to_gpa() - Convert valid GVA to GPA. + * @vcpu: KVM VCPU state. + * @gva: Guest virtual address to convert. + * @gpa: Output guest physical address. + * + * Convert a guest virtual address (GVA) which is valid according to the guest + * context, to a guest physical address (GPA). + * + * Returns: 0 on success. + * -errno on failure. + */ +static int kvm_vz_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa) +{ + u32 gva32 = gva; + unsigned long segctl; + + if ((long)gva == (s32)gva32) { + /* Handle canonical 32-bit virtual address */ + if (cpu_guest_has_segments) { + unsigned long mask, pa; + + switch (gva32 >> 29) { + case 0: + case 1: /* CFG5 (1GB) */ + segctl = read_gc0_segctl2() >> 16; + mask = (unsigned long)0xfc0000000ull; + break; + case 2: + case 3: /* CFG4 (1GB) */ + segctl = read_gc0_segctl2(); + mask = (unsigned long)0xfc0000000ull; + break; + case 4: /* CFG3 (512MB) */ + segctl = read_gc0_segctl1() >> 16; + mask = (unsigned long)0xfe0000000ull; + break; + case 5: /* CFG2 (512MB) */ + segctl = read_gc0_segctl1(); + mask = (unsigned long)0xfe0000000ull; + break; + case 6: /* CFG1 (512MB) */ + segctl = read_gc0_segctl0() >> 16; + mask = (unsigned long)0xfe0000000ull; + break; + case 7: /* CFG0 (512MB) */ + segctl = read_gc0_segctl0(); + mask = (unsigned long)0xfe0000000ull; + break; + default: + /* + * GCC 4.9 isn't smart enough to figure out that + * segctl and mask are always initialised. + */ + unreachable(); + } + + if (is_eva_am_mapped(vcpu, (segctl >> 4) & 0x7, + segctl & 0x0008)) + goto tlb_mapped; + + /* Unmapped, find guest physical address */ + pa = (segctl << 20) & mask; + pa |= gva32 & ~mask; + *gpa = pa; + return 0; + } else if ((s32)gva32 < (s32)0xc0000000) { + /* legacy unmapped KSeg0 or KSeg1 */ + *gpa = gva32 & 0x1fffffff; + return 0; + } +#ifdef CONFIG_64BIT + } else if ((gva & 0xc000000000000000) == 0x8000000000000000) { + /* XKPHYS */ + if (cpu_guest_has_segments) { + /* + * Each of the 8 regions can be overridden by SegCtl2.XR + * to use SegCtl1.XAM. 
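The sign-bit lookup in is_eva_am_mapped() above can be exercised on its own; a standalone sketch reproducing the table from the comment (the code below is illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            static const char *const am_names[8] = {
                    "UK", "MK", "MSK", "MUSK", "MUSUK", "USK", "-", "UUSK"
            };
            unsigned int am;

            for (am = 0; am < 8; am++) {
                    uint32_t lookup = 0x70080000u << am;
                    const char *kind;

                    if ((int32_t)lookup < 0)
                            kind = "always TLB mapped";
                    else if ((int32_t)(lookup << 8) < 0)
                            kind = "TLB mapped unless in kernel mode";
                    else
                            kind = "unmapped";

                    printf("AM %u (%-5s): %s\n", am, am_names[am], kind);
            }
            return 0;
    }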
+ */ + segctl = read_gc0_segctl2(); + if (segctl & (1ull << (56 + ((gva >> 59) & 0x7)))) { + segctl = read_gc0_segctl1(); + if (is_eva_am_mapped(vcpu, (segctl >> 59) & 0x7, + 0)) + goto tlb_mapped; + } + + } + /* + * Traditionally fully unmapped. + * Bits 61:59 specify the CCA, which we can just mask off here. + * Bits 58:PABITS should be zero, but we shouldn't have got here + * if it wasn't. + */ + *gpa = gva & 0x07ffffffffffffff; + return 0; +#endif + } + +tlb_mapped: + return kvm_vz_guest_tlb_lookup(vcpu, gva, gpa); +} + +/** + * kvm_vz_badvaddr_to_gpa() - Convert GVA BadVAddr from root exception to GPA. + * @vcpu: KVM VCPU state. + * @badvaddr: Root BadVAddr. + * @gpa: Output guest physical address. + * + * VZ implementations are permitted to report guest virtual addresses (GVA) in + * BadVAddr on a root exception during guest execution, instead of the more + * convenient guest physical addresses (GPA). When we get a GVA, this function + * converts it to a GPA, taking into account guest segmentation and guest TLB + * state. + * + * Returns: 0 on success. + * -errno on failure. + */ +static int kvm_vz_badvaddr_to_gpa(struct kvm_vcpu *vcpu, unsigned long badvaddr, + unsigned long *gpa) +{ + unsigned int gexccode = (vcpu->arch.host_cp0_guestctl0 & + MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT; + + /* If BadVAddr is GPA, then all is well in the world */ + if (likely(gexccode == MIPS_GCTL0_GEXC_GPA)) { + *gpa = badvaddr; + return 0; + } + + /* Otherwise we'd expect it to be GVA ... */ + if (WARN(gexccode != MIPS_GCTL0_GEXC_GVA, + "Unexpected gexccode %#x\n", gexccode)) + return -EINVAL; + + /* ... and we need to perform the GVA->GPA translation in software */ + return kvm_vz_gva_to_gpa(vcpu, badvaddr, gpa); +} + +static int kvm_trap_vz_no_handler(struct kvm_vcpu *vcpu) +{ + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; + unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; + u32 inst = 0; + + /* + * Fetch the instruction. 
+ */ + if (cause & CAUSEF_BD) + opc += 1; + kvm_get_badinstr(opc, vcpu, &inst); + + kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", + exccode, opc, inst, badvaddr, + read_gc0_status()); + kvm_arch_vcpu_dump_regs(vcpu); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; +} + +static unsigned long mips_process_maar(unsigned int op, unsigned long val) +{ + /* Mask off unused bits */ + unsigned long mask = 0xfffff000 | MIPS_MAAR_S | MIPS_MAAR_VL; + + if (read_gc0_pagegrain() & PG_ELPA) + mask |= 0x00ffffff00000000ull; + if (cpu_guest_has_mvh) + mask |= MIPS_MAAR_VH; + + /* Set or clear VH */ + if (op == mtc_op) { + /* clear VH */ + val &= ~MIPS_MAAR_VH; + } else if (op == dmtc_op) { + /* set VH to match VL */ + val &= ~MIPS_MAAR_VH; + if (val & MIPS_MAAR_VL) + val |= MIPS_MAAR_VH; + } + + return val & mask; +} + +static void kvm_write_maari(struct kvm_vcpu *vcpu, unsigned long val) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + val &= MIPS_MAARI_INDEX; + if (val == MIPS_MAARI_INDEX) + kvm_write_sw_gc0_maari(cop0, ARRAY_SIZE(vcpu->arch.maar) - 1); + else if (val < ARRAY_SIZE(vcpu->arch.maar)) + kvm_write_sw_gc0_maari(cop0, val); +} + +static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst, + u32 *opc, u32 cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + enum emulation_result er = EMULATE_DONE; + u32 rt, rd, sel; + unsigned long curr_pc; + unsigned long val; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + if (inst.co_format.co) { + switch (inst.co_format.func) { + case wait_op: + er = kvm_mips_emul_wait(vcpu); + break; + default: + er = EMULATE_FAIL; + } + } else { + rt = inst.c0r_format.rt; + rd = inst.c0r_format.rd; + sel = inst.c0r_format.sel; + + switch (inst.c0r_format.rs) { + case dmfc_op: + case mfc_op: +#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS + cop0->stat[rd][sel]++; +#endif + if (rd == MIPS_CP0_COUNT && + sel == 0) { /* Count */ + val = kvm_mips_read_count(vcpu); + } else if (rd == MIPS_CP0_COMPARE && + sel == 0) { /* Compare */ + val = read_gc0_compare(); + } else if (rd == MIPS_CP0_LLADDR && + sel == 0) { /* LLAddr */ + if (cpu_guest_has_rw_llb) + val = read_gc0_lladdr() & + MIPS_LLADDR_LLB; + else + val = 0; + } else if (rd == MIPS_CP0_LLADDR && + sel == 1 && /* MAAR */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) { + /* MAARI must be in range */ + BUG_ON(kvm_read_sw_gc0_maari(cop0) >= + ARRAY_SIZE(vcpu->arch.maar)); + val = vcpu->arch.maar[ + kvm_read_sw_gc0_maari(cop0)]; + } else if ((rd == MIPS_CP0_PRID && + (sel == 0 || /* PRid */ + sel == 2 || /* CDMMBase */ + sel == 3)) || /* CMGCRBase */ + (rd == MIPS_CP0_STATUS && + (sel == 2 || /* SRSCtl */ + sel == 3)) || /* SRSMap */ + (rd == MIPS_CP0_CONFIG && + (sel == 7)) || /* Config7 */ + (rd == MIPS_CP0_LLADDR && + (sel == 2) && /* MAARI */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) || + (rd == MIPS_CP0_ERRCTL && + (sel == 0))) { /* ErrCtl */ + val = cop0->reg[rd][sel]; + } else { + val = 0; + er = EMULATE_FAIL; + } + + if (er != EMULATE_FAIL) { + /* Sign extend */ + if (inst.c0r_format.rs == mfc_op) + val = (int)val; + vcpu->arch.gprs[rt] = val; + } + + trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mfc_op) ? 
+ KVM_TRACE_MFC0 : KVM_TRACE_DMFC0, + KVM_TRACE_COP0(rd, sel), val); + break; + + case dmtc_op: + case mtc_op: +#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS + cop0->stat[rd][sel]++; +#endif + val = vcpu->arch.gprs[rt]; + trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mtc_op) ? + KVM_TRACE_MTC0 : KVM_TRACE_DMTC0, + KVM_TRACE_COP0(rd, sel), val); + + if (rd == MIPS_CP0_COUNT && + sel == 0) { /* Count */ + kvm_vz_lose_htimer(vcpu); + kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]); + } else if (rd == MIPS_CP0_COMPARE && + sel == 0) { /* Compare */ + kvm_mips_write_compare(vcpu, + vcpu->arch.gprs[rt], + true); + } else if (rd == MIPS_CP0_LLADDR && + sel == 0) { /* LLAddr */ + /* + * P5600 generates GPSI on guest MTC0 LLAddr. + * Only allow the guest to clear LLB. + */ + if (cpu_guest_has_rw_llb && + !(val & MIPS_LLADDR_LLB)) + write_gc0_lladdr(0); + } else if (rd == MIPS_CP0_LLADDR && + sel == 1 && /* MAAR */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) { + val = mips_process_maar(inst.c0r_format.rs, + val); + + /* MAARI must be in range */ + BUG_ON(kvm_read_sw_gc0_maari(cop0) >= + ARRAY_SIZE(vcpu->arch.maar)); + vcpu->arch.maar[kvm_read_sw_gc0_maari(cop0)] = + val; + } else if (rd == MIPS_CP0_LLADDR && + (sel == 2) && /* MAARI */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) { + kvm_write_maari(vcpu, val); + } else if (rd == MIPS_CP0_ERRCTL && + (sel == 0)) { /* ErrCtl */ + /* ignore the written value */ + } else { + er = EMULATE_FAIL; + } + break; + + default: + er = EMULATE_FAIL; + break; + } + } + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) { + kvm_err("[%#lx]%s: unsupported cop0 instruction 0x%08x\n", + curr_pc, __func__, inst.word); + + vcpu->arch.pc = curr_pc; + } + + return er; +} + +static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst, + u32 *opc, u32 cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + u32 cache, op_inst, op, base; + s16 offset; + struct kvm_vcpu_arch *arch = &vcpu->arch; + unsigned long va, curr_pc; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + base = inst.i_format.rs; + op_inst = inst.i_format.rt; + if (cpu_has_mips_r6) + offset = inst.spec3_format.simmediate; + else + offset = inst.i_format.simmediate; + cache = op_inst & CacheOp_Cache; + op = op_inst & CacheOp_Op; + + va = arch->gprs[base] + offset; + + kvm_debug("CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + cache, op, base, arch->gprs[base], offset); + + /* Secondary or tirtiary cache ops ignored */ + if (cache != Cache_I && cache != Cache_D) + return EMULATE_DONE; + + switch (op_inst) { + case Index_Invalidate_I: + flush_icache_line_indexed(va); + return EMULATE_DONE; + case Index_Writeback_Inv_D: + flush_dcache_line_indexed(va); + return EMULATE_DONE; + case Hit_Invalidate_I: + case Hit_Invalidate_D: + case Hit_Writeback_Inv_D: + if (boot_cpu_type() == CPU_CAVIUM_OCTEON3) { + /* We can just flush entire icache */ + local_flush_icache_range(0, 0); + return EMULATE_DONE; + } + + /* So far, other platforms support guest hit cache ops */ + break; + default: + break; + }; + + kvm_err("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + curr_pc, vcpu->arch.gprs[31], cache, op, base, arch->gprs[base], + offset); + /* Rollback PC */ + vcpu->arch.pc = curr_pc; + + return EMULATE_FAIL; +} + +static enum 
emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + struct kvm_vcpu_arch *arch = &vcpu->arch; + struct kvm_run *run = vcpu->run; + union mips_instruction inst; + int rd, rt, sel; + int err; + + /* + * Fetch the instruction. + */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) + return EMULATE_FAIL; + + switch (inst.r_format.opcode) { + case cop0_op: + er = kvm_vz_gpsi_cop0(inst, opc, cause, run, vcpu); + break; +#ifndef CONFIG_CPU_MIPSR6 + case cache_op: + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); + er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu); + break; +#endif + case spec3_op: + switch (inst.spec3_format.func) { +#ifdef CONFIG_CPU_MIPSR6 + case cache6_op: + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); + er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu); + break; +#endif + case rdhwr_op: + if (inst.r_format.rs || (inst.r_format.re >> 3)) + goto unknown; + + rd = inst.r_format.rd; + rt = inst.r_format.rt; + sel = inst.r_format.re & 0x7; + + switch (rd) { + case MIPS_HWR_CC: /* Read count register */ + arch->gprs[rt] = + (long)(int)kvm_mips_read_count(vcpu); + break; + default: + trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, + KVM_TRACE_HWR(rd, sel), 0); + goto unknown; + }; + + trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, + KVM_TRACE_HWR(rd, sel), arch->gprs[rt]); + + er = update_pc(vcpu, cause); + break; + default: + goto unknown; + }; + break; +unknown: + + default: + kvm_err("GPSI exception not supported (%p/%#x)\n", + opc, inst.word); + kvm_arch_vcpu_dump_regs(vcpu); + er = EMULATE_FAIL; + break; + } + + return er; +} + +static enum emulation_result kvm_trap_vz_handle_gsfc(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + struct kvm_vcpu_arch *arch = &vcpu->arch; + union mips_instruction inst; + int err; + + /* + * Fetch the instruction. + */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) + return EMULATE_FAIL; + + /* complete MTC0 on behalf of guest and advance EPC */ + if (inst.c0r_format.opcode == cop0_op && + inst.c0r_format.rs == mtc_op && + inst.c0r_format.z == 0) { + int rt = inst.c0r_format.rt; + int rd = inst.c0r_format.rd; + int sel = inst.c0r_format.sel; + unsigned int val = arch->gprs[rt]; + unsigned int old_val, change; + + trace_kvm_hwr(vcpu, KVM_TRACE_MTC0, KVM_TRACE_COP0(rd, sel), + val); + + if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { + /* FR bit should read as zero if no FPU */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + val &= ~(ST0_CU1 | ST0_FR); + + /* + * Also don't allow FR to be set if host doesn't support + * it. + */ + if (!(boot_cpu_data.fpu_id & MIPS_FPIR_F64)) + val &= ~ST0_FR; + + old_val = read_gc0_status(); + change = val ^ old_val; + + if (change & ST0_FR) { + /* + * FPU and Vector register state is made + * UNPREDICTABLE by a change of FR, so don't + * even bother saving it. + */ + kvm_drop_fpu(vcpu); + } + + /* + * If MSA state is already live, it is undefined how it + * interacts with FR=0 FPU state, and we don't want to + * hit reserved instruction exceptions trying to save + * the MSA state later when CU=1 && FR=1, so play it + * safe and save it first. 
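The Status and Cause emulation around this point relies on two small bit tricks: XOR to find which bits the guest write would flip, and a mask so that Cause.WP can be cleared but never set. A standalone sketch (the WP bit position is assumed here for illustration):

    #include <stdint.h>

    #define WP_BIT (1u << 22)       /* Cause.WP, value assumed for the sketch */

    /* Bits the guest write would flip relative to the current value. */
    static uint32_t changed_bits(uint32_t cur, uint32_t want)
    {
            return cur ^ want;
    }

    /*
     * Keep a WP change only if WP is currently set, i.e. allow 1 -> 0 but
     * not 0 -> 1; this mirrors  change &= ~CAUSEF_WP | old_cause  below.
     */
    static uint32_t wp_clear_only(uint32_t change, uint32_t old_cause)
    {
            return change & (~WP_BIT | old_cause);
    }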
+ */ + if (change & ST0_CU1 && !(val & ST0_FR) && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) + kvm_lose_fpu(vcpu); + + write_gc0_status(val); + } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) { + u32 old_cause = read_gc0_cause(); + u32 change = old_cause ^ val; + + /* DC bit enabling/disabling timer? */ + if (change & CAUSEF_DC) { + if (val & CAUSEF_DC) { + kvm_vz_lose_htimer(vcpu); + kvm_mips_count_disable_cause(vcpu); + } else { + kvm_mips_count_enable_cause(vcpu); + } + } + + /* Only certain bits are RW to the guest */ + change &= (CAUSEF_DC | CAUSEF_IV | CAUSEF_WP | + CAUSEF_IP0 | CAUSEF_IP1); + + /* WP can only be cleared */ + change &= ~CAUSEF_WP | old_cause; + + write_gc0_cause(old_cause ^ change); + } else if ((rd == MIPS_CP0_STATUS) && (sel == 1)) { /* IntCtl */ + write_gc0_intctl(val); + } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) { + old_val = read_gc0_config5(); + change = val ^ old_val; + /* Handle changes in FPU/MSA modes */ + preempt_disable(); + + /* + * Propagate FRE changes immediately if the FPU + * context is already loaded. + */ + if (change & MIPS_CONF5_FRE && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) + change_c0_config5(MIPS_CONF5_FRE, val); + + preempt_enable(); + + val = old_val ^ + (change & kvm_vz_config5_guest_wrmask(vcpu)); + write_gc0_config5(val); + } else { + kvm_err("Handle GSFC, unsupported field change @ %p: %#x\n", + opc, inst.word); + er = EMULATE_FAIL; + } + + if (er != EMULATE_FAIL) + er = update_pc(vcpu, cause); + } else { + kvm_err("Handle GSFC, unrecognized instruction @ %p: %#x\n", + opc, inst.word); + er = EMULATE_FAIL; + } + + return er; +} + +static enum emulation_result kvm_trap_vz_handle_ghfc(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + /* + * Presumably this is due to MC (guest mode change), so lets trace some + * relevant info. + */ + trace_kvm_guest_mode_change(vcpu); + + return EMULATE_DONE; +} + +static enum emulation_result kvm_trap_vz_handle_hc(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + union mips_instruction inst; + unsigned long curr_pc; + int err; + + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) + return EMULATE_FAIL; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + er = kvm_mips_emul_hypcall(vcpu, inst); + if (er == EMULATE_FAIL) + vcpu->arch.pc = curr_pc; + + return er; +} + +static enum emulation_result kvm_trap_vz_no_handler_guest_exit(u32 gexccode, + u32 cause, + u32 *opc, + struct kvm_vcpu *vcpu) +{ + u32 inst; + + /* + * Fetch the instruction. 
+ */ + if (cause & CAUSEF_BD) + opc += 1; + kvm_get_badinstr(opc, vcpu, &inst); + + kvm_err("Guest Exception Code: %d not yet handled @ PC: %p, inst: 0x%08x Status: %#x\n", + gexccode, opc, inst, read_gc0_status()); + + return EMULATE_FAIL; +} + +static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu) +{ + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + u32 gexccode = (vcpu->arch.host_cp0_guestctl0 & + MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT; + int ret = RESUME_GUEST; + + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_GEXCCODE_BASE + gexccode); + switch (gexccode) { + case MIPS_GCTL0_GEXC_GPSI: + ++vcpu->stat.vz_gpsi_exits; + er = kvm_trap_vz_handle_gpsi(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_GSFC: + ++vcpu->stat.vz_gsfc_exits; + er = kvm_trap_vz_handle_gsfc(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_HC: + ++vcpu->stat.vz_hc_exits; + er = kvm_trap_vz_handle_hc(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_GRR: + ++vcpu->stat.vz_grr_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + case MIPS_GCTL0_GEXC_GVA: + ++vcpu->stat.vz_gva_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + case MIPS_GCTL0_GEXC_GHFC: + ++vcpu->stat.vz_ghfc_exits; + er = kvm_trap_vz_handle_ghfc(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_GPA: + ++vcpu->stat.vz_gpa_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + default: + ++vcpu->stat.vz_resvd_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + + } + + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else if (er == EMULATE_HYPERCALL) { + ret = kvm_mips_handle_hypcall(vcpu); + } else { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +/** + * kvm_trap_vz_handle_cop_unusuable() - Guest used unusable coprocessor. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use a coprocessor which hasn't been allowed + * by the root context. + */ +static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_FAIL; + int ret = RESUME_GUEST; + + if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) { + /* + * If guest FPU not present, the FPU operation should have been + * treated as a reserved instruction! + * If FPU already in use, we shouldn't get this at all. + */ + if (WARN_ON(!kvm_mips_guest_has_fpu(&vcpu->arch) || + vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)) { + preempt_enable(); + return EMULATE_FAIL; + } + + kvm_own_fpu(vcpu); + er = EMULATE_DONE; + } + /* other coprocessors not handled */ + + switch (er) { + case EMULATE_DONE: + ret = RESUME_GUEST; + break; + + case EMULATE_FAIL: + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + break; + + default: + BUG(); + } + return ret; +} + +/** + * kvm_trap_vz_handle_msa_disabled() - Guest used MSA while disabled in root. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use MSA when it is disabled in the root + * context. + */ +static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + /* + * If MSA not present or not exposed to guest or FR=0, the MSA operation + * should have been treated as a reserved instruction! + * Same if CU1=1, FR=0. + * If MSA already in use, we shouldn't get this at all. 
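The "CU1=1, FR=0" case singled out above is a combined mask-and-compare on two Status bits; a small illustrative check (bit positions assumed for the sketch):

    #include <stdbool.h>
    #include <stdint.h>

    #define ST_FR  (1u << 26)        /* Status.FR,  assumed for the sketch */
    #define ST_CU1 (1u << 29)        /* Status.CU1, assumed for the sketch */

    /* True only when CU1 is set and FR is clear. */
    static bool cu1_set_fr_clear(uint32_t status)
    {
            return (status & (ST_CU1 | ST_FR)) == ST_CU1;
    }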
+ */ + if (!kvm_mips_guest_has_msa(&vcpu->arch) || + (read_gc0_status() & (ST0_CU1 | ST0_FR)) == ST0_CU1 || + !(read_gc0_config5() & MIPS_CONF5_MSAEN) || + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + kvm_own_msa(vcpu); + + return RESUME_GUEST; +} + +static int kvm_trap_vz_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + ulong badvaddr = vcpu->arch.host_cp0_badvaddr; + union mips_instruction inst; + enum emulation_result er = EMULATE_DONE; + int err, ret = RESUME_GUEST; + + if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, false)) { + /* A code fetch fault doesn't count as an MMIO */ + if (kvm_is_ifetch_fault(&vcpu->arch)) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Fetch the instruction */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Treat as MMIO */ + er = kvm_mips_emulate_load(inst, cause, run, vcpu); + if (er == EMULATE_FAIL) { + kvm_err("Guest Emulate Load from MMIO space failed: PC: %p, BadVaddr: %#lx\n", + opc, badvaddr); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } + } + + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else if (er == EMULATE_DO_MMIO) { + run->exit_reason = KVM_EXIT_MMIO; + ret = RESUME_HOST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_vz_handle_tlb_st_miss(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + ulong badvaddr = vcpu->arch.host_cp0_badvaddr; + union mips_instruction inst; + enum emulation_result er = EMULATE_DONE; + int err; + int ret = RESUME_GUEST; + + /* Just try the access again if we couldn't do the translation */ + if (kvm_vz_badvaddr_to_gpa(vcpu, badvaddr, &badvaddr)) + return RESUME_GUEST; + vcpu->arch.host_cp0_badvaddr = badvaddr; + + if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, true)) { + /* Fetch the instruction */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Treat as MMIO */ + er = kvm_mips_emulate_store(inst, cause, run, vcpu); + if (er == EMULATE_FAIL) { + kvm_err("Guest Emulate Store to MMIO space failed: PC: %p, BadVaddr: %#lx\n", + opc, badvaddr); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } + } + + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else if (er == EMULATE_DO_MMIO) { + run->exit_reason = KVM_EXIT_MMIO; + ret = RESUME_HOST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static u64 kvm_vz_get_one_regs[] = { + KVM_REG_MIPS_CP0_INDEX, + KVM_REG_MIPS_CP0_ENTRYLO0, + KVM_REG_MIPS_CP0_ENTRYLO1, + KVM_REG_MIPS_CP0_CONTEXT, + KVM_REG_MIPS_CP0_PAGEMASK, + KVM_REG_MIPS_CP0_PAGEGRAIN, + KVM_REG_MIPS_CP0_WIRED, + KVM_REG_MIPS_CP0_HWRENA, + KVM_REG_MIPS_CP0_BADVADDR, + KVM_REG_MIPS_CP0_COUNT, + KVM_REG_MIPS_CP0_ENTRYHI, + KVM_REG_MIPS_CP0_COMPARE, + KVM_REG_MIPS_CP0_STATUS, + KVM_REG_MIPS_CP0_INTCTL, + KVM_REG_MIPS_CP0_CAUSE, + KVM_REG_MIPS_CP0_EPC, + KVM_REG_MIPS_CP0_PRID, + KVM_REG_MIPS_CP0_EBASE, + KVM_REG_MIPS_CP0_CONFIG, + KVM_REG_MIPS_CP0_CONFIG1, + KVM_REG_MIPS_CP0_CONFIG2, + KVM_REG_MIPS_CP0_CONFIG3, + 
KVM_REG_MIPS_CP0_CONFIG4, + KVM_REG_MIPS_CP0_CONFIG5, +#ifdef CONFIG_64BIT + KVM_REG_MIPS_CP0_XCONTEXT, +#endif + KVM_REG_MIPS_CP0_ERROREPC, + + KVM_REG_MIPS_COUNT_CTL, + KVM_REG_MIPS_COUNT_RESUME, + KVM_REG_MIPS_COUNT_HZ, +}; + +static u64 kvm_vz_get_one_regs_contextconfig[] = { + KVM_REG_MIPS_CP0_CONTEXTCONFIG, +#ifdef CONFIG_64BIT + KVM_REG_MIPS_CP0_XCONTEXTCONFIG, +#endif +}; + +static u64 kvm_vz_get_one_regs_segments[] = { + KVM_REG_MIPS_CP0_SEGCTL0, + KVM_REG_MIPS_CP0_SEGCTL1, + KVM_REG_MIPS_CP0_SEGCTL2, +}; + +static u64 kvm_vz_get_one_regs_htw[] = { + KVM_REG_MIPS_CP0_PWBASE, + KVM_REG_MIPS_CP0_PWFIELD, + KVM_REG_MIPS_CP0_PWSIZE, + KVM_REG_MIPS_CP0_PWCTL, +}; + +static u64 kvm_vz_get_one_regs_kscratch[] = { + KVM_REG_MIPS_CP0_KSCRATCH1, + KVM_REG_MIPS_CP0_KSCRATCH2, + KVM_REG_MIPS_CP0_KSCRATCH3, + KVM_REG_MIPS_CP0_KSCRATCH4, + KVM_REG_MIPS_CP0_KSCRATCH5, + KVM_REG_MIPS_CP0_KSCRATCH6, +}; + +static unsigned long kvm_vz_num_regs(struct kvm_vcpu *vcpu) +{ + unsigned long ret; + + ret = ARRAY_SIZE(kvm_vz_get_one_regs); + if (cpu_guest_has_userlocal) + ++ret; + if (cpu_guest_has_badinstr) + ++ret; + if (cpu_guest_has_badinstrp) + ++ret; + if (cpu_guest_has_contextconfig) + ret += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig); + if (cpu_guest_has_segments) + ret += ARRAY_SIZE(kvm_vz_get_one_regs_segments); + if (cpu_guest_has_htw) + ret += ARRAY_SIZE(kvm_vz_get_one_regs_htw); + if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) + ret += 1 + ARRAY_SIZE(vcpu->arch.maar); + ret += __arch_hweight8(cpu_data[0].guest.kscratch_mask); + + return ret; +} + +static int kvm_vz_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) +{ + u64 index; + unsigned int i; + + if (copy_to_user(indices, kvm_vz_get_one_regs, + sizeof(kvm_vz_get_one_regs))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs); + + if (cpu_guest_has_userlocal) { + index = KVM_REG_MIPS_CP0_USERLOCAL; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + if (cpu_guest_has_badinstr) { + index = KVM_REG_MIPS_CP0_BADINSTR; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + if (cpu_guest_has_badinstrp) { + index = KVM_REG_MIPS_CP0_BADINSTRP; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + if (cpu_guest_has_contextconfig) { + if (copy_to_user(indices, kvm_vz_get_one_regs_contextconfig, + sizeof(kvm_vz_get_one_regs_contextconfig))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig); + } + if (cpu_guest_has_segments) { + if (copy_to_user(indices, kvm_vz_get_one_regs_segments, + sizeof(kvm_vz_get_one_regs_segments))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs_segments); + } + if (cpu_guest_has_htw) { + if (copy_to_user(indices, kvm_vz_get_one_regs_htw, + sizeof(kvm_vz_get_one_regs_htw))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs_htw); + } + if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) { + for (i = 0; i < ARRAY_SIZE(vcpu->arch.maar); ++i) { + index = KVM_REG_MIPS_CP0_MAAR(i); + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + + index = KVM_REG_MIPS_CP0_MAARI; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + for (i = 0; i < 6; ++i) { + if (!cpu_guest_has_kscr(i + 2)) + continue; + + if (copy_to_user(indices, &kvm_vz_get_one_regs_kscratch[i], + sizeof(kvm_vz_get_one_regs_kscratch[i]))) + return -EFAULT; + ++indices; + } + + return 0; +} + +static inline s64 
entrylo_kvm_to_user(unsigned long v) +{ + s64 mask, ret = v; + + if (BITS_PER_LONG == 32) { + /* + * KVM API exposes 64-bit version of the register, so move the + * RI/XI bits up into place. + */ + mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI; + ret &= ~mask; + ret |= ((s64)v & mask) << 32; + } + return ret; +} + +static inline unsigned long entrylo_user_to_kvm(s64 v) +{ + unsigned long mask, ret = v; + + if (BITS_PER_LONG == 32) { + /* + * KVM API exposes 64-bit versiono of the register, so move the + * RI/XI bits down into place. + */ + mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI; + ret &= ~mask; + ret |= (v >> 32) & mask; + } + return ret; +} + +static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + s64 *v) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned int idx; + + switch (reg->id) { + case KVM_REG_MIPS_CP0_INDEX: + *v = (long)read_gc0_index(); + break; + case KVM_REG_MIPS_CP0_ENTRYLO0: + *v = entrylo_kvm_to_user(read_gc0_entrylo0()); + break; + case KVM_REG_MIPS_CP0_ENTRYLO1: + *v = entrylo_kvm_to_user(read_gc0_entrylo1()); + break; + case KVM_REG_MIPS_CP0_CONTEXT: + *v = (long)read_gc0_context(); + break; + case KVM_REG_MIPS_CP0_CONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + *v = read_gc0_contextconfig(); + break; + case KVM_REG_MIPS_CP0_USERLOCAL: + if (!cpu_guest_has_userlocal) + return -EINVAL; + *v = read_gc0_userlocal(); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + *v = read_gc0_xcontextconfig(); + break; +#endif + case KVM_REG_MIPS_CP0_PAGEMASK: + *v = (long)read_gc0_pagemask(); + break; + case KVM_REG_MIPS_CP0_PAGEGRAIN: + *v = (long)read_gc0_pagegrain(); + break; + case KVM_REG_MIPS_CP0_SEGCTL0: + if (!cpu_guest_has_segments) + return -EINVAL; + *v = read_gc0_segctl0(); + break; + case KVM_REG_MIPS_CP0_SEGCTL1: + if (!cpu_guest_has_segments) + return -EINVAL; + *v = read_gc0_segctl1(); + break; + case KVM_REG_MIPS_CP0_SEGCTL2: + if (!cpu_guest_has_segments) + return -EINVAL; + *v = read_gc0_segctl2(); + break; + case KVM_REG_MIPS_CP0_PWBASE: + if (!cpu_guest_has_htw) + return -EINVAL; + *v = read_gc0_pwbase(); + break; + case KVM_REG_MIPS_CP0_PWFIELD: + if (!cpu_guest_has_htw) + return -EINVAL; + *v = read_gc0_pwfield(); + break; + case KVM_REG_MIPS_CP0_PWSIZE: + if (!cpu_guest_has_htw) + return -EINVAL; + *v = read_gc0_pwsize(); + break; + case KVM_REG_MIPS_CP0_WIRED: + *v = (long)read_gc0_wired(); + break; + case KVM_REG_MIPS_CP0_PWCTL: + if (!cpu_guest_has_htw) + return -EINVAL; + *v = read_gc0_pwctl(); + break; + case KVM_REG_MIPS_CP0_HWRENA: + *v = (long)read_gc0_hwrena(); + break; + case KVM_REG_MIPS_CP0_BADVADDR: + *v = (long)read_gc0_badvaddr(); + break; + case KVM_REG_MIPS_CP0_BADINSTR: + if (!cpu_guest_has_badinstr) + return -EINVAL; + *v = read_gc0_badinstr(); + break; + case KVM_REG_MIPS_CP0_BADINSTRP: + if (!cpu_guest_has_badinstrp) + return -EINVAL; + *v = read_gc0_badinstrp(); + break; + case KVM_REG_MIPS_CP0_COUNT: + *v = kvm_mips_read_count(vcpu); + break; + case KVM_REG_MIPS_CP0_ENTRYHI: + *v = (long)read_gc0_entryhi(); + break; + case KVM_REG_MIPS_CP0_COMPARE: + *v = (long)read_gc0_compare(); + break; + case KVM_REG_MIPS_CP0_STATUS: + *v = (long)read_gc0_status(); + break; + case KVM_REG_MIPS_CP0_INTCTL: + *v = read_gc0_intctl(); + break; + case KVM_REG_MIPS_CP0_CAUSE: + *v = (long)read_gc0_cause(); + break; + case KVM_REG_MIPS_CP0_EPC: + *v = (long)read_gc0_epc(); + break; + case KVM_REG_MIPS_CP0_PRID: + switch 
(boot_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Octeon III has a read-only guest.PRid */ + *v = read_gc0_prid(); + break; + default: + *v = (long)kvm_read_c0_guest_prid(cop0); + break; + }; + break; + case KVM_REG_MIPS_CP0_EBASE: + *v = kvm_vz_read_gc0_ebase(); + break; + case KVM_REG_MIPS_CP0_CONFIG: + *v = read_gc0_config(); + break; + case KVM_REG_MIPS_CP0_CONFIG1: + if (!cpu_guest_has_conf1) + return -EINVAL; + *v = read_gc0_config1(); + break; + case KVM_REG_MIPS_CP0_CONFIG2: + if (!cpu_guest_has_conf2) + return -EINVAL; + *v = read_gc0_config2(); + break; + case KVM_REG_MIPS_CP0_CONFIG3: + if (!cpu_guest_has_conf3) + return -EINVAL; + *v = read_gc0_config3(); + break; + case KVM_REG_MIPS_CP0_CONFIG4: + if (!cpu_guest_has_conf4) + return -EINVAL; + *v = read_gc0_config4(); + break; + case KVM_REG_MIPS_CP0_CONFIG5: + if (!cpu_guest_has_conf5) + return -EINVAL; + *v = read_gc0_config5(); + break; + case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f): + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0); + if (idx >= ARRAY_SIZE(vcpu->arch.maar)) + return -EINVAL; + *v = vcpu->arch.maar[idx]; + break; + case KVM_REG_MIPS_CP0_MAARI: + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + *v = kvm_read_sw_gc0_maari(vcpu->arch.cop0); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXT: + *v = read_gc0_xcontext(); + break; +#endif + case KVM_REG_MIPS_CP0_ERROREPC: + *v = (long)read_gc0_errorepc(); + break; + case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: + idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; + if (!cpu_guest_has_kscr(idx)) + return -EINVAL; + switch (idx) { + case 2: + *v = (long)read_gc0_kscratch1(); + break; + case 3: + *v = (long)read_gc0_kscratch2(); + break; + case 4: + *v = (long)read_gc0_kscratch3(); + break; + case 5: + *v = (long)read_gc0_kscratch4(); + break; + case 6: + *v = (long)read_gc0_kscratch5(); + break; + case 7: + *v = (long)read_gc0_kscratch6(); + break; + } + break; + case KVM_REG_MIPS_COUNT_CTL: + *v = vcpu->arch.count_ctl; + break; + case KVM_REG_MIPS_COUNT_RESUME: + *v = ktime_to_ns(vcpu->arch.count_resume); + break; + case KVM_REG_MIPS_COUNT_HZ: + *v = vcpu->arch.count_hz; + break; + default: + return -EINVAL; + } + return 0; +} + +static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + s64 v) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned int idx; + int ret = 0; + unsigned int cur, change; + + switch (reg->id) { + case KVM_REG_MIPS_CP0_INDEX: + write_gc0_index(v); + break; + case KVM_REG_MIPS_CP0_ENTRYLO0: + write_gc0_entrylo0(entrylo_user_to_kvm(v)); + break; + case KVM_REG_MIPS_CP0_ENTRYLO1: + write_gc0_entrylo1(entrylo_user_to_kvm(v)); + break; + case KVM_REG_MIPS_CP0_CONTEXT: + write_gc0_context(v); + break; + case KVM_REG_MIPS_CP0_CONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + write_gc0_contextconfig(v); + break; + case KVM_REG_MIPS_CP0_USERLOCAL: + if (!cpu_guest_has_userlocal) + return -EINVAL; + write_gc0_userlocal(v); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + write_gc0_xcontextconfig(v); + break; +#endif + case KVM_REG_MIPS_CP0_PAGEMASK: + write_gc0_pagemask(v); + break; + case KVM_REG_MIPS_CP0_PAGEGRAIN: + write_gc0_pagegrain(v); + break; + case KVM_REG_MIPS_CP0_SEGCTL0: + if (!cpu_guest_has_segments) + return -EINVAL; + write_gc0_segctl0(v); + break; + case 
KVM_REG_MIPS_CP0_SEGCTL1: + if (!cpu_guest_has_segments) + return -EINVAL; + write_gc0_segctl1(v); + break; + case KVM_REG_MIPS_CP0_SEGCTL2: + if (!cpu_guest_has_segments) + return -EINVAL; + write_gc0_segctl2(v); + break; + case KVM_REG_MIPS_CP0_PWBASE: + if (!cpu_guest_has_htw) + return -EINVAL; + write_gc0_pwbase(v); + break; + case KVM_REG_MIPS_CP0_PWFIELD: + if (!cpu_guest_has_htw) + return -EINVAL; + write_gc0_pwfield(v); + break; + case KVM_REG_MIPS_CP0_PWSIZE: + if (!cpu_guest_has_htw) + return -EINVAL; + write_gc0_pwsize(v); + break; + case KVM_REG_MIPS_CP0_WIRED: + change_gc0_wired(MIPSR6_WIRED_WIRED, v); + break; + case KVM_REG_MIPS_CP0_PWCTL: + if (!cpu_guest_has_htw) + return -EINVAL; + write_gc0_pwctl(v); + break; + case KVM_REG_MIPS_CP0_HWRENA: + write_gc0_hwrena(v); + break; + case KVM_REG_MIPS_CP0_BADVADDR: + write_gc0_badvaddr(v); + break; + case KVM_REG_MIPS_CP0_BADINSTR: + if (!cpu_guest_has_badinstr) + return -EINVAL; + write_gc0_badinstr(v); + break; + case KVM_REG_MIPS_CP0_BADINSTRP: + if (!cpu_guest_has_badinstrp) + return -EINVAL; + write_gc0_badinstrp(v); + break; + case KVM_REG_MIPS_CP0_COUNT: + kvm_mips_write_count(vcpu, v); + break; + case KVM_REG_MIPS_CP0_ENTRYHI: + write_gc0_entryhi(v); + break; + case KVM_REG_MIPS_CP0_COMPARE: + kvm_mips_write_compare(vcpu, v, false); + break; + case KVM_REG_MIPS_CP0_STATUS: + write_gc0_status(v); + break; + case KVM_REG_MIPS_CP0_INTCTL: + write_gc0_intctl(v); + break; + case KVM_REG_MIPS_CP0_CAUSE: + /* + * If the timer is stopped or started (DC bit) it must look + * atomic with changes to the timer interrupt pending bit (TI). + * A timer interrupt should not happen in between. + */ + if ((read_gc0_cause() ^ v) & CAUSEF_DC) { + if (v & CAUSEF_DC) { + /* disable timer first */ + kvm_mips_count_disable_cause(vcpu); + change_gc0_cause((u32)~CAUSEF_DC, v); + } else { + /* enable timer last */ + change_gc0_cause((u32)~CAUSEF_DC, v); + kvm_mips_count_enable_cause(vcpu); + } + } else { + write_gc0_cause(v); + } + break; + case KVM_REG_MIPS_CP0_EPC: + write_gc0_epc(v); + break; + case KVM_REG_MIPS_CP0_PRID: + switch (boot_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Octeon III has a guest.PRid, but its read-only */ + break; + default: + kvm_write_c0_guest_prid(cop0, v); + break; + }; + break; + case KVM_REG_MIPS_CP0_EBASE: + kvm_vz_write_gc0_ebase(v); + break; + case KVM_REG_MIPS_CP0_CONFIG: + cur = read_gc0_config(); + change = (cur ^ v) & kvm_vz_config_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG1: + if (!cpu_guest_has_conf1) + break; + cur = read_gc0_config1(); + change = (cur ^ v) & kvm_vz_config1_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config1(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG2: + if (!cpu_guest_has_conf2) + break; + cur = read_gc0_config2(); + change = (cur ^ v) & kvm_vz_config2_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config2(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG3: + if (!cpu_guest_has_conf3) + break; + cur = read_gc0_config3(); + change = (cur ^ v) & kvm_vz_config3_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config3(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG4: + if (!cpu_guest_has_conf4) + break; + cur = read_gc0_config4(); + change = (cur ^ v) & kvm_vz_config4_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config4(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG5: + if (!cpu_guest_has_conf5) + break; + cur = 
read_gc0_config5(); + change = (cur ^ v) & kvm_vz_config5_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config5(v); + } + break; + case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f): + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0); + if (idx >= ARRAY_SIZE(vcpu->arch.maar)) + return -EINVAL; + vcpu->arch.maar[idx] = mips_process_maar(dmtc_op, v); + break; + case KVM_REG_MIPS_CP0_MAARI: + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + kvm_write_maari(vcpu, v); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXT: + write_gc0_xcontext(v); + break; +#endif + case KVM_REG_MIPS_CP0_ERROREPC: + write_gc0_errorepc(v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: + idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; + if (!cpu_guest_has_kscr(idx)) + return -EINVAL; + switch (idx) { + case 2: + write_gc0_kscratch1(v); + break; + case 3: + write_gc0_kscratch2(v); + break; + case 4: + write_gc0_kscratch3(v); + break; + case 5: + write_gc0_kscratch4(v); + break; + case 6: + write_gc0_kscratch5(v); + break; + case 7: + write_gc0_kscratch6(v); + break; + } + break; + case KVM_REG_MIPS_COUNT_CTL: + ret = kvm_mips_set_count_ctl(vcpu, v); + break; + case KVM_REG_MIPS_COUNT_RESUME: + ret = kvm_mips_set_count_resume(vcpu, v); + break; + case KVM_REG_MIPS_COUNT_HZ: + ret = kvm_mips_set_count_hz(vcpu, v); + break; + default: + return -EINVAL; + } + return ret; +} + +#define guestid_cache(cpu) (cpu_data[cpu].guestid_cache) +static void kvm_vz_get_new_guestid(unsigned long cpu, struct kvm_vcpu *vcpu) +{ + unsigned long guestid = guestid_cache(cpu); + + if (!(++guestid & GUESTID_MASK)) { + if (cpu_has_vtag_icache) + flush_icache_all(); + + if (!guestid) /* fix version if needed */ + guestid = GUESTID_FIRST_VERSION; + + ++guestid; /* guestid 0 reserved for root */ + + /* start new guestid cycle */ + kvm_vz_local_flush_roottlb_all_guests(); + kvm_vz_local_flush_guesttlb_all(); + } + + guestid_cache(cpu) = guestid; +} + +/* Returns 1 if the guest TLB may be clobbered */ +static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu) +{ + int ret = 0; + int i; + + if (!kvm_request_pending(vcpu)) + return 0; + + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + if (cpu_has_guestid) { + /* Drop all GuestIDs for this VCPU */ + for_each_possible_cpu(i) + vcpu->arch.vzguestid[i] = 0; + /* This will clobber guest TLB contents too */ + ret = 1; + } + /* + * For Root ASID Dealias (RAD) we don't do anything here, but we + * still need the request to ensure we recheck asid_flush_mask. + * We can still return 0 as only the root TLB will be affected + * by a root ASID flush. 
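The GuestID handling above follows the same versioned-allocator pattern as MIPS ASIDs: the ID lives in the low bits and a version number in the remaining bits, so a wrap of the ID field forces a flush and invalidates every previously handed-out ID. A toy model, assuming an 8-bit ID field for illustration:

    #include <stdbool.h>
    #include <stdint.h>

    #define ID_MASK        0xffu            /* assumed 8-bit GuestID field */
    #define FIRST_VERSION  (ID_MASK + 1)
    #define VERSION_MASK   (~ID_MASK)

    static uint32_t id_cache = FIRST_VERSION;

    static uint32_t get_new_id(void)
    {
            uint32_t id = id_cache;

            if (!(++id & ID_MASK)) {        /* ID field wrapped around */
                    if (!id)                /* 32-bit version overflow */
                            id = FIRST_VERSION;
                    ++id;                   /* ID 0 stays reserved for root */
                    /* the real code flushes guest TLB state at this point */
            }
            return id_cache = id;
    }

    /* A stored ID is stale once its version no longer matches the cache. */
    static bool id_is_stale(uint32_t stored)
    {
            return ((stored ^ id_cache) & VERSION_MASK) != 0;
    }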
+ */ + } + + return ret; +} + +static void kvm_vz_vcpu_save_wired(struct kvm_vcpu *vcpu) +{ + unsigned int wired = read_gc0_wired(); + struct kvm_mips_tlb *tlbs; + int i; + + /* Expand the wired TLB array if necessary */ + wired &= MIPSR6_WIRED_WIRED; + if (wired > vcpu->arch.wired_tlb_limit) { + tlbs = krealloc(vcpu->arch.wired_tlb, wired * + sizeof(*vcpu->arch.wired_tlb), GFP_ATOMIC); + if (WARN_ON(!tlbs)) { + /* Save whatever we can */ + wired = vcpu->arch.wired_tlb_limit; + } else { + vcpu->arch.wired_tlb = tlbs; + vcpu->arch.wired_tlb_limit = wired; + } + } + + if (wired) + /* Save wired entries from the guest TLB */ + kvm_vz_save_guesttlb(vcpu->arch.wired_tlb, 0, wired); + /* Invalidate any dropped entries since last time */ + for (i = wired; i < vcpu->arch.wired_tlb_used; ++i) { + vcpu->arch.wired_tlb[i].tlb_hi = UNIQUE_GUEST_ENTRYHI(i); + vcpu->arch.wired_tlb[i].tlb_lo[0] = 0; + vcpu->arch.wired_tlb[i].tlb_lo[1] = 0; + vcpu->arch.wired_tlb[i].tlb_mask = 0; + } + vcpu->arch.wired_tlb_used = wired; +} + +static void kvm_vz_vcpu_load_wired(struct kvm_vcpu *vcpu) +{ + /* Load wired entries into the guest TLB */ + if (vcpu->arch.wired_tlb) + kvm_vz_load_guesttlb(vcpu->arch.wired_tlb, 0, + vcpu->arch.wired_tlb_used); +} + +static void kvm_vz_vcpu_load_tlb(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvm *kvm = vcpu->kvm; + struct mm_struct *gpa_mm = &kvm->arch.gpa_mm; + bool migrated; + + /* + * Are we entering guest context on a different CPU to last time? + * If so, the VCPU's guest TLB state on this CPU may be stale. + */ + migrated = (vcpu->arch.last_exec_cpu != cpu); + vcpu->arch.last_exec_cpu = cpu; + + /* + * A vcpu's GuestID is set in GuestCtl1.ID when the vcpu is loaded and + * remains set until another vcpu is loaded in. As a rule GuestRID + * remains zeroed when in root context unless the kernel is busy + * manipulating guest tlb entries. + */ + if (cpu_has_guestid) { + /* + * Check if our GuestID is of an older version and thus invalid. + * + * We also discard the stored GuestID if we've executed on + * another CPU, as the guest mappings may have changed without + * hypervisor knowledge. + */ + if (migrated || + (vcpu->arch.vzguestid[cpu] ^ guestid_cache(cpu)) & + GUESTID_VERSION_MASK) { + kvm_vz_get_new_guestid(cpu, vcpu); + vcpu->arch.vzguestid[cpu] = guestid_cache(cpu); + trace_kvm_guestid_change(vcpu, + vcpu->arch.vzguestid[cpu]); + } + + /* Restore GuestID */ + change_c0_guestctl1(GUESTID_MASK, vcpu->arch.vzguestid[cpu]); + } else { + /* + * The Guest TLB only stores a single guest's TLB state, so + * flush it if another VCPU has executed on this CPU. + * + * We also flush if we've executed on another CPU, as the guest + * mappings may have changed without hypervisor knowledge. + */ + if (migrated || last_exec_vcpu[cpu] != vcpu) + kvm_vz_local_flush_guesttlb_all(); + last_exec_vcpu[cpu] = vcpu; + + /* + * Root ASID dealiases guest GPA mappings in the root TLB. + * Allocate new root ASID if needed. + */ + if (cpumask_test_and_clear_cpu(cpu, &kvm->arch.asid_flush_mask) + || (cpu_context(cpu, gpa_mm) ^ asid_cache(cpu)) & + asid_version_mask(cpu)) + get_new_mmu_context(gpa_mm, cpu); + } +} + +static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + bool migrated, all; + + /* + * Have we migrated to a different CPU? + * If so, any old guest TLB state may be stale. + */ + migrated = (vcpu->arch.last_sched_cpu != cpu); + + /* + * Was this the last VCPU to run on this CPU? 
+ * If not, any old guest state from this VCPU will have been clobbered. + */ + all = migrated || (last_vcpu[cpu] != vcpu); + last_vcpu[cpu] = vcpu; + + /* + * Restore CP0_Wired unconditionally as we clear it after use, and + * restore wired guest TLB entries (while in guest context). + */ + kvm_restore_gc0_wired(cop0); + if (current->flags & PF_VCPU) { + tlbw_use_hazard(); + kvm_vz_vcpu_load_tlb(vcpu, cpu); + kvm_vz_vcpu_load_wired(vcpu); + } + + /* + * Restore timer state regardless, as e.g. Cause.TI can change over time + * if left unmaintained. + */ + kvm_vz_restore_timer(vcpu); + + /* Set MC bit if we want to trace guest mode changes */ + if (kvm_trace_guest_mode_change) + set_c0_guestctl0(MIPS_GCTL0_MC); + else + clear_c0_guestctl0(MIPS_GCTL0_MC); + + /* Don't bother restoring registers multiple times unless necessary */ + if (!all) + return 0; + + /* + * Restore config registers first, as some implementations restrict + * writes to other registers when the corresponding feature bits aren't + * set. For example Status.CU1 cannot be set unless Config1.FP is set. + */ + kvm_restore_gc0_config(cop0); + if (cpu_guest_has_conf1) + kvm_restore_gc0_config1(cop0); + if (cpu_guest_has_conf2) + kvm_restore_gc0_config2(cop0); + if (cpu_guest_has_conf3) + kvm_restore_gc0_config3(cop0); + if (cpu_guest_has_conf4) + kvm_restore_gc0_config4(cop0); + if (cpu_guest_has_conf5) + kvm_restore_gc0_config5(cop0); + if (cpu_guest_has_conf6) + kvm_restore_gc0_config6(cop0); + if (cpu_guest_has_conf7) + kvm_restore_gc0_config7(cop0); + + kvm_restore_gc0_index(cop0); + kvm_restore_gc0_entrylo0(cop0); + kvm_restore_gc0_entrylo1(cop0); + kvm_restore_gc0_context(cop0); + if (cpu_guest_has_contextconfig) + kvm_restore_gc0_contextconfig(cop0); +#ifdef CONFIG_64BIT + kvm_restore_gc0_xcontext(cop0); + if (cpu_guest_has_contextconfig) + kvm_restore_gc0_xcontextconfig(cop0); +#endif + kvm_restore_gc0_pagemask(cop0); + kvm_restore_gc0_pagegrain(cop0); + kvm_restore_gc0_hwrena(cop0); + kvm_restore_gc0_badvaddr(cop0); + kvm_restore_gc0_entryhi(cop0); + kvm_restore_gc0_status(cop0); + kvm_restore_gc0_intctl(cop0); + kvm_restore_gc0_epc(cop0); + kvm_vz_write_gc0_ebase(kvm_read_sw_gc0_ebase(cop0)); + if (cpu_guest_has_userlocal) + kvm_restore_gc0_userlocal(cop0); + + kvm_restore_gc0_errorepc(cop0); + + /* restore KScratch registers if enabled in guest */ + if (cpu_guest_has_conf4) { + if (cpu_guest_has_kscr(2)) + kvm_restore_gc0_kscratch1(cop0); + if (cpu_guest_has_kscr(3)) + kvm_restore_gc0_kscratch2(cop0); + if (cpu_guest_has_kscr(4)) + kvm_restore_gc0_kscratch3(cop0); + if (cpu_guest_has_kscr(5)) + kvm_restore_gc0_kscratch4(cop0); + if (cpu_guest_has_kscr(6)) + kvm_restore_gc0_kscratch5(cop0); + if (cpu_guest_has_kscr(7)) + kvm_restore_gc0_kscratch6(cop0); + } + + if (cpu_guest_has_badinstr) + kvm_restore_gc0_badinstr(cop0); + if (cpu_guest_has_badinstrp) + kvm_restore_gc0_badinstrp(cop0); + + if (cpu_guest_has_segments) { + kvm_restore_gc0_segctl0(cop0); + kvm_restore_gc0_segctl1(cop0); + kvm_restore_gc0_segctl2(cop0); + } + + /* restore HTW registers */ + if (cpu_guest_has_htw) { + kvm_restore_gc0_pwbase(cop0); + kvm_restore_gc0_pwfield(cop0); + kvm_restore_gc0_pwsize(cop0); + kvm_restore_gc0_pwctl(cop0); + } + + /* restore Root.GuestCtl2 from unused Guest guestctl2 register */ + if (cpu_has_guestctl2) + write_c0_guestctl2( + cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL]); + + /* + * We should clear linked load bit to break interrupted atomics. 
This + * prevents a SC on the next VCPU from succeeding by matching a LL on + * the previous VCPU. + */ + if (cpu_guest_has_rw_llb) + write_gc0_lladdr(0); + + return 0; +} + +static int kvm_vz_vcpu_put(struct kvm_vcpu *vcpu, int cpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + if (current->flags & PF_VCPU) + kvm_vz_vcpu_save_wired(vcpu); + + kvm_lose_fpu(vcpu); + + kvm_save_gc0_index(cop0); + kvm_save_gc0_entrylo0(cop0); + kvm_save_gc0_entrylo1(cop0); + kvm_save_gc0_context(cop0); + if (cpu_guest_has_contextconfig) + kvm_save_gc0_contextconfig(cop0); +#ifdef CONFIG_64BIT + kvm_save_gc0_xcontext(cop0); + if (cpu_guest_has_contextconfig) + kvm_save_gc0_xcontextconfig(cop0); +#endif + kvm_save_gc0_pagemask(cop0); + kvm_save_gc0_pagegrain(cop0); + kvm_save_gc0_wired(cop0); + /* allow wired TLB entries to be overwritten */ + clear_gc0_wired(MIPSR6_WIRED_WIRED); + kvm_save_gc0_hwrena(cop0); + kvm_save_gc0_badvaddr(cop0); + kvm_save_gc0_entryhi(cop0); + kvm_save_gc0_status(cop0); + kvm_save_gc0_intctl(cop0); + kvm_save_gc0_epc(cop0); + kvm_write_sw_gc0_ebase(cop0, kvm_vz_read_gc0_ebase()); + if (cpu_guest_has_userlocal) + kvm_save_gc0_userlocal(cop0); + + /* only save implemented config registers */ + kvm_save_gc0_config(cop0); + if (cpu_guest_has_conf1) + kvm_save_gc0_config1(cop0); + if (cpu_guest_has_conf2) + kvm_save_gc0_config2(cop0); + if (cpu_guest_has_conf3) + kvm_save_gc0_config3(cop0); + if (cpu_guest_has_conf4) + kvm_save_gc0_config4(cop0); + if (cpu_guest_has_conf5) + kvm_save_gc0_config5(cop0); + if (cpu_guest_has_conf6) + kvm_save_gc0_config6(cop0); + if (cpu_guest_has_conf7) + kvm_save_gc0_config7(cop0); + + kvm_save_gc0_errorepc(cop0); + + /* save KScratch registers if enabled in guest */ + if (cpu_guest_has_conf4) { + if (cpu_guest_has_kscr(2)) + kvm_save_gc0_kscratch1(cop0); + if (cpu_guest_has_kscr(3)) + kvm_save_gc0_kscratch2(cop0); + if (cpu_guest_has_kscr(4)) + kvm_save_gc0_kscratch3(cop0); + if (cpu_guest_has_kscr(5)) + kvm_save_gc0_kscratch4(cop0); + if (cpu_guest_has_kscr(6)) + kvm_save_gc0_kscratch5(cop0); + if (cpu_guest_has_kscr(7)) + kvm_save_gc0_kscratch6(cop0); + } + + if (cpu_guest_has_badinstr) + kvm_save_gc0_badinstr(cop0); + if (cpu_guest_has_badinstrp) + kvm_save_gc0_badinstrp(cop0); + + if (cpu_guest_has_segments) { + kvm_save_gc0_segctl0(cop0); + kvm_save_gc0_segctl1(cop0); + kvm_save_gc0_segctl2(cop0); + } + + /* save HTW registers if enabled in guest */ + if (cpu_guest_has_htw && + kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW) { + kvm_save_gc0_pwbase(cop0); + kvm_save_gc0_pwfield(cop0); + kvm_save_gc0_pwsize(cop0); + kvm_save_gc0_pwctl(cop0); + } + + kvm_vz_save_timer(vcpu); + + /* save Root.GuestCtl2 in unused Guest guestctl2 register */ + if (cpu_has_guestctl2) + cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] = + read_c0_guestctl2(); + + return 0; +} + +/** + * kvm_vz_resize_guest_vtlb() - Attempt to resize guest VTLB. + * @size: Number of guest VTLB entries (0 < @size <= root VTLB entries). + * + * Attempt to resize the guest VTLB by writing guest Config registers. This is + * necessary for cores with a shared root/guest TLB to avoid overlap with wired + * entries in the root VTLB. + * + * Returns: The resulting guest VTLB size. 
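The resize below encodes (size - 1) across two fields: the low bits go into Config1.MMUSize and the remainder into a Config4 extension field. A minimal sketch of that split, assuming the Config1 field is 6 bits wide as the MIPS_CONF1_TLBS_SIZE shift suggests:

    #include <stdint.h>
    #include <stdio.h>

    /* Split MMUSize-1 into a 6-bit Config1.MMUSize part and an extension. */
    static void split_mmu_size(unsigned int size, uint32_t *conf1_mmusize,
                               uint32_t *conf4_ext)
    {
            unsigned int encoded = size - 1;

            *conf1_mmusize = encoded & 0x3f;    /* assumed 6-bit field */
            *conf4_ext     = encoded >> 6;      /* VTLBSizeExt / MMUSizeExt */
    }

    int main(void)
    {
            uint32_t lo, ext;

            split_mmu_size(128, &lo, &ext);     /* 128 entries -> 127 = 0x7f */
            printf("Config1.MMUSize = %u, Config4 extension = %u\n", lo, ext);
            return 0;
    }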
+ */ +static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size) +{ + unsigned int config4 = 0, ret = 0, limit; + + /* Write MMUSize - 1 into guest Config registers */ + if (cpu_guest_has_conf1) + change_gc0_config1(MIPS_CONF1_TLBS, + (size - 1) << MIPS_CONF1_TLBS_SHIFT); + if (cpu_guest_has_conf4) { + config4 = read_gc0_config4(); + if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT) { + config4 &= ~MIPS_CONF4_VTLBSIZEEXT; + config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) << + MIPS_CONF4_VTLBSIZEEXT_SHIFT; + } else if ((config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT) { + config4 &= ~MIPS_CONF4_MMUSIZEEXT; + config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) << + MIPS_CONF4_MMUSIZEEXT_SHIFT; + } + write_gc0_config4(config4); + } + + /* + * Set Guest.Wired.Limit = 0 (no limit up to Guest.MMUSize-1), unless it + * would exceed Root.Wired.Limit (clearing Guest.Wired.Wired so write + * not dropped) + */ + if (cpu_has_mips_r6) { + limit = (read_c0_wired() & MIPSR6_WIRED_LIMIT) >> + MIPSR6_WIRED_LIMIT_SHIFT; + if (size - 1 <= limit) + limit = 0; + write_gc0_wired(limit << MIPSR6_WIRED_LIMIT_SHIFT); + } + + /* Read back MMUSize - 1 */ + back_to_back_c0_hazard(); + if (cpu_guest_has_conf1) + ret = (read_gc0_config1() & MIPS_CONF1_TLBS) >> + MIPS_CONF1_TLBS_SHIFT; + if (config4) { + if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT) + ret |= ((config4 & MIPS_CONF4_VTLBSIZEEXT) >> + MIPS_CONF4_VTLBSIZEEXT_SHIFT) << + MIPS_CONF1_TLBS_SIZE; + else if ((config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT) + ret |= ((config4 & MIPS_CONF4_MMUSIZEEXT) >> + MIPS_CONF4_MMUSIZEEXT_SHIFT) << + MIPS_CONF1_TLBS_SIZE; + } + return ret + 1; +} + +static int kvm_vz_hardware_enable(void) +{ + unsigned int mmu_size, guest_mmu_size, ftlb_size; + u64 guest_cvmctl, cvmvmconfig; + + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Set up guest timer/perfcount IRQ lines */ + guest_cvmctl = read_gc0_cvmctl(); + guest_cvmctl &= ~CVMCTL_IPTI; + guest_cvmctl |= 7ull << CVMCTL_IPTI_SHIFT; + guest_cvmctl &= ~CVMCTL_IPPCI; + guest_cvmctl |= 6ull << CVMCTL_IPPCI_SHIFT; + write_gc0_cvmctl(guest_cvmctl); + + cvmvmconfig = read_c0_cvmvmconfig(); + /* No I/O hole translation. */ + cvmvmconfig |= CVMVMCONF_DGHT; + /* Halve the root MMU size */ + mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1) + >> CVMVMCONF_MMUSIZEM1_S) + 1; + guest_mmu_size = mmu_size / 2; + mmu_size -= guest_mmu_size; + cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1; + cvmvmconfig |= mmu_size - 1; + write_c0_cvmvmconfig(cvmvmconfig); + + /* Update our records */ + current_cpu_data.tlbsize = mmu_size; + current_cpu_data.tlbsizevtlb = mmu_size; + current_cpu_data.guest.tlbsize = guest_mmu_size; + + /* Flush moved entries in new (guest) context */ + kvm_vz_local_flush_guesttlb_all(); + break; + default: + /* + * ImgTec cores tend to use a shared root/guest TLB. To avoid + * overlap of root wired and guest entries, the guest TLB may + * need resizing. + */ + mmu_size = current_cpu_data.tlbsizevtlb; + ftlb_size = current_cpu_data.tlbsize - mmu_size; + + /* Try switching to maximum guest VTLB size for flush */ + guest_mmu_size = kvm_vz_resize_guest_vtlb(mmu_size); + current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size; + kvm_vz_local_flush_guesttlb_all(); + + /* + * Reduce to make space for root wired entries and at least 2 + * root non-wired entries. This does assume that long-term wired + * entries won't be added later. 
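For example, on a core with a 64-entry shared root VTLB and 7 wired root entries, the guest VTLB would be resized to 64 - 7 - 2 = 55 entries, keeping the wired entries plus two spare non-wired root entries outside the guest's reach.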
+ */ + guest_mmu_size = mmu_size - num_wired_entries() - 2; + guest_mmu_size = kvm_vz_resize_guest_vtlb(guest_mmu_size); + current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size; + + /* + * Write the VTLB size, but if another CPU has already written, + * check it matches or we won't provide a consistent view to the + * guest. If this ever happens it suggests an asymmetric number + * of wired entries. + */ + if (cmpxchg(&kvm_vz_guest_vtlb_size, 0, guest_mmu_size) && + WARN(guest_mmu_size != kvm_vz_guest_vtlb_size, + "Available guest VTLB size mismatch")) + return -EINVAL; + break; + } + + /* + * Enable virtualization features granting guest direct control of + * certain features: + * CP0=1: Guest coprocessor 0 context. + * AT=Guest: Guest MMU. + * CG=1: Hit (virtual address) CACHE operations (optional). + * CF=1: Guest Config registers. + * CGI=1: Indexed flush CACHE operations (optional). + */ + write_c0_guestctl0(MIPS_GCTL0_CP0 | + (MIPS_GCTL0_AT_GUEST << MIPS_GCTL0_AT_SHIFT) | + MIPS_GCTL0_CG | MIPS_GCTL0_CF); + if (cpu_has_guestctl0ext) + set_c0_guestctl0ext(MIPS_GCTL0EXT_CGI); + + if (cpu_has_guestid) { + write_c0_guestctl1(0); + kvm_vz_local_flush_roottlb_all_guests(); + + GUESTID_MASK = current_cpu_data.guestid_mask; + GUESTID_FIRST_VERSION = GUESTID_MASK + 1; + GUESTID_VERSION_MASK = ~GUESTID_MASK; + + current_cpu_data.guestid_cache = GUESTID_FIRST_VERSION; + } + + /* clear any pending injected virtual guest interrupts */ + if (cpu_has_guestctl2) + clear_c0_guestctl2(0x3f << 10); + + return 0; +} + +static void kvm_vz_hardware_disable(void) +{ + u64 cvmvmconfig; + unsigned int mmu_size; + + /* Flush any remaining guest TLB entries */ + kvm_vz_local_flush_guesttlb_all(); + + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* + * Allocate whole TLB for root. Existing guest TLB entries will + * change ownership to the root TLB. We should be safe though as + * they've already been flushed above while in guest TLB. + */ + cvmvmconfig = read_c0_cvmvmconfig(); + mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1) + >> CVMVMCONF_MMUSIZEM1_S) + 1; + cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1; + cvmvmconfig |= mmu_size - 1; + write_c0_cvmvmconfig(cvmvmconfig); + + /* Update our records */ + current_cpu_data.tlbsize = mmu_size; + current_cpu_data.tlbsizevtlb = mmu_size; + current_cpu_data.guest.tlbsize = 0; + + /* Flush moved entries in new (root) context */ + local_flush_tlb_all(); + break; + } + + if (cpu_has_guestid) { + write_c0_guestctl1(0); + kvm_vz_local_flush_roottlb_all_guests(); + } +} + +static int kvm_vz_check_extension(struct kvm *kvm, long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_MIPS_VZ: + /* we wouldn't be here unless cpu_has_vz */ + r = 1; + break; +#ifdef CONFIG_64BIT + case KVM_CAP_MIPS_64BIT: + /* We support 64-bit registers/operations and addresses */ + r = 2; + break; +#endif + default: + r = 0; + break; + } + + return r; +} + +static int kvm_vz_vcpu_init(struct kvm_vcpu *vcpu) +{ + int i; + + for_each_possible_cpu(i) + vcpu->arch.vzguestid[i] = 0; + + return 0; +} + +static void kvm_vz_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + int cpu; + + /* + * If the VCPU is freed and reused as another VCPU, we don't want the + * matching pointer wrongly hanging around in last_vcpu[] or + * last_exec_vcpu[]. 
+ */ + for_each_possible_cpu(cpu) { + if (last_vcpu[cpu] == vcpu) + last_vcpu[cpu] = NULL; + if (last_exec_vcpu[cpu] == vcpu) + last_exec_vcpu[cpu] = NULL; + } +} + +static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned long count_hz = 100*1000*1000; /* default to 100 MHz */ + + /* + * Start off the timer at the same frequency as the host timer, but the + * soft timer doesn't handle frequencies greater than 1GHz yet. + */ + if (mips_hpt_frequency && mips_hpt_frequency <= NSEC_PER_SEC) + count_hz = mips_hpt_frequency; + kvm_mips_init_count(vcpu, count_hz); + + /* + * Initialize guest register state to valid architectural reset state. + */ + + /* PageGrain */ + if (cpu_has_mips_r6) + kvm_write_sw_gc0_pagegrain(cop0, PG_RIE | PG_XIE | PG_IEC); + /* Wired */ + if (cpu_has_mips_r6) + kvm_write_sw_gc0_wired(cop0, + read_gc0_wired() & MIPSR6_WIRED_LIMIT); + /* Status */ + kvm_write_sw_gc0_status(cop0, ST0_BEV | ST0_ERL); + if (cpu_has_mips_r6) + kvm_change_sw_gc0_status(cop0, ST0_FR, read_gc0_status()); + /* IntCtl */ + kvm_write_sw_gc0_intctl(cop0, read_gc0_intctl() & + (INTCTLF_IPFDC | INTCTLF_IPPCI | INTCTLF_IPTI)); + /* PRId */ + kvm_write_sw_gc0_prid(cop0, boot_cpu_data.processor_id); + /* EBase */ + kvm_write_sw_gc0_ebase(cop0, (s32)0x80000000 | vcpu->vcpu_id); + /* Config */ + kvm_save_gc0_config(cop0); + /* architecturally writable (e.g. from guest) */ + kvm_change_sw_gc0_config(cop0, CONF_CM_CMASK, + _page_cachable_default >> _CACHE_SHIFT); + /* architecturally read only, but maybe writable from root */ + kvm_change_sw_gc0_config(cop0, MIPS_CONF_MT, read_c0_config()); + if (cpu_guest_has_conf1) { + kvm_set_sw_gc0_config(cop0, MIPS_CONF_M); + /* Config1 */ + kvm_save_gc0_config1(cop0); + /* architecturally read only, but maybe writable from root */ + kvm_clear_sw_gc0_config1(cop0, MIPS_CONF1_C2 | + MIPS_CONF1_MD | + MIPS_CONF1_PC | + MIPS_CONF1_WR | + MIPS_CONF1_CA | + MIPS_CONF1_FP); + } + if (cpu_guest_has_conf2) { + kvm_set_sw_gc0_config1(cop0, MIPS_CONF_M); + /* Config2 */ + kvm_save_gc0_config2(cop0); + } + if (cpu_guest_has_conf3) { + kvm_set_sw_gc0_config2(cop0, MIPS_CONF_M); + /* Config3 */ + kvm_save_gc0_config3(cop0); + /* architecturally writable (e.g. from guest) */ + kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_ISA_OE); + /* architecturally read only, but maybe writable from root */ + kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_MSA | + MIPS_CONF3_BPG | + MIPS_CONF3_ULRI | + MIPS_CONF3_DSP | + MIPS_CONF3_CTXTC | + MIPS_CONF3_ITL | + MIPS_CONF3_LPA | + MIPS_CONF3_VEIC | + MIPS_CONF3_VINT | + MIPS_CONF3_SP | + MIPS_CONF3_CDMM | + MIPS_CONF3_MT | + MIPS_CONF3_SM | + MIPS_CONF3_TL); + } + if (cpu_guest_has_conf4) { + kvm_set_sw_gc0_config3(cop0, MIPS_CONF_M); + /* Config4 */ + kvm_save_gc0_config4(cop0); + } + if (cpu_guest_has_conf5) { + kvm_set_sw_gc0_config4(cop0, MIPS_CONF_M); + /* Config5 */ + kvm_save_gc0_config5(cop0); + /* architecturally writable (e.g. 
from guest) */ + kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_K | + MIPS_CONF5_CV | + MIPS_CONF5_MSAEN | + MIPS_CONF5_UFE | + MIPS_CONF5_FRE | + MIPS_CONF5_SBRI | + MIPS_CONF5_UFR); + /* architecturally read only, but maybe writable from root */ + kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_MRP); + } + + if (cpu_guest_has_contextconfig) { + /* ContextConfig */ + kvm_write_sw_gc0_contextconfig(cop0, 0x007ffff0); +#ifdef CONFIG_64BIT + /* XContextConfig */ + /* bits SEGBITS-13+3:4 set */ + kvm_write_sw_gc0_xcontextconfig(cop0, + ((1ull << (cpu_vmbits - 13)) - 1) << 4); +#endif + } + + /* Implementation dependent, use the legacy layout */ + if (cpu_guest_has_segments) { + /* SegCtl0, SegCtl1, SegCtl2 */ + kvm_write_sw_gc0_segctl0(cop0, 0x00200010); + kvm_write_sw_gc0_segctl1(cop0, 0x00000002 | + (_page_cachable_default >> _CACHE_SHIFT) << + (16 + MIPS_SEGCFG_C_SHIFT)); + kvm_write_sw_gc0_segctl2(cop0, 0x00380438); + } + + /* reset HTW registers */ + if (cpu_guest_has_htw && cpu_has_mips_r6) { + /* PWField */ + kvm_write_sw_gc0_pwfield(cop0, 0x0c30c302); + /* PWSize */ + kvm_write_sw_gc0_pwsize(cop0, 1 << MIPS_PWSIZE_PTW_SHIFT); + } + + /* start with no pending virtual guest interrupts */ + if (cpu_has_guestctl2) + cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] = 0; + + /* Put PC at reset vector */ + vcpu->arch.pc = CKSEG1ADDR(0x1fc00000); + + return 0; +} + +static void kvm_vz_flush_shadow_all(struct kvm *kvm) +{ + if (cpu_has_guestid) { + /* Flush GuestID for each VCPU individually */ + kvm_flush_remote_tlbs(kvm); + } else { + /* + * For each CPU there is a single GPA ASID used by all VCPUs in + * the VM, so it doesn't make sense for the VCPUs to handle + * invalidation of these ASIDs individually. + * + * Instead mark all CPUs as needing ASID invalidation in + * asid_flush_mask, and just use kvm_flush_remote_tlbs(kvm) to + * kick any running VCPUs so they check asid_flush_mask. 
+ */ + cpumask_setall(&kvm->arch.asid_flush_mask); + kvm_flush_remote_tlbs(kvm); + } +} + +static void kvm_vz_flush_shadow_memslot(struct kvm *kvm, + const struct kvm_memory_slot *slot) +{ + kvm_vz_flush_shadow_all(kvm); +} + +static void kvm_vz_vcpu_reenter(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + int preserve_guest_tlb; + + preserve_guest_tlb = kvm_vz_check_requests(vcpu, cpu); + + if (preserve_guest_tlb) + kvm_vz_vcpu_save_wired(vcpu); + + kvm_vz_vcpu_load_tlb(vcpu, cpu); + + if (preserve_guest_tlb) + kvm_vz_vcpu_load_wired(vcpu); +} + +static int kvm_vz_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + int r; + + kvm_vz_acquire_htimer(vcpu); + /* Check if we have any exceptions/interrupts pending */ + kvm_mips_deliver_interrupts(vcpu, read_gc0_cause()); + + kvm_vz_check_requests(vcpu, cpu); + kvm_vz_vcpu_load_tlb(vcpu, cpu); + kvm_vz_vcpu_load_wired(vcpu); + + r = vcpu->arch.vcpu_run(run, vcpu); + + kvm_vz_vcpu_save_wired(vcpu); + + return r; +} + +static struct kvm_mips_callbacks kvm_vz_callbacks = { + .handle_cop_unusable = kvm_trap_vz_handle_cop_unusable, + .handle_tlb_mod = kvm_trap_vz_handle_tlb_st_miss, + .handle_tlb_ld_miss = kvm_trap_vz_handle_tlb_ld_miss, + .handle_tlb_st_miss = kvm_trap_vz_handle_tlb_st_miss, + .handle_addr_err_st = kvm_trap_vz_no_handler, + .handle_addr_err_ld = kvm_trap_vz_no_handler, + .handle_syscall = kvm_trap_vz_no_handler, + .handle_res_inst = kvm_trap_vz_no_handler, + .handle_break = kvm_trap_vz_no_handler, + .handle_msa_disabled = kvm_trap_vz_handle_msa_disabled, + .handle_guest_exit = kvm_trap_vz_handle_guest_exit, + + .hardware_enable = kvm_vz_hardware_enable, + .hardware_disable = kvm_vz_hardware_disable, + .check_extension = kvm_vz_check_extension, + .vcpu_init = kvm_vz_vcpu_init, + .vcpu_uninit = kvm_vz_vcpu_uninit, + .vcpu_setup = kvm_vz_vcpu_setup, + .flush_shadow_all = kvm_vz_flush_shadow_all, + .flush_shadow_memslot = kvm_vz_flush_shadow_memslot, + .gva_to_gpa = kvm_vz_gva_to_gpa_cb, + .queue_timer_int = kvm_vz_queue_timer_int_cb, + .dequeue_timer_int = kvm_vz_dequeue_timer_int_cb, + .queue_io_int = kvm_vz_queue_io_int_cb, + .dequeue_io_int = kvm_vz_dequeue_io_int_cb, + .irq_deliver = kvm_vz_irq_deliver_cb, + .irq_clear = kvm_vz_irq_clear_cb, + .num_regs = kvm_vz_num_regs, + .copy_reg_indices = kvm_vz_copy_reg_indices, + .get_one_reg = kvm_vz_get_one_reg, + .set_one_reg = kvm_vz_set_one_reg, + .vcpu_load = kvm_vz_vcpu_load, + .vcpu_put = kvm_vz_vcpu_put, + .vcpu_run = kvm_vz_vcpu_run, + .vcpu_reenter = kvm_vz_vcpu_reenter, +}; + +int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) +{ + if (!cpu_has_vz) + return -ENODEV; + + /* + * VZ requires at least 2 KScratch registers, so it should have been + * possible to allocate pgd_reg. + */ + if (WARN(pgd_reg == -1, + "pgd_reg not allocated even though cpu_has_vz\n")) + return -ENODEV; + + pr_info("Starting KVM with MIPS VZ extensions\n"); + + *install_callbacks = &kvm_vz_callbacks; + return 0; +} |
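Editor's note on the Config1/Config4 encoding used by kvm_vz_resize_guest_vtlb() in the patch above: the requested guest VTLB size minus one is split between the Config1.MMUSize-1 field and the Config4 size-extension field (VTLBSizeExt or MMUSizeExt, depending on MMUExtDef). The standalone sketch below is illustrative only and not part of the patch; the 6-bit field width and the 192-entry example size are assumptions for the sake of the arithmetic.

/*
 * Illustrative sketch (not part of the patch above): how a hypothetical
 * guest VTLB size of 192 entries would be split across Config1.MMUSize-1
 * and the Config4 size-extension field, mirroring the shifts done by
 * kvm_vz_resize_guest_vtlb(). Field width assumed to be 6 bits
 * (MIPS_CONF1_TLBS_SIZE in the kernel headers).
 */
#include <stdio.h>

#define MMUSIZE_FIELD_BITS	6	/* assumed width of Config1.MMUSize-1 */

int main(void)
{
	unsigned int size = 192;	/* hypothetical guest VTLB size */
	unsigned int mmusize_m1 = (size - 1) & ((1u << MMUSIZE_FIELD_BITS) - 1);
	unsigned int sizeext = (size - 1) >> MMUSIZE_FIELD_BITS;

	/* 191 = 0b10111111 -> MMUSize-1 = 63, size extension = 2 */
	printf("Config1.MMUSize-1 = %u, Config4 extension = %u\n",
	       mmusize_m1, sizeext);
	return 0;
}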
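The KVM_CAP_MIPS_VZ and KVM_CAP_MIPS_64BIT return values implemented by kvm_vz_check_extension() above are reported to userspace through the standard KVM_CHECK_EXTENSION ioctl on /dev/kvm. The following is a minimal userspace sketch, not part of the patch, showing how those capabilities could be queried; error handling is trimmed for brevity.

/*
 * Hypothetical userspace sketch (not part of the patch above): query the
 * capabilities handled by kvm_vz_check_extension() via KVM_CHECK_EXTENSION.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);

	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* 1 when the VZ backend is in use (see kvm_vz_check_extension) */
	printf("KVM_CAP_MIPS_VZ:    %d\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_VZ));
	/* 2 => 64-bit registers/operations/addresses supported */
	printf("KVM_CAP_MIPS_64BIT: %d\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_64BIT));
	return 0;
}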