diff options
Diffstat (limited to 'arch/mips/kvm')
-rw-r--r-- | arch/mips/kvm/Kconfig | 44 | ||||
-rw-r--r-- | arch/mips/kvm/Makefile | 20 | ||||
-rw-r--r-- | arch/mips/kvm/emulate.c | 1650 | ||||
-rw-r--r-- | arch/mips/kvm/entry.c | 916 | ||||
-rw-r--r-- | arch/mips/kvm/fpu.S | 125 | ||||
-rw-r--r-- | arch/mips/kvm/hypcall.c | 53 | ||||
-rw-r--r-- | arch/mips/kvm/interrupt.c | 56 | ||||
-rw-r--r-- | arch/mips/kvm/interrupt.h | 39 | ||||
-rw-r--r-- | arch/mips/kvm/loongson_ipi.c | 214 | ||||
-rw-r--r-- | arch/mips/kvm/mips.c | 1646 | ||||
-rw-r--r-- | arch/mips/kvm/mmu.c | 757 | ||||
-rw-r--r-- | arch/mips/kvm/msa.S | 161 | ||||
-rw-r--r-- | arch/mips/kvm/stats.c | 63 | ||||
-rw-r--r-- | arch/mips/kvm/tlb.c | 525 | ||||
-rw-r--r-- | arch/mips/kvm/trace.h | 346 | ||||
-rw-r--r-- | arch/mips/kvm/vz.c | 3325 |
16 files changed, 9940 insertions, 0 deletions
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig new file mode 100644 index 0000000000..a8cdba75f9 --- /dev/null +++ b/arch/mips/kvm/Kconfig @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# KVM configuration +# +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + help + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM + depends on MIPS_FP_SUPPORT + select EXPORT_UASM + select PREEMPT_NOTIFIERS + select KVM_GENERIC_DIRTYLOG_READ_PROTECT + select HAVE_KVM_EVENTFD + select HAVE_KVM_VCPU_ASYNC_IOCTL + select KVM_MMIO + select MMU_NOTIFIER + select INTERVAL_TREE + select KVM_GENERIC_HARDWARE_ENABLING + help + Support for hosting Guest kernels. + +config KVM_MIPS_DEBUG_COP0_COUNTERS + bool "Maintain counters for COP0 accesses" + depends on KVM + help + Maintain statistics for Guest COP0 accesses. + A histogram of COP0 accesses is printed when the VM is + shutdown. + + If unsure, say N. + +endif # VIRTUALIZATION diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile new file mode 100644 index 0000000000..805aeea216 --- /dev/null +++ b/arch/mips/kvm/Makefile @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for KVM support for MIPS +# + +include $(srctree)/virt/kvm/Makefile.kvm + +ccflags-y += -Ivirt/kvm -Iarch/mips/kvm + +kvm-$(CONFIG_CPU_HAS_MSA) += msa.o + +kvm-y += mips.o emulate.o entry.o \ + interrupt.o stats.o \ + fpu.o +kvm-y += hypcall.o +kvm-y += mmu.o +kvm-$(CONFIG_CPU_LOONGSON64) += loongson_ipi.o + +kvm-y += vz.o +obj-$(CONFIG_KVM) += kvm.o +obj-y += tlb.o diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c new file mode 100644 index 0000000000..e64372b8f6 --- /dev/null +++ b/arch/mips/kvm/emulate.c @@ -0,0 +1,1650 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Instruction/Exception emulation + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/ktime.h> +#include <linux/kvm_host.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/memblock.h> +#include <linux/random.h> +#include <asm/page.h> +#include <asm/cacheflush.h> +#include <asm/cacheops.h> +#include <asm/cpu-info.h> +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#include <asm/inst.h> + +#undef CONFIG_MIPS_MT +#include <asm/r4kcache.h> +#define CONFIG_MIPS_MT + +#include "interrupt.h" + +#include "trace.h" + +/* + * Compute the return address and do emulate branch simulation, if required. + * This function should be called only in branch delay slot active. + */ +static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc, + unsigned long *out) +{ + unsigned int dspcontrol; + union mips_instruction insn; + struct kvm_vcpu_arch *arch = &vcpu->arch; + long epc = instpc; + long nextpc; + int err; + + if (epc & 3) { + kvm_err("%s: unaligned epc\n", __func__); + return -EINVAL; + } + + /* Read the instruction */ + err = kvm_get_badinstrp((u32 *)epc, vcpu, &insn.word); + if (err) + return err; + + switch (insn.i_format.opcode) { + /* jr and jalr are in r_format format. */ + case spec_op: + switch (insn.r_format.func) { + case jalr_op: + arch->gprs[insn.r_format.rd] = epc + 8; + fallthrough; + case jr_op: + nextpc = arch->gprs[insn.r_format.rs]; + break; + default: + return -EINVAL; + } + break; + + /* + * This group contains: + * bltz_op, bgez_op, bltzl_op, bgezl_op, + * bltzal_op, bgezal_op, bltzall_op, bgezall_op. + */ + case bcond_op: + switch (insn.i_format.rt) { + case bltz_op: + case bltzl_op: + if ((long)arch->gprs[insn.i_format.rs] < 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case bgez_op: + case bgezl_op: + if ((long)arch->gprs[insn.i_format.rs] >= 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case bltzal_op: + case bltzall_op: + arch->gprs[31] = epc + 8; + if ((long)arch->gprs[insn.i_format.rs] < 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case bgezal_op: + case bgezall_op: + arch->gprs[31] = epc + 8; + if ((long)arch->gprs[insn.i_format.rs] >= 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + case bposge32_op: + if (!cpu_has_dsp) { + kvm_err("%s: DSP branch but not DSP ASE\n", + __func__); + return -EINVAL; + } + + dspcontrol = rddsp(0x01); + + if (dspcontrol >= 32) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + default: + return -EINVAL; + } + break; + + /* These are unconditional and in j_format. */ + case jal_op: + arch->gprs[31] = instpc + 8; + fallthrough; + case j_op: + epc += 4; + epc >>= 28; + epc <<= 28; + epc |= (insn.j_format.target << 2); + nextpc = epc; + break; + + /* These are conditional and in i_format. */ + case beq_op: + case beql_op: + if (arch->gprs[insn.i_format.rs] == + arch->gprs[insn.i_format.rt]) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case bne_op: + case bnel_op: + if (arch->gprs[insn.i_format.rs] != + arch->gprs[insn.i_format.rt]) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case blez_op: /* POP06 */ +#ifndef CONFIG_CPU_MIPSR6 + case blezl_op: /* removed in R6 */ +#endif + if (insn.i_format.rt != 0) + goto compact_branch; + if ((long)arch->gprs[insn.i_format.rs] <= 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + case bgtz_op: /* POP07 */ +#ifndef CONFIG_CPU_MIPSR6 + case bgtzl_op: /* removed in R6 */ +#endif + if (insn.i_format.rt != 0) + goto compact_branch; + if ((long)arch->gprs[insn.i_format.rs] > 0) + epc = epc + 4 + (insn.i_format.simmediate << 2); + else + epc += 8; + nextpc = epc; + break; + + /* And now the FPA/cp1 branch instructions. */ + case cop1_op: + kvm_err("%s: unsupported cop1_op\n", __func__); + return -EINVAL; + +#ifdef CONFIG_CPU_MIPSR6 + /* R6 added the following compact branches with forbidden slots */ + case blezl_op: /* POP26 */ + case bgtzl_op: /* POP27 */ + /* only rt == 0 isn't compact branch */ + if (insn.i_format.rt != 0) + goto compact_branch; + return -EINVAL; + case pop10_op: + case pop30_op: + /* only rs == rt == 0 is reserved, rest are compact branches */ + if (insn.i_format.rs != 0 || insn.i_format.rt != 0) + goto compact_branch; + return -EINVAL; + case pop66_op: + case pop76_op: + /* only rs == 0 isn't compact branch */ + if (insn.i_format.rs != 0) + goto compact_branch; + return -EINVAL; +compact_branch: + /* + * If we've hit an exception on the forbidden slot, then + * the branch must not have been taken. + */ + epc += 8; + nextpc = epc; + break; +#else +compact_branch: + /* Fall through - Compact branches not supported before R6 */ +#endif + default: + return -EINVAL; + } + + *out = nextpc; + return 0; +} + +enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause) +{ + int err; + + if (cause & CAUSEF_BD) { + err = kvm_compute_return_epc(vcpu, vcpu->arch.pc, + &vcpu->arch.pc); + if (err) + return EMULATE_FAIL; + } else { + vcpu->arch.pc += 4; + } + + kvm_debug("update_pc(): New PC: %#lx\n", vcpu->arch.pc); + + return EMULATE_DONE; +} + +/** + * kvm_get_badinstr() - Get bad instruction encoding. + * @opc: Guest pointer to faulting instruction. + * @vcpu: KVM VCPU information. + * + * Gets the instruction encoding of the faulting instruction, using the saved + * BadInstr register value if it exists, otherwise falling back to reading guest + * memory at @opc. + * + * Returns: The instruction encoding of the faulting instruction. + */ +int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) +{ + if (cpu_has_badinstr) { + *out = vcpu->arch.host_cp0_badinstr; + return 0; + } else { + WARN_ONCE(1, "CPU doesn't have BadInstr register\n"); + return -EINVAL; + } +} + +/** + * kvm_get_badinstrp() - Get bad prior instruction encoding. + * @opc: Guest pointer to prior faulting instruction. + * @vcpu: KVM VCPU information. + * + * Gets the instruction encoding of the prior faulting instruction (the branch + * containing the delay slot which faulted), using the saved BadInstrP register + * value if it exists, otherwise falling back to reading guest memory at @opc. + * + * Returns: The instruction encoding of the prior faulting instruction. + */ +int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) +{ + if (cpu_has_badinstrp) { + *out = vcpu->arch.host_cp0_badinstrp; + return 0; + } else { + WARN_ONCE(1, "CPU doesn't have BadInstrp register\n"); + return -EINVAL; + } +} + +/** + * kvm_mips_count_disabled() - Find whether the CP0_Count timer is disabled. + * @vcpu: Virtual CPU. + * + * Returns: 1 if the CP0_Count timer is disabled by either the guest + * CP0_Cause.DC bit or the count_ctl.DC bit. + * 0 otherwise (in which case CP0_Count timer is running). + */ +int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + + return (vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) || + (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC); +} + +/** + * kvm_mips_ktime_to_count() - Scale ktime_t to a 32-bit count. + * + * Caches the dynamic nanosecond bias in vcpu->arch.count_dyn_bias. + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). + */ +static u32 kvm_mips_ktime_to_count(struct kvm_vcpu *vcpu, ktime_t now) +{ + s64 now_ns, periods; + u64 delta; + + now_ns = ktime_to_ns(now); + delta = now_ns + vcpu->arch.count_dyn_bias; + + if (delta >= vcpu->arch.count_period) { + /* If delta is out of safe range the bias needs adjusting */ + periods = div64_s64(now_ns, vcpu->arch.count_period); + vcpu->arch.count_dyn_bias = -periods * vcpu->arch.count_period; + /* Recalculate delta with new bias */ + delta = now_ns + vcpu->arch.count_dyn_bias; + } + + /* + * We've ensured that: + * delta < count_period + * + * Therefore the intermediate delta*count_hz will never overflow since + * at the boundary condition: + * delta = count_period + * delta = NSEC_PER_SEC * 2^32 / count_hz + * delta * count_hz = NSEC_PER_SEC * 2^32 + */ + return div_u64(delta * vcpu->arch.count_hz, NSEC_PER_SEC); +} + +/** + * kvm_mips_count_time() - Get effective current time. + * @vcpu: Virtual CPU. + * + * Get effective monotonic ktime. This is usually a straightforward ktime_get(), + * except when the master disable bit is set in count_ctl, in which case it is + * count_resume, i.e. the time that the count was disabled. + * + * Returns: Effective monotonic ktime for CP0_Count. + */ +static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu) +{ + if (unlikely(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC)) + return vcpu->arch.count_resume; + + return ktime_get(); +} + +/** + * kvm_mips_read_count_running() - Read the current count value as if running. + * @vcpu: Virtual CPU. + * @now: Kernel time to read CP0_Count at. + * + * Returns the current guest CP0_Count register at time @now and handles if the + * timer interrupt is pending and hasn't been handled yet. + * + * Returns: The current value of the guest CP0_Count register. + */ +static u32 kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + ktime_t expires, threshold; + u32 count, compare; + int running; + + /* Calculate the biased and scaled guest CP0_Count */ + count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now); + compare = kvm_read_c0_guest_compare(cop0); + + /* + * Find whether CP0_Count has reached the closest timer interrupt. If + * not, we shouldn't inject it. + */ + if ((s32)(count - compare) < 0) + return count; + + /* + * The CP0_Count we're going to return has already reached the closest + * timer interrupt. Quickly check if it really is a new interrupt by + * looking at whether the interval until the hrtimer expiry time is + * less than 1/4 of the timer period. + */ + expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer); + threshold = ktime_add_ns(now, vcpu->arch.count_period / 4); + if (ktime_before(expires, threshold)) { + /* + * Cancel it while we handle it so there's no chance of + * interference with the timeout handler. + */ + running = hrtimer_cancel(&vcpu->arch.comparecount_timer); + + /* Nothing should be waiting on the timeout */ + kvm_mips_callbacks->queue_timer_int(vcpu); + + /* + * Restart the timer if it was running based on the expiry time + * we read, so that we don't push it back 2 periods. + */ + if (running) { + expires = ktime_add_ns(expires, + vcpu->arch.count_period); + hrtimer_start(&vcpu->arch.comparecount_timer, expires, + HRTIMER_MODE_ABS); + } + } + + return count; +} + +/** + * kvm_mips_read_count() - Read the current count value. + * @vcpu: Virtual CPU. + * + * Read the current guest CP0_Count value, taking into account whether the timer + * is stopped. + * + * Returns: The current guest CP0_Count value. + */ +u32 kvm_mips_read_count(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + + /* If count disabled just read static copy of count */ + if (kvm_mips_count_disabled(vcpu)) + return kvm_read_c0_guest_count(cop0); + + return kvm_mips_read_count_running(vcpu, ktime_get()); +} + +/** + * kvm_mips_freeze_hrtimer() - Safely stop the hrtimer. + * @vcpu: Virtual CPU. + * @count: Output pointer for CP0_Count value at point of freeze. + * + * Freeze the hrtimer safely and return both the ktime and the CP0_Count value + * at the point it was frozen. It is guaranteed that any pending interrupts at + * the point it was frozen are handled, and none after that point. + * + * This is useful where the time/CP0_Count is needed in the calculation of the + * new parameters. + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). + * + * Returns: The ktime at the point of freeze. + */ +ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count) +{ + ktime_t now; + + /* stop hrtimer before finding time */ + hrtimer_cancel(&vcpu->arch.comparecount_timer); + now = ktime_get(); + + /* find count at this point and handle pending hrtimer */ + *count = kvm_mips_read_count_running(vcpu, now); + + return now; +} + +/** + * kvm_mips_resume_hrtimer() - Resume hrtimer, updating expiry. + * @vcpu: Virtual CPU. + * @now: ktime at point of resume. + * @count: CP0_Count at point of resume. + * + * Resumes the timer and updates the timer expiry based on @now and @count. + * This can be used in conjunction with kvm_mips_freeze_timer() when timer + * parameters need to be changed. + * + * It is guaranteed that a timer interrupt immediately after resume will be + * handled, but not if CP_Compare is exactly at @count. That case is already + * handled by kvm_mips_freeze_timer(). + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). + */ +static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu, + ktime_t now, u32 count) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + u32 compare; + u64 delta; + ktime_t expire; + + /* Calculate timeout (wrap 0 to 2^32) */ + compare = kvm_read_c0_guest_compare(cop0); + delta = (u64)(u32)(compare - count - 1) + 1; + delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz); + expire = ktime_add_ns(now, delta); + + /* Update hrtimer to use new timeout */ + hrtimer_cancel(&vcpu->arch.comparecount_timer); + hrtimer_start(&vcpu->arch.comparecount_timer, expire, HRTIMER_MODE_ABS); +} + +/** + * kvm_mips_restore_hrtimer() - Restore hrtimer after a gap, updating expiry. + * @vcpu: Virtual CPU. + * @before: Time before Count was saved, lower bound of drift calculation. + * @count: CP0_Count at point of restore. + * @min_drift: Minimum amount of drift permitted before correction. + * Must be <= 0. + * + * Restores the timer from a particular @count, accounting for drift. This can + * be used in conjunction with kvm_mips_freeze_timer() when a hardware timer is + * to be used for a period of time, but the exact ktime corresponding to the + * final Count that must be restored is not known. + * + * It is gauranteed that a timer interrupt immediately after restore will be + * handled, but not if CP0_Compare is exactly at @count. That case should + * already be handled when the hardware timer state is saved. + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is not + * stopped). + * + * Returns: Amount of correction to count_bias due to drift. + */ +int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before, + u32 count, int min_drift) +{ + ktime_t now, count_time; + u32 now_count, before_count; + u64 delta; + int drift, ret = 0; + + /* Calculate expected count at before */ + before_count = vcpu->arch.count_bias + + kvm_mips_ktime_to_count(vcpu, before); + + /* + * Detect significantly negative drift, where count is lower than + * expected. Some negative drift is expected when hardware counter is + * set after kvm_mips_freeze_timer(), and it is harmless to allow the + * time to jump forwards a little, within reason. If the drift is too + * significant, adjust the bias to avoid a big Guest.CP0_Count jump. + */ + drift = count - before_count; + if (drift < min_drift) { + count_time = before; + vcpu->arch.count_bias += drift; + ret = drift; + goto resume; + } + + /* Calculate expected count right now */ + now = ktime_get(); + now_count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now); + + /* + * Detect positive drift, where count is higher than expected, and + * adjust the bias to avoid guest time going backwards. + */ + drift = count - now_count; + if (drift > 0) { + count_time = now; + vcpu->arch.count_bias += drift; + ret = drift; + goto resume; + } + + /* Subtract nanosecond delta to find ktime when count was read */ + delta = (u64)(u32)(now_count - count); + delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz); + count_time = ktime_sub_ns(now, delta); + +resume: + /* Resume using the calculated ktime */ + kvm_mips_resume_hrtimer(vcpu, count_time, count); + return ret; +} + +/** + * kvm_mips_write_count() - Modify the count and update timer. + * @vcpu: Virtual CPU. + * @count: Guest CP0_Count value to set. + * + * Sets the CP0_Count value and updates the timer accordingly. + */ +void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + ktime_t now; + + /* Calculate bias */ + now = kvm_mips_count_time(vcpu); + vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now); + + if (kvm_mips_count_disabled(vcpu)) + /* The timer's disabled, adjust the static count */ + kvm_write_c0_guest_count(cop0, count); + else + /* Update timeout */ + kvm_mips_resume_hrtimer(vcpu, now, count); +} + +/** + * kvm_mips_init_count() - Initialise timer. + * @vcpu: Virtual CPU. + * @count_hz: Frequency of timer. + * + * Initialise the timer to the specified frequency, zero it, and set it going if + * it's enabled. + */ +void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz) +{ + vcpu->arch.count_hz = count_hz; + vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz); + vcpu->arch.count_dyn_bias = 0; + + /* Starting at 0 */ + kvm_mips_write_count(vcpu, 0); +} + +/** + * kvm_mips_set_count_hz() - Update the frequency of the timer. + * @vcpu: Virtual CPU. + * @count_hz: Frequency of CP0_Count timer in Hz. + * + * Change the frequency of the CP0_Count timer. This is done atomically so that + * CP0_Count is continuous and no timer interrupt is lost. + * + * Returns: -EINVAL if @count_hz is out of range. + * 0 on success. + */ +int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + int dc; + ktime_t now; + u32 count; + + /* ensure the frequency is in a sensible range... */ + if (count_hz <= 0 || count_hz > NSEC_PER_SEC) + return -EINVAL; + /* ... and has actually changed */ + if (vcpu->arch.count_hz == count_hz) + return 0; + + /* Safely freeze timer so we can keep it continuous */ + dc = kvm_mips_count_disabled(vcpu); + if (dc) { + now = kvm_mips_count_time(vcpu); + count = kvm_read_c0_guest_count(cop0); + } else { + now = kvm_mips_freeze_hrtimer(vcpu, &count); + } + + /* Update the frequency */ + vcpu->arch.count_hz = count_hz; + vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz); + vcpu->arch.count_dyn_bias = 0; + + /* Calculate adjusted bias so dynamic count is unchanged */ + vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now); + + /* Update and resume hrtimer */ + if (!dc) + kvm_mips_resume_hrtimer(vcpu, now, count); + return 0; +} + +/** + * kvm_mips_write_compare() - Modify compare and update timer. + * @vcpu: Virtual CPU. + * @compare: New CP0_Compare value. + * @ack: Whether to acknowledge timer interrupt. + * + * Update CP0_Compare to a new value and update the timeout. + * If @ack, atomically acknowledge any pending timer interrupt, otherwise ensure + * any pending timer interrupt is preserved. + */ +void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + int dc; + u32 old_compare = kvm_read_c0_guest_compare(cop0); + s32 delta = compare - old_compare; + u32 cause; + ktime_t now = ktime_set(0, 0); /* silence bogus GCC warning */ + u32 count; + + /* if unchanged, must just be an ack */ + if (old_compare == compare) { + if (!ack) + return; + kvm_mips_callbacks->dequeue_timer_int(vcpu); + kvm_write_c0_guest_compare(cop0, compare); + return; + } + + /* + * If guest CP0_Compare moves forward, CP0_GTOffset should be adjusted + * too to prevent guest CP0_Count hitting guest CP0_Compare. + * + * The new GTOffset corresponds to the new value of CP0_Compare, and is + * set prior to it being written into the guest context. We disable + * preemption until the new value is written to prevent restore of a + * GTOffset corresponding to the old CP0_Compare value. + */ + if (delta > 0) { + preempt_disable(); + write_c0_gtoffset(compare - read_c0_count()); + back_to_back_c0_hazard(); + } + + /* freeze_hrtimer() takes care of timer interrupts <= count */ + dc = kvm_mips_count_disabled(vcpu); + if (!dc) + now = kvm_mips_freeze_hrtimer(vcpu, &count); + + if (ack) + kvm_mips_callbacks->dequeue_timer_int(vcpu); + else + /* + * With VZ, writing CP0_Compare acks (clears) CP0_Cause.TI, so + * preserve guest CP0_Cause.TI if we don't want to ack it. + */ + cause = kvm_read_c0_guest_cause(cop0); + + kvm_write_c0_guest_compare(cop0, compare); + + if (delta > 0) + preempt_enable(); + + back_to_back_c0_hazard(); + + if (!ack && cause & CAUSEF_TI) + kvm_write_c0_guest_cause(cop0, cause); + + /* resume_hrtimer() takes care of timer interrupts > count */ + if (!dc) + kvm_mips_resume_hrtimer(vcpu, now, count); + + /* + * If guest CP0_Compare is moving backward, we delay CP0_GTOffset change + * until after the new CP0_Compare is written, otherwise new guest + * CP0_Count could hit new guest CP0_Compare. + */ + if (delta <= 0) + write_c0_gtoffset(compare - read_c0_count()); +} + +/** + * kvm_mips_count_disable() - Disable count. + * @vcpu: Virtual CPU. + * + * Disable the CP0_Count timer. A timer interrupt on or before the final stop + * time will be handled but not after. + * + * Assumes CP0_Count was previously enabled but now Guest.CP0_Cause.DC or + * count_ctl.DC has been set (count disabled). + * + * Returns: The time that the timer was stopped. + */ +static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + u32 count; + ktime_t now; + + /* Stop hrtimer */ + hrtimer_cancel(&vcpu->arch.comparecount_timer); + + /* Set the static count from the dynamic count, handling pending TI */ + now = ktime_get(); + count = kvm_mips_read_count_running(vcpu, now); + kvm_write_c0_guest_count(cop0, count); + + return now; +} + +/** + * kvm_mips_count_disable_cause() - Disable count using CP0_Cause.DC. + * @vcpu: Virtual CPU. + * + * Disable the CP0_Count timer and set CP0_Cause.DC. A timer interrupt on or + * before the final stop time will be handled if the timer isn't disabled by + * count_ctl.DC, but not after. + * + * Assumes CP0_Cause.DC is clear (count enabled). + */ +void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + + kvm_set_c0_guest_cause(cop0, CAUSEF_DC); + if (!(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC)) + kvm_mips_count_disable(vcpu); +} + +/** + * kvm_mips_count_enable_cause() - Enable count using CP0_Cause.DC. + * @vcpu: Virtual CPU. + * + * Enable the CP0_Count timer and clear CP0_Cause.DC. A timer interrupt after + * the start time will be handled if the timer isn't disabled by count_ctl.DC, + * potentially before even returning, so the caller should be careful with + * ordering of CP0_Cause modifications so as not to lose it. + * + * Assumes CP0_Cause.DC is set (count disabled). + */ +void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + u32 count; + + kvm_clear_c0_guest_cause(cop0, CAUSEF_DC); + + /* + * Set the dynamic count to match the static count. + * This starts the hrtimer if count_ctl.DC allows it. + * Otherwise it conveniently updates the biases. + */ + count = kvm_read_c0_guest_count(cop0); + kvm_mips_write_count(vcpu, count); +} + +/** + * kvm_mips_set_count_ctl() - Update the count control KVM register. + * @vcpu: Virtual CPU. + * @count_ctl: Count control register new value. + * + * Set the count control KVM register. The timer is updated accordingly. + * + * Returns: -EINVAL if reserved bits are set. + * 0 on success. + */ +int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + s64 changed = count_ctl ^ vcpu->arch.count_ctl; + s64 delta; + ktime_t expire, now; + u32 count, compare; + + /* Only allow defined bits to be changed */ + if (changed & ~(s64)(KVM_REG_MIPS_COUNT_CTL_DC)) + return -EINVAL; + + /* Apply new value */ + vcpu->arch.count_ctl = count_ctl; + + /* Master CP0_Count disable */ + if (changed & KVM_REG_MIPS_COUNT_CTL_DC) { + /* Is CP0_Cause.DC already disabling CP0_Count? */ + if (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC) { + if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) + /* Just record the current time */ + vcpu->arch.count_resume = ktime_get(); + } else if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) { + /* disable timer and record current time */ + vcpu->arch.count_resume = kvm_mips_count_disable(vcpu); + } else { + /* + * Calculate timeout relative to static count at resume + * time (wrap 0 to 2^32). + */ + count = kvm_read_c0_guest_count(cop0); + compare = kvm_read_c0_guest_compare(cop0); + delta = (u64)(u32)(compare - count - 1) + 1; + delta = div_u64(delta * NSEC_PER_SEC, + vcpu->arch.count_hz); + expire = ktime_add_ns(vcpu->arch.count_resume, delta); + + /* Handle pending interrupt */ + now = ktime_get(); + if (ktime_compare(now, expire) >= 0) + /* Nothing should be waiting on the timeout */ + kvm_mips_callbacks->queue_timer_int(vcpu); + + /* Resume hrtimer without changing bias */ + count = kvm_mips_read_count_running(vcpu, now); + kvm_mips_resume_hrtimer(vcpu, now, count); + } + } + + return 0; +} + +/** + * kvm_mips_set_count_resume() - Update the count resume KVM register. + * @vcpu: Virtual CPU. + * @count_resume: Count resume register new value. + * + * Set the count resume KVM register. + * + * Returns: -EINVAL if out of valid range (0..now). + * 0 on success. + */ +int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume) +{ + /* + * It doesn't make sense for the resume time to be in the future, as it + * would be possible for the next interrupt to be more than a full + * period in the future. + */ + if (count_resume < 0 || count_resume > ktime_to_ns(ktime_get())) + return -EINVAL; + + vcpu->arch.count_resume = ns_to_ktime(count_resume); + return 0; +} + +/** + * kvm_mips_count_timeout() - Push timer forward on timeout. + * @vcpu: Virtual CPU. + * + * Handle an hrtimer event by push the hrtimer forward a period. + * + * Returns: The hrtimer_restart value to return to the hrtimer subsystem. + */ +enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu) +{ + /* Add the Count period to the current expiry time */ + hrtimer_add_expires_ns(&vcpu->arch.comparecount_timer, + vcpu->arch.count_period); + return HRTIMER_RESTART; +} + +enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) +{ + kvm_debug("[%#lx] !!!WAIT!!! (%#lx)\n", vcpu->arch.pc, + vcpu->arch.pending_exceptions); + + ++vcpu->stat.wait_exits; + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT); + if (!vcpu->arch.pending_exceptions) { + kvm_vz_lose_htimer(vcpu); + vcpu->arch.wait = 1; + kvm_vcpu_halt(vcpu); + + /* + * We are runnable, then definitely go off to user space to + * check if any I/O interrupts are pending. + */ + if (kvm_arch_vcpu_runnable(vcpu)) + vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; + } + + return EMULATE_DONE; +} + +enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, + u32 cause, + struct kvm_vcpu *vcpu) +{ + int r; + enum emulation_result er; + u32 rt; + struct kvm_run *run = vcpu->run; + void *data = run->mmio.data; + unsigned int imme; + unsigned long curr_pc; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + rt = inst.i_format.rt; + + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr); + if (run->mmio.phys_addr == KVM_INVALID_ADDR) + goto out_fail; + + switch (inst.i_format.opcode) { +#if defined(CONFIG_64BIT) + case sd_op: + run->mmio.len = 8; + *(u64 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_SD: eaddr: %#lx, gpr: %#lx, data: %#llx\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u64 *)data); + break; +#endif + + case sw_op: + run->mmio.len = 4; + *(u32 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_SW: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u32 *)data); + break; + + case sh_op: + run->mmio.len = 2; + *(u16 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_SH: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u16 *)data); + break; + + case sb_op: + run->mmio.len = 1; + *(u8 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u8 *)data); + break; + + case swl_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x3); + run->mmio.len = 4; + imme = vcpu->arch.host_cp0_badvaddr & 0x3; + switch (imme) { + case 0: + *(u32 *)data = ((*(u32 *)data) & 0xffffff00) | + (vcpu->arch.gprs[rt] >> 24); + break; + case 1: + *(u32 *)data = ((*(u32 *)data) & 0xffff0000) | + (vcpu->arch.gprs[rt] >> 16); + break; + case 2: + *(u32 *)data = ((*(u32 *)data) & 0xff000000) | + (vcpu->arch.gprs[rt] >> 8); + break; + case 3: + *(u32 *)data = vcpu->arch.gprs[rt]; + break; + default: + break; + } + + kvm_debug("[%#lx] OP_SWL: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u32 *)data); + break; + + case swr_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x3); + run->mmio.len = 4; + imme = vcpu->arch.host_cp0_badvaddr & 0x3; + switch (imme) { + case 0: + *(u32 *)data = vcpu->arch.gprs[rt]; + break; + case 1: + *(u32 *)data = ((*(u32 *)data) & 0xff) | + (vcpu->arch.gprs[rt] << 8); + break; + case 2: + *(u32 *)data = ((*(u32 *)data) & 0xffff) | + (vcpu->arch.gprs[rt] << 16); + break; + case 3: + *(u32 *)data = ((*(u32 *)data) & 0xffffff) | + (vcpu->arch.gprs[rt] << 24); + break; + default: + break; + } + + kvm_debug("[%#lx] OP_SWR: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u32 *)data); + break; + +#if defined(CONFIG_64BIT) + case sdl_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x7); + + run->mmio.len = 8; + imme = vcpu->arch.host_cp0_badvaddr & 0x7; + switch (imme) { + case 0: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffffffff00) | + ((vcpu->arch.gprs[rt] >> 56) & 0xff); + break; + case 1: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffffff0000) | + ((vcpu->arch.gprs[rt] >> 48) & 0xffff); + break; + case 2: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffff000000) | + ((vcpu->arch.gprs[rt] >> 40) & 0xffffff); + break; + case 3: + *(u64 *)data = ((*(u64 *)data) & 0xffffffff00000000) | + ((vcpu->arch.gprs[rt] >> 32) & 0xffffffff); + break; + case 4: + *(u64 *)data = ((*(u64 *)data) & 0xffffff0000000000) | + ((vcpu->arch.gprs[rt] >> 24) & 0xffffffffff); + break; + case 5: + *(u64 *)data = ((*(u64 *)data) & 0xffff000000000000) | + ((vcpu->arch.gprs[rt] >> 16) & 0xffffffffffff); + break; + case 6: + *(u64 *)data = ((*(u64 *)data) & 0xff00000000000000) | + ((vcpu->arch.gprs[rt] >> 8) & 0xffffffffffffff); + break; + case 7: + *(u64 *)data = vcpu->arch.gprs[rt]; + break; + default: + break; + } + + kvm_debug("[%#lx] OP_SDL: eaddr: %#lx, gpr: %#lx, data: %llx\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u64 *)data); + break; + + case sdr_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x7); + + run->mmio.len = 8; + imme = vcpu->arch.host_cp0_badvaddr & 0x7; + switch (imme) { + case 0: + *(u64 *)data = vcpu->arch.gprs[rt]; + break; + case 1: + *(u64 *)data = ((*(u64 *)data) & 0xff) | + (vcpu->arch.gprs[rt] << 8); + break; + case 2: + *(u64 *)data = ((*(u64 *)data) & 0xffff) | + (vcpu->arch.gprs[rt] << 16); + break; + case 3: + *(u64 *)data = ((*(u64 *)data) & 0xffffff) | + (vcpu->arch.gprs[rt] << 24); + break; + case 4: + *(u64 *)data = ((*(u64 *)data) & 0xffffffff) | + (vcpu->arch.gprs[rt] << 32); + break; + case 5: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffff) | + (vcpu->arch.gprs[rt] << 40); + break; + case 6: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffffff) | + (vcpu->arch.gprs[rt] << 48); + break; + case 7: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffffffff) | + (vcpu->arch.gprs[rt] << 56); + break; + default: + break; + } + + kvm_debug("[%#lx] OP_SDR: eaddr: %#lx, gpr: %#lx, data: %llx\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u64 *)data); + break; +#endif + +#ifdef CONFIG_CPU_LOONGSON64 + case sdc2_op: + rt = inst.loongson3_lsdc2_format.rt; + switch (inst.loongson3_lsdc2_format.opcode1) { + /* + * Loongson-3 overridden sdc2 instructions. + * opcode1 instruction + * 0x0 gssbx: store 1 bytes from GPR + * 0x1 gsshx: store 2 bytes from GPR + * 0x2 gsswx: store 4 bytes from GPR + * 0x3 gssdx: store 8 bytes from GPR + */ + case 0x0: + run->mmio.len = 1; + *(u8 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_GSSBX: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u8 *)data); + break; + case 0x1: + run->mmio.len = 2; + *(u16 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_GSSSHX: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u16 *)data); + break; + case 0x2: + run->mmio.len = 4; + *(u32 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_GSSWX: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u32 *)data); + break; + case 0x3: + run->mmio.len = 8; + *(u64 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_GSSDX: eaddr: %#lx, gpr: %#lx, data: %#llx\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u64 *)data); + break; + default: + kvm_err("Godson Extended GS-Store not yet supported (inst=0x%08x)\n", + inst.word); + break; + } + break; +#endif + default: + kvm_err("Store not yet supported (inst=0x%08x)\n", + inst.word); + goto out_fail; + } + + vcpu->mmio_needed = 1; + run->mmio.is_write = 1; + vcpu->mmio_is_write = 1; + + r = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, + run->mmio.phys_addr, run->mmio.len, data); + + if (!r) { + vcpu->mmio_needed = 0; + return EMULATE_DONE; + } + + return EMULATE_DO_MMIO; + +out_fail: + /* Rollback PC if emulation was unsuccessful */ + vcpu->arch.pc = curr_pc; + return EMULATE_FAIL; +} + +enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, + u32 cause, struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int r; + enum emulation_result er; + unsigned long curr_pc; + u32 op, rt; + unsigned int imme; + + rt = inst.i_format.rt; + op = inst.i_format.opcode; + + /* + * Find the resume PC now while we have safe and easy access to the + * prior branch instruction, and save it for + * kvm_mips_complete_mmio_load() to restore later. + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + vcpu->arch.io_pc = vcpu->arch.pc; + vcpu->arch.pc = curr_pc; + + vcpu->arch.io_gpr = rt; + + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr); + if (run->mmio.phys_addr == KVM_INVALID_ADDR) + return EMULATE_FAIL; + + vcpu->mmio_needed = 2; /* signed */ + switch (op) { +#if defined(CONFIG_64BIT) + case ld_op: + run->mmio.len = 8; + break; + + case lwu_op: + vcpu->mmio_needed = 1; /* unsigned */ + fallthrough; +#endif + case lw_op: + run->mmio.len = 4; + break; + + case lhu_op: + vcpu->mmio_needed = 1; /* unsigned */ + fallthrough; + case lh_op: + run->mmio.len = 2; + break; + + case lbu_op: + vcpu->mmio_needed = 1; /* unsigned */ + fallthrough; + case lb_op: + run->mmio.len = 1; + break; + + case lwl_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x3); + + run->mmio.len = 4; + imme = vcpu->arch.host_cp0_badvaddr & 0x3; + switch (imme) { + case 0: + vcpu->mmio_needed = 3; /* 1 byte */ + break; + case 1: + vcpu->mmio_needed = 4; /* 2 bytes */ + break; + case 2: + vcpu->mmio_needed = 5; /* 3 bytes */ + break; + case 3: + vcpu->mmio_needed = 6; /* 4 bytes */ + break; + default: + break; + } + break; + + case lwr_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x3); + + run->mmio.len = 4; + imme = vcpu->arch.host_cp0_badvaddr & 0x3; + switch (imme) { + case 0: + vcpu->mmio_needed = 7; /* 4 bytes */ + break; + case 1: + vcpu->mmio_needed = 8; /* 3 bytes */ + break; + case 2: + vcpu->mmio_needed = 9; /* 2 bytes */ + break; + case 3: + vcpu->mmio_needed = 10; /* 1 byte */ + break; + default: + break; + } + break; + +#if defined(CONFIG_64BIT) + case ldl_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x7); + + run->mmio.len = 8; + imme = vcpu->arch.host_cp0_badvaddr & 0x7; + switch (imme) { + case 0: + vcpu->mmio_needed = 11; /* 1 byte */ + break; + case 1: + vcpu->mmio_needed = 12; /* 2 bytes */ + break; + case 2: + vcpu->mmio_needed = 13; /* 3 bytes */ + break; + case 3: + vcpu->mmio_needed = 14; /* 4 bytes */ + break; + case 4: + vcpu->mmio_needed = 15; /* 5 bytes */ + break; + case 5: + vcpu->mmio_needed = 16; /* 6 bytes */ + break; + case 6: + vcpu->mmio_needed = 17; /* 7 bytes */ + break; + case 7: + vcpu->mmio_needed = 18; /* 8 bytes */ + break; + default: + break; + } + break; + + case ldr_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x7); + + run->mmio.len = 8; + imme = vcpu->arch.host_cp0_badvaddr & 0x7; + switch (imme) { + case 0: + vcpu->mmio_needed = 19; /* 8 bytes */ + break; + case 1: + vcpu->mmio_needed = 20; /* 7 bytes */ + break; + case 2: + vcpu->mmio_needed = 21; /* 6 bytes */ + break; + case 3: + vcpu->mmio_needed = 22; /* 5 bytes */ + break; + case 4: + vcpu->mmio_needed = 23; /* 4 bytes */ + break; + case 5: + vcpu->mmio_needed = 24; /* 3 bytes */ + break; + case 6: + vcpu->mmio_needed = 25; /* 2 bytes */ + break; + case 7: + vcpu->mmio_needed = 26; /* 1 byte */ + break; + default: + break; + } + break; +#endif + +#ifdef CONFIG_CPU_LOONGSON64 + case ldc2_op: + rt = inst.loongson3_lsdc2_format.rt; + switch (inst.loongson3_lsdc2_format.opcode1) { + /* + * Loongson-3 overridden ldc2 instructions. + * opcode1 instruction + * 0x0 gslbx: store 1 bytes from GPR + * 0x1 gslhx: store 2 bytes from GPR + * 0x2 gslwx: store 4 bytes from GPR + * 0x3 gsldx: store 8 bytes from GPR + */ + case 0x0: + run->mmio.len = 1; + vcpu->mmio_needed = 27; /* signed */ + break; + case 0x1: + run->mmio.len = 2; + vcpu->mmio_needed = 28; /* signed */ + break; + case 0x2: + run->mmio.len = 4; + vcpu->mmio_needed = 29; /* signed */ + break; + case 0x3: + run->mmio.len = 8; + vcpu->mmio_needed = 30; /* signed */ + break; + default: + kvm_err("Godson Extended GS-Load for float not yet supported (inst=0x%08x)\n", + inst.word); + break; + } + break; +#endif + + default: + kvm_err("Load not yet supported (inst=0x%08x)\n", + inst.word); + vcpu->mmio_needed = 0; + return EMULATE_FAIL; + } + + run->mmio.is_write = 0; + vcpu->mmio_is_write = 0; + + r = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, + run->mmio.phys_addr, run->mmio.len, run->mmio.data); + + if (!r) { + kvm_mips_complete_mmio_load(vcpu); + vcpu->mmio_needed = 0; + return EMULATE_DONE; + } + + return EMULATE_DO_MMIO; +} + +enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr]; + enum emulation_result er = EMULATE_DONE; + + if (run->mmio.len > sizeof(*gpr)) { + kvm_err("Bad MMIO length: %d", run->mmio.len); + er = EMULATE_FAIL; + goto done; + } + + /* Restore saved resume PC */ + vcpu->arch.pc = vcpu->arch.io_pc; + + switch (run->mmio.len) { + case 8: + switch (vcpu->mmio_needed) { + case 11: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffffff) | + (((*(s64 *)run->mmio.data) & 0xff) << 56); + break; + case 12: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffff) | + (((*(s64 *)run->mmio.data) & 0xffff) << 48); + break; + case 13: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffff) | + (((*(s64 *)run->mmio.data) & 0xffffff) << 40); + break; + case 14: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffff) | + (((*(s64 *)run->mmio.data) & 0xffffffff) << 32); + break; + case 15: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff) | + (((*(s64 *)run->mmio.data) & 0xffffffffff) << 24); + break; + case 16: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff) | + (((*(s64 *)run->mmio.data) & 0xffffffffffff) << 16); + break; + case 17: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff) | + (((*(s64 *)run->mmio.data) & 0xffffffffffffff) << 8); + break; + case 18: + case 19: + *gpr = *(s64 *)run->mmio.data; + break; + case 20: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff00000000000000) | + ((((*(s64 *)run->mmio.data)) >> 8) & 0xffffffffffffff); + break; + case 21: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff000000000000) | + ((((*(s64 *)run->mmio.data)) >> 16) & 0xffffffffffff); + break; + case 22: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff0000000000) | + ((((*(s64 *)run->mmio.data)) >> 24) & 0xffffffffff); + break; + case 23: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffff00000000) | + ((((*(s64 *)run->mmio.data)) >> 32) & 0xffffffff); + break; + case 24: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffff000000) | + ((((*(s64 *)run->mmio.data)) >> 40) & 0xffffff); + break; + case 25: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffff0000) | + ((((*(s64 *)run->mmio.data)) >> 48) & 0xffff); + break; + case 26: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffffff00) | + ((((*(s64 *)run->mmio.data)) >> 56) & 0xff); + break; + default: + *gpr = *(s64 *)run->mmio.data; + } + break; + + case 4: + switch (vcpu->mmio_needed) { + case 1: + *gpr = *(u32 *)run->mmio.data; + break; + case 2: + *gpr = *(s32 *)run->mmio.data; + break; + case 3: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff) | + (((*(s32 *)run->mmio.data) & 0xff) << 24); + break; + case 4: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff) | + (((*(s32 *)run->mmio.data) & 0xffff) << 16); + break; + case 5: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff) | + (((*(s32 *)run->mmio.data) & 0xffffff) << 8); + break; + case 6: + case 7: + *gpr = *(s32 *)run->mmio.data; + break; + case 8: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff000000) | + ((((*(s32 *)run->mmio.data)) >> 8) & 0xffffff); + break; + case 9: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff0000) | + ((((*(s32 *)run->mmio.data)) >> 16) & 0xffff); + break; + case 10: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff00) | + ((((*(s32 *)run->mmio.data)) >> 24) & 0xff); + break; + default: + *gpr = *(s32 *)run->mmio.data; + } + break; + + case 2: + if (vcpu->mmio_needed == 1) + *gpr = *(u16 *)run->mmio.data; + else + *gpr = *(s16 *)run->mmio.data; + + break; + case 1: + if (vcpu->mmio_needed == 1) + *gpr = *(u8 *)run->mmio.data; + else + *gpr = *(s8 *)run->mmio.data; + break; + } + +done: + return er; +} diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c new file mode 100644 index 0000000000..aceed14aa1 --- /dev/null +++ b/arch/mips/kvm/entry.c @@ -0,0 +1,916 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Generation of main entry point for the guest, exception handling. + * + * Copyright (C) 2012 MIPS Technologies, Inc. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + * + * Copyright (C) 2016 Imagination Technologies Ltd. + */ + +#include <linux/kvm_host.h> +#include <linux/log2.h> +#include <asm/mmu_context.h> +#include <asm/msa.h> +#include <asm/setup.h> +#include <asm/tlbex.h> +#include <asm/uasm.h> + +/* Register names */ +#define ZERO 0 +#define AT 1 +#define V0 2 +#define V1 3 +#define A0 4 +#define A1 5 + +#if _MIPS_SIM == _MIPS_SIM_ABI32 +#define T0 8 +#define T1 9 +#define T2 10 +#define T3 11 +#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ + +#if _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 +#define T0 12 +#define T1 13 +#define T2 14 +#define T3 15 +#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */ + +#define S0 16 +#define S1 17 +#define T9 25 +#define K0 26 +#define K1 27 +#define GP 28 +#define SP 29 +#define RA 31 + +/* Some CP0 registers */ +#define C0_PWBASE 5, 5 +#define C0_HWRENA 7, 0 +#define C0_BADVADDR 8, 0 +#define C0_BADINSTR 8, 1 +#define C0_BADINSTRP 8, 2 +#define C0_PGD 9, 7 +#define C0_ENTRYHI 10, 0 +#define C0_GUESTCTL1 10, 4 +#define C0_STATUS 12, 0 +#define C0_GUESTCTL0 12, 6 +#define C0_CAUSE 13, 0 +#define C0_EPC 14, 0 +#define C0_EBASE 15, 1 +#define C0_CONFIG5 16, 5 +#define C0_DDATA_LO 28, 3 +#define C0_ERROREPC 30, 0 + +#define CALLFRAME_SIZ 32 + +#ifdef CONFIG_64BIT +#define ST0_KX_IF_64 ST0_KX +#else +#define ST0_KX_IF_64 0 +#endif + +static unsigned int scratch_vcpu[2] = { C0_DDATA_LO }; +static unsigned int scratch_tmp[2] = { C0_ERROREPC }; + +enum label_id { + label_fpu_1 = 1, + label_msa_1, + label_return_to_host, + label_kernel_asid, + label_exit_common, +}; + +UASM_L_LA(_fpu_1) +UASM_L_LA(_msa_1) +UASM_L_LA(_return_to_host) +UASM_L_LA(_kernel_asid) +UASM_L_LA(_exit_common) + +static void *kvm_mips_build_enter_guest(void *addr); +static void *kvm_mips_build_ret_from_exit(void *addr); +static void *kvm_mips_build_ret_to_guest(void *addr); +static void *kvm_mips_build_ret_to_host(void *addr); + +/* + * The version of this function in tlbex.c uses current_cpu_type(), but for KVM + * we assume symmetry. + */ +static int c0_kscratch(void) +{ + return 31; +} + +/** + * kvm_mips_entry_setup() - Perform global setup for entry code. + * + * Perform global setup for entry code, such as choosing a scratch register. + * + * Returns: 0 on success. + * -errno on failure. + */ +int kvm_mips_entry_setup(void) +{ + /* + * We prefer to use KScratchN registers if they are available over the + * defaults above, which may not work on all cores. + */ + unsigned int kscratch_mask = cpu_data[0].kscratch_mask; + + if (pgd_reg != -1) + kscratch_mask &= ~BIT(pgd_reg); + + /* Pick a scratch register for storing VCPU */ + if (kscratch_mask) { + scratch_vcpu[0] = c0_kscratch(); + scratch_vcpu[1] = ffs(kscratch_mask) - 1; + kscratch_mask &= ~BIT(scratch_vcpu[1]); + } + + /* Pick a scratch register to use as a temp for saving state */ + if (kscratch_mask) { + scratch_tmp[0] = c0_kscratch(); + scratch_tmp[1] = ffs(kscratch_mask) - 1; + kscratch_mask &= ~BIT(scratch_tmp[1]); + } + + return 0; +} + +static void kvm_mips_build_save_scratch(u32 **p, unsigned int tmp, + unsigned int frame) +{ + /* Save the VCPU scratch register value in cp0_epc of the stack frame */ + UASM_i_MFC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]); + UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); + + /* Save the temp scratch register value in cp0_cause of stack frame */ + if (scratch_tmp[0] == c0_kscratch()) { + UASM_i_MFC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); + UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); + } +} + +static void kvm_mips_build_restore_scratch(u32 **p, unsigned int tmp, + unsigned int frame) +{ + /* + * Restore host scratch register values saved by + * kvm_mips_build_save_scratch(). + */ + UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); + UASM_i_MTC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]); + + if (scratch_tmp[0] == c0_kscratch()) { + UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); + UASM_i_MTC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); + } +} + +/** + * build_set_exc_base() - Assemble code to write exception base address. + * @p: Code buffer pointer. + * @reg: Source register (generated code may set WG bit in @reg). + * + * Assemble code to modify the exception base address in the EBase register, + * using the appropriately sized access and setting the WG bit if necessary. + */ +static inline void build_set_exc_base(u32 **p, unsigned int reg) +{ + if (cpu_has_ebase_wg) { + /* Set WG so that all the bits get written */ + uasm_i_ori(p, reg, reg, MIPS_EBASE_WG); + UASM_i_MTC0(p, reg, C0_EBASE); + } else { + uasm_i_mtc0(p, reg, C0_EBASE); + } +} + +/** + * kvm_mips_build_vcpu_run() - Assemble function to start running a guest VCPU. + * @addr: Address to start writing code. + * + * Assemble the start of the vcpu_run function to run a guest VCPU. The function + * conforms to the following prototype: + * + * int vcpu_run(struct kvm_vcpu *vcpu); + * + * The exit from the guest and return to the caller is handled by the code + * generated by kvm_mips_build_ret_to_host(). + * + * Returns: Next address after end of written function. + */ +void *kvm_mips_build_vcpu_run(void *addr) +{ + u32 *p = addr; + unsigned int i; + + /* + * A0: vcpu + */ + + /* k0/k1 not being used in host kernel context */ + UASM_i_ADDIU(&p, K1, SP, -(int)sizeof(struct pt_regs)); + for (i = 16; i < 32; ++i) { + if (i == 24) + i = 28; + UASM_i_SW(&p, i, offsetof(struct pt_regs, regs[i]), K1); + } + + /* Save host status */ + uasm_i_mfc0(&p, V0, C0_STATUS); + UASM_i_SW(&p, V0, offsetof(struct pt_regs, cp0_status), K1); + + /* Save scratch registers, will be used to store pointer to vcpu etc */ + kvm_mips_build_save_scratch(&p, V1, K1); + + /* VCPU scratch register has pointer to vcpu */ + UASM_i_MTC0(&p, A0, scratch_vcpu[0], scratch_vcpu[1]); + + /* Offset into vcpu->arch */ + UASM_i_ADDIU(&p, K1, A0, offsetof(struct kvm_vcpu, arch)); + + /* + * Save the host stack to VCPU, used for exception processing + * when we exit from the Guest + */ + UASM_i_SW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1); + + /* Save the kernel gp as well */ + UASM_i_SW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1); + + /* + * Setup status register for running the guest in UM, interrupts + * are disabled + */ + UASM_i_LA(&p, K0, ST0_EXL | KSU_USER | ST0_BEV | ST0_KX_IF_64); + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + + /* load up the new EBASE */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1); + build_set_exc_base(&p, K0); + + /* + * Now that the new EBASE has been loaded, unset BEV, set + * interrupt mask as it was but make sure that timer interrupts + * are enabled + */ + uasm_i_addiu(&p, K0, ZERO, ST0_EXL | KSU_USER | ST0_IE | ST0_KX_IF_64); + uasm_i_andi(&p, V0, V0, ST0_IM); + uasm_i_or(&p, K0, K0, V0); + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + + p = kvm_mips_build_enter_guest(p); + + return p; +} + +/** + * kvm_mips_build_enter_guest() - Assemble code to resume guest execution. + * @addr: Address to start writing code. + * + * Assemble the code to resume guest execution. This code is common between the + * initial entry into the guest from the host, and returning from the exit + * handler back to the guest. + * + * Returns: Next address after end of written function. + */ +static void *kvm_mips_build_enter_guest(void *addr) +{ + u32 *p = addr; + unsigned int i; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; + struct uasm_label __maybe_unused *l = labels; + struct uasm_reloc __maybe_unused *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Set Guest EPC */ + UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1); + UASM_i_MTC0(&p, T0, C0_EPC); + + /* Save normal linux process pgd (VZ guarantees pgd_reg is set) */ + if (cpu_has_ldpte) + UASM_i_MFC0(&p, K0, C0_PWBASE); + else + UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_pgd), K1); + + /* + * Set up KVM GPA pgd. + * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): + * - call tlbmiss_handler_setup_pgd(mm->pgd) + * - write mm->pgd into CP0_PWBase + * + * We keep S0 pointing at struct kvm so we can load the ASID below. + */ + UASM_i_LW(&p, S0, (int)offsetof(struct kvm_vcpu, kvm) - + (int)offsetof(struct kvm_vcpu, arch), K1); + UASM_i_LW(&p, A0, offsetof(struct kvm, arch.gpa_mm.pgd), S0); + UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); + uasm_i_jalr(&p, RA, T9); + /* delay slot */ + if (cpu_has_htw) + UASM_i_MTC0(&p, A0, C0_PWBASE); + else + uasm_i_nop(&p); + + /* Set GM bit to setup eret to VZ guest context */ + uasm_i_addiu(&p, V1, ZERO, 1); + uasm_i_mfc0(&p, K0, C0_GUESTCTL0); + uasm_i_ins(&p, K0, V1, MIPS_GCTL0_GM_SHIFT, 1); + uasm_i_mtc0(&p, K0, C0_GUESTCTL0); + + if (cpu_has_guestid) { + /* + * Set root mode GuestID, so that root TLB refill handler can + * use the correct GuestID in the root TLB. + */ + + /* Get current GuestID */ + uasm_i_mfc0(&p, T0, C0_GUESTCTL1); + /* Set GuestCtl1.RID = GuestCtl1.ID */ + uasm_i_ext(&p, T1, T0, MIPS_GCTL1_ID_SHIFT, + MIPS_GCTL1_ID_WIDTH); + uasm_i_ins(&p, T0, T1, MIPS_GCTL1_RID_SHIFT, + MIPS_GCTL1_RID_WIDTH); + uasm_i_mtc0(&p, T0, C0_GUESTCTL1); + + /* GuestID handles dealiasing so we don't need to touch ASID */ + goto skip_asid_restore; + } + + /* Root ASID Dealias (RAD) */ + + /* Save host ASID */ + UASM_i_MFC0(&p, K0, C0_ENTRYHI); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi), + K1); + + /* Set the root ASID for the Guest */ + UASM_i_ADDIU(&p, T1, S0, + offsetof(struct kvm, arch.gpa_mm.context.asid)); + + /* t1: contains the base of the ASID array, need to get the cpu id */ + /* smp_processor_id */ + uasm_i_lw(&p, T2, offsetof(struct thread_info, cpu), GP); + /* index the ASID array */ + uasm_i_sll(&p, T2, T2, ilog2(sizeof(long))); + UASM_i_ADDU(&p, T3, T1, T2); + UASM_i_LW(&p, K0, 0, T3); +#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE + /* + * reuse ASID array offset + * cpuinfo_mips is a multiple of sizeof(long) + */ + uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/sizeof(long)); + uasm_i_mul(&p, T2, T2, T3); + + UASM_i_LA_mostly(&p, AT, (long)&cpu_data[0].asid_mask); + UASM_i_ADDU(&p, AT, AT, T2); + UASM_i_LW(&p, T2, uasm_rel_lo((long)&cpu_data[0].asid_mask), AT); + uasm_i_and(&p, K0, K0, T2); +#else + uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID); +#endif + + /* Set up KVM VZ root ASID (!guestid) */ + uasm_i_mtc0(&p, K0, C0_ENTRYHI); +skip_asid_restore: + uasm_i_ehb(&p); + + /* Disable RDHWR access */ + uasm_i_mtc0(&p, ZERO, C0_HWRENA); + + /* load the guest context from VCPU and return */ + for (i = 1; i < 32; ++i) { + /* Guest k0/k1 loaded later */ + if (i == K0 || i == K1) + continue; + UASM_i_LW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1); + } + +#ifndef CONFIG_CPU_MIPSR6 + /* Restore hi/lo */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, hi), K1); + uasm_i_mthi(&p, K0); + + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, lo), K1); + uasm_i_mtlo(&p, K0); +#endif + + /* Restore the guest's k0/k1 registers */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1); + UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1); + + /* Jump to guest */ + uasm_i_eret(&p); + + uasm_resolve_relocs(relocs, labels); + + return p; +} + +/** + * kvm_mips_build_tlb_refill_exception() - Assemble TLB refill handler. + * @addr: Address to start writing code. + * @handler: Address of common handler (within range of @addr). + * + * Assemble TLB refill exception fast path handler for guest execution. + * + * Returns: Next address after end of written function. + */ +void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler) +{ + u32 *p = addr; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; +#ifndef CONFIG_CPU_LOONGSON64 + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; +#endif + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Save guest k1 into scratch register */ + UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]); + + /* Get the VCPU pointer from the VCPU scratch register */ + UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]); + + /* Save guest k0 into VCPU structure */ + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1); + + /* + * Some of the common tlbex code uses current_cpu_type(). For KVM we + * assume symmetry and just disable preemption to silence the warning. + */ + preempt_disable(); + +#ifdef CONFIG_CPU_LOONGSON64 + UASM_i_MFC0(&p, K1, C0_PGD); + uasm_i_lddir(&p, K0, K1, 3); /* global page dir */ +#ifndef __PAGETABLE_PMD_FOLDED + uasm_i_lddir(&p, K1, K0, 1); /* middle page dir */ +#endif + uasm_i_ldpte(&p, K1, 0); /* even */ + uasm_i_ldpte(&p, K1, 1); /* odd */ + uasm_i_tlbwr(&p); +#else + /* + * Now for the actual refill bit. A lot of this can be common with the + * Linux TLB refill handler, however we don't need to handle so many + * cases. We only need to handle user mode refills, and user mode runs + * with 32-bit addressing. + * + * Therefore the branch to label_vmalloc generated by build_get_pmde64() + * that isn't resolved should never actually get taken and is harmless + * to leave in place for now. + */ + +#ifdef CONFIG_64BIT + build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ +#else + build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ +#endif + + /* we don't support huge pages yet */ + + build_get_ptep(&p, K0, K1); + build_update_entries(&p, K0, K1); + build_tlb_write_entry(&p, &l, &r, tlb_random); +#endif + + preempt_enable(); + + /* Get the VCPU pointer from the VCPU scratch register again */ + UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]); + + /* Restore the guest's k0/k1 registers */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1); + uasm_i_ehb(&p); + UASM_i_MFC0(&p, K1, scratch_tmp[0], scratch_tmp[1]); + + /* Jump to guest */ + uasm_i_eret(&p); + + return p; +} + +/** + * kvm_mips_build_exception() - Assemble first level guest exception handler. + * @addr: Address to start writing code. + * @handler: Address of common handler (within range of @addr). + * + * Assemble exception vector code for guest execution. The generated vector will + * branch to the common exception handler generated by kvm_mips_build_exit(). + * + * Returns: Next address after end of written function. + */ +void *kvm_mips_build_exception(void *addr, void *handler) +{ + u32 *p = addr; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Save guest k1 into scratch register */ + UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]); + + /* Get the VCPU pointer from the VCPU scratch register */ + UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]); + UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch)); + + /* Save guest k0 into VCPU structure */ + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1); + + /* Branch to the common handler */ + uasm_il_b(&p, &r, label_exit_common); + uasm_i_nop(&p); + + uasm_l_exit_common(&l, handler); + uasm_resolve_relocs(relocs, labels); + + return p; +} + +/** + * kvm_mips_build_exit() - Assemble common guest exit handler. + * @addr: Address to start writing code. + * + * Assemble the generic guest exit handling code. This is called by the + * exception vectors (generated by kvm_mips_build_exception()), and calls + * kvm_mips_handle_exit(), then either resumes the guest or returns to the host + * depending on the return value. + * + * Returns: Next address after end of written function. + */ +void *kvm_mips_build_exit(void *addr) +{ + u32 *p = addr; + unsigned int i; + struct uasm_label labels[3]; + struct uasm_reloc relocs[3]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* + * Generic Guest exception handler. We end up here when the guest + * does something that causes a trap to kernel mode. + * + * Both k0/k1 registers will have already been saved (k0 into the vcpu + * structure, and k1 into the scratch_tmp register). + * + * The k1 register will already contain the kvm_vcpu_arch pointer. + */ + + /* Start saving Guest context to VCPU */ + for (i = 0; i < 32; ++i) { + /* Guest k0/k1 saved later */ + if (i == K0 || i == K1) + continue; + UASM_i_SW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1); + } + +#ifndef CONFIG_CPU_MIPSR6 + /* We need to save hi/lo and restore them on the way out */ + uasm_i_mfhi(&p, T0); + UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, hi), K1); + + uasm_i_mflo(&p, T0); + UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, lo), K1); +#endif + + /* Finally save guest k1 to VCPU */ + uasm_i_ehb(&p); + UASM_i_MFC0(&p, T0, scratch_tmp[0], scratch_tmp[1]); + UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1); + + /* Now that context has been saved, we can use other registers */ + + /* Restore vcpu */ + UASM_i_MFC0(&p, S0, scratch_vcpu[0], scratch_vcpu[1]); + + /* + * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process + * the exception + */ + UASM_i_MFC0(&p, K0, C0_EPC); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, pc), K1); + + UASM_i_MFC0(&p, K0, C0_BADVADDR); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_badvaddr), + K1); + + uasm_i_mfc0(&p, K0, C0_CAUSE); + uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), K1); + + if (cpu_has_badinstr) { + uasm_i_mfc0(&p, K0, C0_BADINSTR); + uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, + host_cp0_badinstr), K1); + } + + if (cpu_has_badinstrp) { + uasm_i_mfc0(&p, K0, C0_BADINSTRP); + uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, + host_cp0_badinstrp), K1); + } + + /* Now restore the host state just enough to run the handlers */ + + /* Switch EBASE to the one used by Linux */ + /* load up the host EBASE */ + uasm_i_mfc0(&p, V0, C0_STATUS); + + uasm_i_lui(&p, AT, ST0_BEV >> 16); + uasm_i_or(&p, K0, V0, AT); + + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + + UASM_i_LA_mostly(&p, K0, (long)&ebase); + UASM_i_LW(&p, K0, uasm_rel_lo((long)&ebase), K0); + build_set_exc_base(&p, K0); + + if (raw_cpu_has_fpu) { + /* + * If FPU is enabled, save FCR31 and clear it so that later + * ctc1's don't trigger FPE for pending exceptions. + */ + uasm_i_lui(&p, AT, ST0_CU1 >> 16); + uasm_i_and(&p, V1, V0, AT); + uasm_il_beqz(&p, &r, V1, label_fpu_1); + uasm_i_nop(&p); + uasm_i_cfc1(&p, T0, 31); + uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.fcr31), + K1); + uasm_i_ctc1(&p, ZERO, 31); + uasm_l_fpu_1(&l, p); + } + + if (cpu_has_msa) { + /* + * If MSA is enabled, save MSACSR and clear it so that later + * instructions don't trigger MSAFPE for pending exceptions. + */ + uasm_i_mfc0(&p, T0, C0_CONFIG5); + uasm_i_ext(&p, T0, T0, 27, 1); /* MIPS_CONF5_MSAEN */ + uasm_il_beqz(&p, &r, T0, label_msa_1); + uasm_i_nop(&p); + uasm_i_cfcmsa(&p, T0, MSA_CSR); + uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.msacsr), + K1); + uasm_i_ctcmsa(&p, MSA_CSR, ZERO); + uasm_l_msa_1(&l, p); + } + + /* Restore host ASID */ + if (!cpu_has_guestid) { + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi), + K1); + UASM_i_MTC0(&p, K0, C0_ENTRYHI); + } + + /* + * Set up normal Linux process pgd. + * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): + * - call tlbmiss_handler_setup_pgd(mm->pgd) + * - write mm->pgd into CP0_PWBase + */ + UASM_i_LW(&p, A0, + offsetof(struct kvm_vcpu_arch, host_pgd), K1); + UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); + uasm_i_jalr(&p, RA, T9); + /* delay slot */ + if (cpu_has_htw) + UASM_i_MTC0(&p, A0, C0_PWBASE); + else + uasm_i_nop(&p); + + /* Clear GM bit so we don't enter guest mode when EXL is cleared */ + uasm_i_mfc0(&p, K0, C0_GUESTCTL0); + uasm_i_ins(&p, K0, ZERO, MIPS_GCTL0_GM_SHIFT, 1); + uasm_i_mtc0(&p, K0, C0_GUESTCTL0); + + /* Save GuestCtl0 so we can access GExcCode after CPU migration */ + uasm_i_sw(&p, K0, + offsetof(struct kvm_vcpu_arch, host_cp0_guestctl0), K1); + + if (cpu_has_guestid) { + /* + * Clear root mode GuestID, so that root TLB operations use the + * root GuestID in the root TLB. + */ + uasm_i_mfc0(&p, T0, C0_GUESTCTL1); + /* Set GuestCtl1.RID = MIPS_GCTL1_ROOT_GUESTID (i.e. 0) */ + uasm_i_ins(&p, T0, ZERO, MIPS_GCTL1_RID_SHIFT, + MIPS_GCTL1_RID_WIDTH); + uasm_i_mtc0(&p, T0, C0_GUESTCTL1); + } + + /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ + uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE)); + uasm_i_and(&p, V0, V0, AT); + uasm_i_lui(&p, AT, ST0_CU0 >> 16); + uasm_i_or(&p, V0, V0, AT); +#ifdef CONFIG_64BIT + uasm_i_ori(&p, V0, V0, ST0_SX | ST0_UX); +#endif + uasm_i_mtc0(&p, V0, C0_STATUS); + uasm_i_ehb(&p); + + /* Load up host GP */ + UASM_i_LW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1); + + /* Need a stack before we can jump to "C" */ + UASM_i_LW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1); + + /* Saved host state */ + UASM_i_ADDIU(&p, SP, SP, -(int)sizeof(struct pt_regs)); + + /* + * XXXKYMA do we need to load the host ASID, maybe not because the + * kernel entries are marked GLOBAL, need to verify + */ + + /* Restore host scratch registers, as we'll have clobbered them */ + kvm_mips_build_restore_scratch(&p, K0, SP); + + /* Restore RDHWR access */ + UASM_i_LA_mostly(&p, K0, (long)&hwrena); + uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0); + uasm_i_mtc0(&p, K0, C0_HWRENA); + + /* Jump to handler */ + /* + * XXXKYMA: not sure if this is safe, how large is the stack?? + * Now jump to the kvm_mips_handle_exit() to see if we can deal + * with this in the kernel + */ + uasm_i_move(&p, A0, S0); + UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit); + uasm_i_jalr(&p, RA, T9); + UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ); + + uasm_resolve_relocs(relocs, labels); + + p = kvm_mips_build_ret_from_exit(p); + + return p; +} + +/** + * kvm_mips_build_ret_from_exit() - Assemble guest exit return handler. + * @addr: Address to start writing code. + * + * Assemble the code to handle the return from kvm_mips_handle_exit(), either + * resuming the guest or returning to the host depending on the return value. + * + * Returns: Next address after end of written function. + */ +static void *kvm_mips_build_ret_from_exit(void *addr) +{ + u32 *p = addr; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Return from handler Make sure interrupts are disabled */ + uasm_i_di(&p, ZERO); + uasm_i_ehb(&p); + + /* + * XXXKYMA: k0/k1 could have been blown away if we processed + * an exception while we were handling the exception from the + * guest, reload k1 + */ + + uasm_i_move(&p, K1, S0); + UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch)); + + /* + * Check return value, should tell us if we are returning to the + * host (handle I/O etc)or resuming the guest + */ + uasm_i_andi(&p, T0, V0, RESUME_HOST); + uasm_il_bnez(&p, &r, T0, label_return_to_host); + uasm_i_nop(&p); + + p = kvm_mips_build_ret_to_guest(p); + + uasm_l_return_to_host(&l, p); + p = kvm_mips_build_ret_to_host(p); + + uasm_resolve_relocs(relocs, labels); + + return p; +} + +/** + * kvm_mips_build_ret_to_guest() - Assemble code to return to the guest. + * @addr: Address to start writing code. + * + * Assemble the code to handle return from the guest exit handler + * (kvm_mips_handle_exit()) back to the guest. + * + * Returns: Next address after end of written function. + */ +static void *kvm_mips_build_ret_to_guest(void *addr) +{ + u32 *p = addr; + + /* Put the saved pointer to vcpu (s0) back into the scratch register */ + UASM_i_MTC0(&p, S0, scratch_vcpu[0], scratch_vcpu[1]); + + /* Load up the Guest EBASE to minimize the window where BEV is set */ + UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1); + + /* Switch EBASE back to the one used by KVM */ + uasm_i_mfc0(&p, V1, C0_STATUS); + uasm_i_lui(&p, AT, ST0_BEV >> 16); + uasm_i_or(&p, K0, V1, AT); + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + build_set_exc_base(&p, T0); + + /* Setup status register for running guest in UM */ + uasm_i_ori(&p, V1, V1, ST0_EXL | KSU_USER | ST0_IE); + UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX | ST0_SX | ST0_UX)); + uasm_i_and(&p, V1, V1, AT); + uasm_i_mtc0(&p, V1, C0_STATUS); + uasm_i_ehb(&p); + + p = kvm_mips_build_enter_guest(p); + + return p; +} + +/** + * kvm_mips_build_ret_to_host() - Assemble code to return to the host. + * @addr: Address to start writing code. + * + * Assemble the code to handle return from the guest exit handler + * (kvm_mips_handle_exit()) back to the host, i.e. to the caller of the vcpu_run + * function generated by kvm_mips_build_vcpu_run(). + * + * Returns: Next address after end of written function. + */ +static void *kvm_mips_build_ret_to_host(void *addr) +{ + u32 *p = addr; + unsigned int i; + + /* EBASE is already pointing to Linux */ + UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, host_stack), K1); + UASM_i_ADDIU(&p, K1, K1, -(int)sizeof(struct pt_regs)); + + /* + * r2/v0 is the return code, shift it down by 2 (arithmetic) + * to recover the err code + */ + uasm_i_sra(&p, K0, V0, 2); + uasm_i_move(&p, V0, K0); + + /* Load context saved on the host stack */ + for (i = 16; i < 31; ++i) { + if (i == 24) + i = 28; + UASM_i_LW(&p, i, offsetof(struct pt_regs, regs[i]), K1); + } + + /* Restore RDHWR access */ + UASM_i_LA_mostly(&p, K0, (long)&hwrena); + uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0); + uasm_i_mtc0(&p, K0, C0_HWRENA); + + /* Restore RA, which is the address we will return to */ + UASM_i_LW(&p, RA, offsetof(struct pt_regs, regs[RA]), K1); + uasm_i_jr(&p, RA); + uasm_i_nop(&p); + + return p; +} + diff --git a/arch/mips/kvm/fpu.S b/arch/mips/kvm/fpu.S new file mode 100644 index 0000000000..eb2e8cc353 --- /dev/null +++ b/arch/mips/kvm/fpu.S @@ -0,0 +1,125 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * FPU context handling code for KVM. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + */ + +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/fpregdef.h> +#include <asm/mipsregs.h> +#include <asm/regdef.h> + +/* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */ +#undef fp + + .set noreorder + .set noat + +LEAF(__kvm_save_fpu) + .set push + .set hardfloat + .set fp=64 + mfc0 t0, CP0_STATUS + sll t0, t0, 5 # is Status.FR set? + bgez t0, 1f # no: skip odd doubles + nop + sdc1 $f1, VCPU_FPR1(a0) + sdc1 $f3, VCPU_FPR3(a0) + sdc1 $f5, VCPU_FPR5(a0) + sdc1 $f7, VCPU_FPR7(a0) + sdc1 $f9, VCPU_FPR9(a0) + sdc1 $f11, VCPU_FPR11(a0) + sdc1 $f13, VCPU_FPR13(a0) + sdc1 $f15, VCPU_FPR15(a0) + sdc1 $f17, VCPU_FPR17(a0) + sdc1 $f19, VCPU_FPR19(a0) + sdc1 $f21, VCPU_FPR21(a0) + sdc1 $f23, VCPU_FPR23(a0) + sdc1 $f25, VCPU_FPR25(a0) + sdc1 $f27, VCPU_FPR27(a0) + sdc1 $f29, VCPU_FPR29(a0) + sdc1 $f31, VCPU_FPR31(a0) +1: sdc1 $f0, VCPU_FPR0(a0) + sdc1 $f2, VCPU_FPR2(a0) + sdc1 $f4, VCPU_FPR4(a0) + sdc1 $f6, VCPU_FPR6(a0) + sdc1 $f8, VCPU_FPR8(a0) + sdc1 $f10, VCPU_FPR10(a0) + sdc1 $f12, VCPU_FPR12(a0) + sdc1 $f14, VCPU_FPR14(a0) + sdc1 $f16, VCPU_FPR16(a0) + sdc1 $f18, VCPU_FPR18(a0) + sdc1 $f20, VCPU_FPR20(a0) + sdc1 $f22, VCPU_FPR22(a0) + sdc1 $f24, VCPU_FPR24(a0) + sdc1 $f26, VCPU_FPR26(a0) + sdc1 $f28, VCPU_FPR28(a0) + jr ra + sdc1 $f30, VCPU_FPR30(a0) + .set pop + END(__kvm_save_fpu) + +LEAF(__kvm_restore_fpu) + .set push + .set hardfloat + .set fp=64 + mfc0 t0, CP0_STATUS + sll t0, t0, 5 # is Status.FR set? + bgez t0, 1f # no: skip odd doubles + nop + ldc1 $f1, VCPU_FPR1(a0) + ldc1 $f3, VCPU_FPR3(a0) + ldc1 $f5, VCPU_FPR5(a0) + ldc1 $f7, VCPU_FPR7(a0) + ldc1 $f9, VCPU_FPR9(a0) + ldc1 $f11, VCPU_FPR11(a0) + ldc1 $f13, VCPU_FPR13(a0) + ldc1 $f15, VCPU_FPR15(a0) + ldc1 $f17, VCPU_FPR17(a0) + ldc1 $f19, VCPU_FPR19(a0) + ldc1 $f21, VCPU_FPR21(a0) + ldc1 $f23, VCPU_FPR23(a0) + ldc1 $f25, VCPU_FPR25(a0) + ldc1 $f27, VCPU_FPR27(a0) + ldc1 $f29, VCPU_FPR29(a0) + ldc1 $f31, VCPU_FPR31(a0) +1: ldc1 $f0, VCPU_FPR0(a0) + ldc1 $f2, VCPU_FPR2(a0) + ldc1 $f4, VCPU_FPR4(a0) + ldc1 $f6, VCPU_FPR6(a0) + ldc1 $f8, VCPU_FPR8(a0) + ldc1 $f10, VCPU_FPR10(a0) + ldc1 $f12, VCPU_FPR12(a0) + ldc1 $f14, VCPU_FPR14(a0) + ldc1 $f16, VCPU_FPR16(a0) + ldc1 $f18, VCPU_FPR18(a0) + ldc1 $f20, VCPU_FPR20(a0) + ldc1 $f22, VCPU_FPR22(a0) + ldc1 $f24, VCPU_FPR24(a0) + ldc1 $f26, VCPU_FPR26(a0) + ldc1 $f28, VCPU_FPR28(a0) + jr ra + ldc1 $f30, VCPU_FPR30(a0) + .set pop + END(__kvm_restore_fpu) + +LEAF(__kvm_restore_fcsr) + .set push + .set hardfloat + lw t0, VCPU_FCR31(a0) + /* + * The ctc1 must stay at this offset in __kvm_restore_fcsr. + * See kvm_mips_csr_die_notify() which handles t0 containing a value + * which triggers an FP Exception, which must be stepped over and + * ignored since the set cause bits must remain there for the guest. + */ + ctc1 t0, fcr31 + jr ra + nop + .set pop + END(__kvm_restore_fcsr) diff --git a/arch/mips/kvm/hypcall.c b/arch/mips/kvm/hypcall.c new file mode 100644 index 0000000000..8306343519 --- /dev/null +++ b/arch/mips/kvm/hypcall.c @@ -0,0 +1,53 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Hypercall handling. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + */ + +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/kvm_para.h> + +#define MAX_HYPCALL_ARGS 4 + +enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu, + union mips_instruction inst) +{ + unsigned int code = (inst.co_format.code >> 5) & 0x3ff; + + kvm_debug("[%#lx] HYPCALL %#03x\n", vcpu->arch.pc, code); + + switch (code) { + case 0: + return EMULATE_HYPERCALL; + default: + return EMULATE_FAIL; + }; +} + +static int kvm_mips_hypercall(struct kvm_vcpu *vcpu, unsigned long num, + const unsigned long *args, unsigned long *hret) +{ + /* Report unimplemented hypercall to guest */ + *hret = -KVM_ENOSYS; + return RESUME_GUEST; +} + +int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu) +{ + unsigned long num, args[MAX_HYPCALL_ARGS]; + + /* read hypcall number and arguments */ + num = vcpu->arch.gprs[2]; /* v0 */ + args[0] = vcpu->arch.gprs[4]; /* a0 */ + args[1] = vcpu->arch.gprs[5]; /* a1 */ + args[2] = vcpu->arch.gprs[6]; /* a2 */ + args[3] = vcpu->arch.gprs[7]; /* a3 */ + + return kvm_mips_hypercall(vcpu, num, + args, &vcpu->arch.gprs[2] /* v0 */); +} diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c new file mode 100644 index 0000000000..0277942279 --- /dev/null +++ b/arch/mips/kvm/interrupt.c @@ -0,0 +1,56 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Interrupt delivery + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/memblock.h> +#include <asm/page.h> +#include <asm/cacheflush.h> + +#include <linux/kvm_host.h> + +#include "interrupt.h" + +void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, u32 cause) +{ + unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned long *pending_clr = &vcpu->arch.pending_exceptions_clr; + unsigned int priority; + + if (!(*pending) && !(*pending_clr)) + return; + + priority = __ffs(*pending_clr); + while (priority <= MIPS_EXC_MAX) { + kvm_mips_callbacks->irq_clear(vcpu, priority, cause); + + priority = find_next_bit(pending_clr, + BITS_PER_BYTE * sizeof(*pending_clr), + priority + 1); + } + + priority = __ffs(*pending); + while (priority <= MIPS_EXC_MAX) { + kvm_mips_callbacks->irq_deliver(vcpu, priority, cause); + + priority = find_next_bit(pending, + BITS_PER_BYTE * sizeof(*pending), + priority + 1); + } + +} + +int kvm_mips_pending_timer(struct kvm_vcpu *vcpu) +{ + return test_bit(MIPS_EXC_INT_TIMER, &vcpu->arch.pending_exceptions); +} diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h new file mode 100644 index 0000000000..e529ea2bb3 --- /dev/null +++ b/arch/mips/kvm/interrupt.h @@ -0,0 +1,39 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Interrupts + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +/* + * MIPS Exception Priorities, exceptions (including interrupts) are queued up + * for the guest in the order specified by their priorities + */ + +#define MIPS_EXC_RESET 0 +#define MIPS_EXC_SRESET 1 +#define MIPS_EXC_DEBUG_ST 2 +#define MIPS_EXC_DEBUG 3 +#define MIPS_EXC_DDB 4 +#define MIPS_EXC_NMI 5 +#define MIPS_EXC_MCHK 6 +#define MIPS_EXC_INT_TIMER 7 +#define MIPS_EXC_INT_IO_1 8 +#define MIPS_EXC_INT_IO_2 9 +#define MIPS_EXC_EXECUTE 10 +#define MIPS_EXC_INT_IPI_1 11 +#define MIPS_EXC_INT_IPI_2 12 +#define MIPS_EXC_MAX 13 +/* XXXSL More to follow */ + +#define C_TI (_ULCAST_(1) << 30) + +extern u32 *kvm_priority_to_irq; +u32 kvm_irq_to_priority(u32 irq); + +int kvm_mips_pending_timer(struct kvm_vcpu *vcpu); + +void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, u32 cause); diff --git a/arch/mips/kvm/loongson_ipi.c b/arch/mips/kvm/loongson_ipi.c new file mode 100644 index 0000000000..5d53f32d83 --- /dev/null +++ b/arch/mips/kvm/loongson_ipi.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Loongson-3 Virtual IPI interrupt support. + * + * Copyright (C) 2019 Loongson Technologies, Inc. All rights reserved. + * + * Authors: Chen Zhu <zhuchen@loongson.cn> + * Authors: Huacai Chen <chenhc@lemote.com> + */ + +#include <linux/kvm_host.h> + +#define IPI_BASE 0x3ff01000ULL + +#define CORE0_STATUS_OFF 0x000 +#define CORE0_EN_OFF 0x004 +#define CORE0_SET_OFF 0x008 +#define CORE0_CLEAR_OFF 0x00c +#define CORE0_BUF_20 0x020 +#define CORE0_BUF_28 0x028 +#define CORE0_BUF_30 0x030 +#define CORE0_BUF_38 0x038 + +#define CORE1_STATUS_OFF 0x100 +#define CORE1_EN_OFF 0x104 +#define CORE1_SET_OFF 0x108 +#define CORE1_CLEAR_OFF 0x10c +#define CORE1_BUF_20 0x120 +#define CORE1_BUF_28 0x128 +#define CORE1_BUF_30 0x130 +#define CORE1_BUF_38 0x138 + +#define CORE2_STATUS_OFF 0x200 +#define CORE2_EN_OFF 0x204 +#define CORE2_SET_OFF 0x208 +#define CORE2_CLEAR_OFF 0x20c +#define CORE2_BUF_20 0x220 +#define CORE2_BUF_28 0x228 +#define CORE2_BUF_30 0x230 +#define CORE2_BUF_38 0x238 + +#define CORE3_STATUS_OFF 0x300 +#define CORE3_EN_OFF 0x304 +#define CORE3_SET_OFF 0x308 +#define CORE3_CLEAR_OFF 0x30c +#define CORE3_BUF_20 0x320 +#define CORE3_BUF_28 0x328 +#define CORE3_BUF_30 0x330 +#define CORE3_BUF_38 0x338 + +static int loongson_vipi_read(struct loongson_kvm_ipi *ipi, + gpa_t addr, int len, void *val) +{ + uint32_t core = (addr >> 8) & 3; + uint32_t node = (addr >> 44) & 3; + uint32_t id = core + node * 4; + uint64_t offset = addr & 0xff; + void *pbuf; + struct ipi_state *s = &(ipi->ipistate[id]); + + BUG_ON(offset & (len - 1)); + + switch (offset) { + case CORE0_STATUS_OFF: + *(uint64_t *)val = s->status; + break; + + case CORE0_EN_OFF: + *(uint64_t *)val = s->en; + break; + + case CORE0_SET_OFF: + *(uint64_t *)val = 0; + break; + + case CORE0_CLEAR_OFF: + *(uint64_t *)val = 0; + break; + + case CORE0_BUF_20 ... CORE0_BUF_38: + pbuf = (void *)s->buf + (offset - 0x20); + if (len == 8) + *(uint64_t *)val = *(uint64_t *)pbuf; + else /* Assume len == 4 */ + *(uint32_t *)val = *(uint32_t *)pbuf; + break; + + default: + pr_notice("%s with unknown addr %llx\n", __func__, addr); + break; + } + + return 0; +} + +static int loongson_vipi_write(struct loongson_kvm_ipi *ipi, + gpa_t addr, int len, const void *val) +{ + uint32_t core = (addr >> 8) & 3; + uint32_t node = (addr >> 44) & 3; + uint32_t id = core + node * 4; + uint64_t data, offset = addr & 0xff; + void *pbuf; + struct kvm *kvm = ipi->kvm; + struct kvm_mips_interrupt irq; + struct ipi_state *s = &(ipi->ipistate[id]); + + data = *(uint64_t *)val; + BUG_ON(offset & (len - 1)); + + switch (offset) { + case CORE0_STATUS_OFF: + break; + + case CORE0_EN_OFF: + s->en = data; + break; + + case CORE0_SET_OFF: + s->status |= data; + irq.cpu = id; + irq.irq = 6; + kvm_vcpu_ioctl_interrupt(kvm_get_vcpu(kvm, id), &irq); + break; + + case CORE0_CLEAR_OFF: + s->status &= ~data; + if (!s->status) { + irq.cpu = id; + irq.irq = -6; + kvm_vcpu_ioctl_interrupt(kvm_get_vcpu(kvm, id), &irq); + } + break; + + case CORE0_BUF_20 ... CORE0_BUF_38: + pbuf = (void *)s->buf + (offset - 0x20); + if (len == 8) + *(uint64_t *)pbuf = (uint64_t)data; + else /* Assume len == 4 */ + *(uint32_t *)pbuf = (uint32_t)data; + break; + + default: + pr_notice("%s with unknown addr %llx\n", __func__, addr); + break; + } + + return 0; +} + +static int kvm_ipi_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + unsigned long flags; + struct loongson_kvm_ipi *ipi; + struct ipi_io_device *ipi_device; + + ipi_device = container_of(dev, struct ipi_io_device, device); + ipi = ipi_device->ipi; + + spin_lock_irqsave(&ipi->lock, flags); + loongson_vipi_read(ipi, addr, len, val); + spin_unlock_irqrestore(&ipi->lock, flags); + + return 0; +} + +static int kvm_ipi_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + unsigned long flags; + struct loongson_kvm_ipi *ipi; + struct ipi_io_device *ipi_device; + + ipi_device = container_of(dev, struct ipi_io_device, device); + ipi = ipi_device->ipi; + + spin_lock_irqsave(&ipi->lock, flags); + loongson_vipi_write(ipi, addr, len, val); + spin_unlock_irqrestore(&ipi->lock, flags); + + return 0; +} + +static const struct kvm_io_device_ops kvm_ipi_ops = { + .read = kvm_ipi_read, + .write = kvm_ipi_write, +}; + +void kvm_init_loongson_ipi(struct kvm *kvm) +{ + int i; + unsigned long addr; + struct loongson_kvm_ipi *s; + struct kvm_io_device *device; + + s = &kvm->arch.ipi; + s->kvm = kvm; + spin_lock_init(&s->lock); + + /* + * Initialize IPI device + */ + for (i = 0; i < 4; i++) { + device = &s->dev_ipi[i].device; + kvm_iodevice_init(device, &kvm_ipi_ops); + addr = (((unsigned long)i) << 44) + IPI_BASE; + mutex_lock(&kvm->slots_lock); + kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, addr, 0x400, device); + mutex_unlock(&kvm->slots_lock); + s->dev_ipi[i].ipi = s; + s->dev_ipi[i].node_id = i; + } +} diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c new file mode 100644 index 0000000000..231ac052b5 --- /dev/null +++ b/arch/mips/kvm/mips.c @@ -0,0 +1,1646 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: MIPS specific KVM APIs + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kdebug.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/sched/signal.h> +#include <linux/fs.h> +#include <linux/memblock.h> +#include <linux/pgtable.h> + +#include <asm/fpu.h> +#include <asm/page.h> +#include <asm/cacheflush.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> + +#include <linux/kvm_host.h> + +#include "interrupt.h" + +#define CREATE_TRACE_POINTS +#include "trace.h" + +#ifndef VECTORSPACING +#define VECTORSPACING 0x100 /* for EI/VI mode */ +#endif + +const struct _kvm_stats_desc kvm_vm_stats_desc[] = { + KVM_GENERIC_VM_STATS() +}; + +const struct kvm_stats_header kvm_vm_stats_header = { + .name_size = KVM_STATS_NAME_SIZE, + .num_desc = ARRAY_SIZE(kvm_vm_stats_desc), + .id_offset = sizeof(struct kvm_stats_header), + .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, + .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + + sizeof(kvm_vm_stats_desc), +}; + +const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { + KVM_GENERIC_VCPU_STATS(), + STATS_DESC_COUNTER(VCPU, wait_exits), + STATS_DESC_COUNTER(VCPU, cache_exits), + STATS_DESC_COUNTER(VCPU, signal_exits), + STATS_DESC_COUNTER(VCPU, int_exits), + STATS_DESC_COUNTER(VCPU, cop_unusable_exits), + STATS_DESC_COUNTER(VCPU, tlbmod_exits), + STATS_DESC_COUNTER(VCPU, tlbmiss_ld_exits), + STATS_DESC_COUNTER(VCPU, tlbmiss_st_exits), + STATS_DESC_COUNTER(VCPU, addrerr_st_exits), + STATS_DESC_COUNTER(VCPU, addrerr_ld_exits), + STATS_DESC_COUNTER(VCPU, syscall_exits), + STATS_DESC_COUNTER(VCPU, resvd_inst_exits), + STATS_DESC_COUNTER(VCPU, break_inst_exits), + STATS_DESC_COUNTER(VCPU, trap_inst_exits), + STATS_DESC_COUNTER(VCPU, msa_fpe_exits), + STATS_DESC_COUNTER(VCPU, fpe_exits), + STATS_DESC_COUNTER(VCPU, msa_disabled_exits), + STATS_DESC_COUNTER(VCPU, flush_dcache_exits), + STATS_DESC_COUNTER(VCPU, vz_gpsi_exits), + STATS_DESC_COUNTER(VCPU, vz_gsfc_exits), + STATS_DESC_COUNTER(VCPU, vz_hc_exits), + STATS_DESC_COUNTER(VCPU, vz_grr_exits), + STATS_DESC_COUNTER(VCPU, vz_gva_exits), + STATS_DESC_COUNTER(VCPU, vz_ghfc_exits), + STATS_DESC_COUNTER(VCPU, vz_gpa_exits), + STATS_DESC_COUNTER(VCPU, vz_resvd_exits), +#ifdef CONFIG_CPU_LOONGSON64 + STATS_DESC_COUNTER(VCPU, vz_cpucfg_exits), +#endif +}; + +const struct kvm_stats_header kvm_vcpu_stats_header = { + .name_size = KVM_STATS_NAME_SIZE, + .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc), + .id_offset = sizeof(struct kvm_stats_header), + .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, + .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + + sizeof(kvm_vcpu_stats_desc), +}; + +bool kvm_trace_guest_mode_change; + +int kvm_guest_mode_change_trace_reg(void) +{ + kvm_trace_guest_mode_change = true; + return 0; +} + +void kvm_guest_mode_change_trace_unreg(void) +{ + kvm_trace_guest_mode_change = false; +} + +/* + * XXXKYMA: We are simulatoring a processor that has the WII bit set in + * Config7, so we are "runnable" if interrupts are pending + */ +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return !!(vcpu->arch.pending_exceptions); +} + +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return 1; +} + +int kvm_arch_hardware_enable(void) +{ + return kvm_mips_callbacks->hardware_enable(); +} + +void kvm_arch_hardware_disable(void) +{ + kvm_mips_callbacks->hardware_disable(); +} + +extern void kvm_init_loongson_ipi(struct kvm *kvm); + +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + switch (type) { + case KVM_VM_MIPS_AUTO: + break; + case KVM_VM_MIPS_VZ: + break; + default: + /* Unsupported KVM type */ + return -EINVAL; + } + + /* Allocate page table to map GPA -> RPA */ + kvm->arch.gpa_mm.pgd = kvm_pgd_alloc(); + if (!kvm->arch.gpa_mm.pgd) + return -ENOMEM; + +#ifdef CONFIG_CPU_LOONGSON64 + kvm_init_loongson_ipi(kvm); +#endif + + return 0; +} + +static void kvm_mips_free_gpa_pt(struct kvm *kvm) +{ + /* It should always be safe to remove after flushing the whole range */ + WARN_ON(!kvm_mips_flush_gpa_pt(kvm, 0, ~0)); + pgd_free(NULL, kvm->arch.gpa_mm.pgd); +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kvm_destroy_vcpus(kvm); + kvm_mips_free_gpa_pt(kvm); +} + +long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + return -ENOIOCTLCMD; +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ + /* Flush whole GPA */ + kvm_mips_flush_gpa_pt(kvm, 0, ~0); + kvm_flush_remote_tlbs(kvm); +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + /* + * The slot has been made invalid (ready for moving or deletion), so we + * need to ensure that it can no longer be accessed by any guest VCPUs. + */ + + spin_lock(&kvm->mmu_lock); + /* Flush slot from GPA */ + kvm_mips_flush_gpa_pt(kvm, slot->base_gfn, + slot->base_gfn + slot->npages - 1); + kvm_flush_remote_tlbs_memslot(kvm, slot); + spin_unlock(&kvm->mmu_lock); +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + const struct kvm_memory_slot *old, + struct kvm_memory_slot *new, + enum kvm_mr_change change) +{ + return 0; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, + enum kvm_mr_change change) +{ + int needs_flush; + + /* + * If dirty page logging is enabled, write protect all pages in the slot + * ready for dirty logging. + * + * There is no need to do this in any of the following cases: + * CREATE: No dirty mappings will already exist. + * MOVE/DELETE: The old mappings will already have been cleaned up by + * kvm_arch_flush_shadow_memslot() + */ + if (change == KVM_MR_FLAGS_ONLY && + (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) && + new->flags & KVM_MEM_LOG_DIRTY_PAGES)) { + spin_lock(&kvm->mmu_lock); + /* Write protect GPA page table entries */ + needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn, + new->base_gfn + new->npages - 1); + if (needs_flush) + kvm_flush_remote_tlbs_memslot(kvm, new); + spin_unlock(&kvm->mmu_lock); + } +} + +static inline void dump_handler(const char *symbol, void *start, void *end) +{ + u32 *p; + + pr_debug("LEAF(%s)\n", symbol); + + pr_debug("\t.set push\n"); + pr_debug("\t.set noreorder\n"); + + for (p = start; p < (u32 *)end; ++p) + pr_debug("\t.word\t0x%08x\t\t# %p\n", *p, p); + + pr_debug("\t.set\tpop\n"); + + pr_debug("\tEND(%s)\n", symbol); +} + +/* low level hrtimer wake routine */ +static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) +{ + struct kvm_vcpu *vcpu; + + vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer); + + kvm_mips_callbacks->queue_timer_int(vcpu); + + vcpu->arch.wait = 0; + rcuwait_wake_up(&vcpu->wait); + + return kvm_mips_count_timeout(vcpu); +} + +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) +{ + return 0; +} + +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) +{ + int err, size; + void *gebase, *p, *handler, *refill_start, *refill_end; + int i; + + kvm_debug("kvm @ %p: create cpu %d at %p\n", + vcpu->kvm, vcpu->vcpu_id, vcpu); + + err = kvm_mips_callbacks->vcpu_init(vcpu); + if (err) + return err; + + hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup; + + /* + * Allocate space for host mode exception handlers that handle + * guest mode exits + */ + if (cpu_has_veic || cpu_has_vint) + size = 0x200 + VECTORSPACING * 64; + else + size = 0x4000; + + gebase = kzalloc(ALIGN(size, PAGE_SIZE), GFP_KERNEL); + + if (!gebase) { + err = -ENOMEM; + goto out_uninit_vcpu; + } + kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n", + ALIGN(size, PAGE_SIZE), gebase); + + /* + * Check new ebase actually fits in CP0_EBase. The lack of a write gate + * limits us to the low 512MB of physical address space. If the memory + * we allocate is out of range, just give up now. + */ + if (!cpu_has_ebase_wg && virt_to_phys(gebase) >= 0x20000000) { + kvm_err("CP0_EBase.WG required for guest exception base %pK\n", + gebase); + err = -ENOMEM; + goto out_free_gebase; + } + + /* Save new ebase */ + vcpu->arch.guest_ebase = gebase; + + /* Build guest exception vectors dynamically in unmapped memory */ + handler = gebase + 0x2000; + + /* TLB refill (or XTLB refill on 64-bit VZ where KX=1) */ + refill_start = gebase; + if (IS_ENABLED(CONFIG_64BIT)) + refill_start += 0x080; + refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler); + + /* General Exception Entry point */ + kvm_mips_build_exception(gebase + 0x180, handler); + + /* For vectored interrupts poke the exception code @ all offsets 0-7 */ + for (i = 0; i < 8; i++) { + kvm_debug("L1 Vectored handler @ %p\n", + gebase + 0x200 + (i * VECTORSPACING)); + kvm_mips_build_exception(gebase + 0x200 + i * VECTORSPACING, + handler); + } + + /* General exit handler */ + p = handler; + p = kvm_mips_build_exit(p); + + /* Guest entry routine */ + vcpu->arch.vcpu_run = p; + p = kvm_mips_build_vcpu_run(p); + + /* Dump the generated code */ + pr_debug("#include <asm/asm.h>\n"); + pr_debug("#include <asm/regdef.h>\n"); + pr_debug("\n"); + dump_handler("kvm_vcpu_run", vcpu->arch.vcpu_run, p); + dump_handler("kvm_tlb_refill", refill_start, refill_end); + dump_handler("kvm_gen_exc", gebase + 0x180, gebase + 0x200); + dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run); + + /* Invalidate the icache for these ranges */ + flush_icache_range((unsigned long)gebase, + (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); + + /* Init */ + vcpu->arch.last_sched_cpu = -1; + vcpu->arch.last_exec_cpu = -1; + + /* Initial guest state */ + err = kvm_mips_callbacks->vcpu_setup(vcpu); + if (err) + goto out_free_gebase; + + return 0; + +out_free_gebase: + kfree(gebase); +out_uninit_vcpu: + kvm_mips_callbacks->vcpu_uninit(vcpu); + return err; +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + hrtimer_cancel(&vcpu->arch.comparecount_timer); + + kvm_mips_dump_stats(vcpu); + + kvm_mmu_free_memory_caches(vcpu); + kfree(vcpu->arch.guest_ebase); + + kvm_mips_callbacks->vcpu_uninit(vcpu); +} + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -ENOIOCTLCMD; +} + +/* + * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while + * the vCPU is running. + * + * This must be noinstr as instrumentation may make use of RCU, and this is not + * safe during the EQS. + */ +static int noinstr kvm_mips_vcpu_enter_exit(struct kvm_vcpu *vcpu) +{ + int ret; + + guest_state_enter_irqoff(); + ret = kvm_mips_callbacks->vcpu_run(vcpu); + guest_state_exit_irqoff(); + + return ret; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) +{ + int r = -EINTR; + + vcpu_load(vcpu); + + kvm_sigset_activate(vcpu); + + if (vcpu->mmio_needed) { + if (!vcpu->mmio_is_write) + kvm_mips_complete_mmio_load(vcpu); + vcpu->mmio_needed = 0; + } + + if (vcpu->run->immediate_exit) + goto out; + + lose_fpu(1); + + local_irq_disable(); + guest_timing_enter_irqoff(); + trace_kvm_enter(vcpu); + + /* + * Make sure the read of VCPU requests in vcpu_run() callback is not + * reordered ahead of the write to vcpu->mode, or we could miss a TLB + * flush request while the requester sees the VCPU as outside of guest + * mode and not needing an IPI. + */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); + + r = kvm_mips_vcpu_enter_exit(vcpu); + + /* + * We must ensure that any pending interrupts are taken before + * we exit guest timing so that timer ticks are accounted as + * guest time. Transiently unmask interrupts so that any + * pending interrupts are taken. + * + * TODO: is there a barrier which ensures that pending interrupts are + * recognised? Currently this just hopes that the CPU takes any pending + * interrupts between the enable and disable. + */ + local_irq_enable(); + local_irq_disable(); + + trace_kvm_out(vcpu); + guest_timing_exit_irqoff(); + local_irq_enable(); + +out: + kvm_sigset_deactivate(vcpu); + + vcpu_put(vcpu); + return r; +} + +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) +{ + int intr = (int)irq->irq; + struct kvm_vcpu *dvcpu = NULL; + + if (intr == kvm_priority_to_irq[MIPS_EXC_INT_IPI_1] || + intr == kvm_priority_to_irq[MIPS_EXC_INT_IPI_2] || + intr == (-kvm_priority_to_irq[MIPS_EXC_INT_IPI_1]) || + intr == (-kvm_priority_to_irq[MIPS_EXC_INT_IPI_2])) + kvm_debug("%s: CPU: %d, INTR: %d\n", __func__, irq->cpu, + (int)intr); + + if (irq->cpu == -1) + dvcpu = vcpu; + else + dvcpu = kvm_get_vcpu(vcpu->kvm, irq->cpu); + + if (intr == 2 || intr == 3 || intr == 4 || intr == 6) { + kvm_mips_callbacks->queue_io_int(dvcpu, irq); + + } else if (intr == -2 || intr == -3 || intr == -4 || intr == -6) { + kvm_mips_callbacks->dequeue_io_int(dvcpu, irq); + } else { + kvm_err("%s: invalid interrupt ioctl (%d:%d)\n", __func__, + irq->cpu, irq->irq); + return -EINVAL; + } + + dvcpu->arch.wait = 0; + + rcuwait_wake_up(&dvcpu->wait); + + return 0; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -ENOIOCTLCMD; +} + +static u64 kvm_mips_get_one_regs[] = { + KVM_REG_MIPS_R0, + KVM_REG_MIPS_R1, + KVM_REG_MIPS_R2, + KVM_REG_MIPS_R3, + KVM_REG_MIPS_R4, + KVM_REG_MIPS_R5, + KVM_REG_MIPS_R6, + KVM_REG_MIPS_R7, + KVM_REG_MIPS_R8, + KVM_REG_MIPS_R9, + KVM_REG_MIPS_R10, + KVM_REG_MIPS_R11, + KVM_REG_MIPS_R12, + KVM_REG_MIPS_R13, + KVM_REG_MIPS_R14, + KVM_REG_MIPS_R15, + KVM_REG_MIPS_R16, + KVM_REG_MIPS_R17, + KVM_REG_MIPS_R18, + KVM_REG_MIPS_R19, + KVM_REG_MIPS_R20, + KVM_REG_MIPS_R21, + KVM_REG_MIPS_R22, + KVM_REG_MIPS_R23, + KVM_REG_MIPS_R24, + KVM_REG_MIPS_R25, + KVM_REG_MIPS_R26, + KVM_REG_MIPS_R27, + KVM_REG_MIPS_R28, + KVM_REG_MIPS_R29, + KVM_REG_MIPS_R30, + KVM_REG_MIPS_R31, + +#ifndef CONFIG_CPU_MIPSR6 + KVM_REG_MIPS_HI, + KVM_REG_MIPS_LO, +#endif + KVM_REG_MIPS_PC, +}; + +static u64 kvm_mips_get_one_regs_fpu[] = { + KVM_REG_MIPS_FCR_IR, + KVM_REG_MIPS_FCR_CSR, +}; + +static u64 kvm_mips_get_one_regs_msa[] = { + KVM_REG_MIPS_MSA_IR, + KVM_REG_MIPS_MSA_CSR, +}; + +static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu) +{ + unsigned long ret; + + ret = ARRAY_SIZE(kvm_mips_get_one_regs); + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) { + ret += ARRAY_SIZE(kvm_mips_get_one_regs_fpu) + 48; + /* odd doubles */ + if (boot_cpu_data.fpu_id & MIPS_FPIR_F64) + ret += 16; + } + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) + ret += ARRAY_SIZE(kvm_mips_get_one_regs_msa) + 32; + ret += kvm_mips_callbacks->num_regs(vcpu); + + return ret; +} + +static int kvm_mips_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) +{ + u64 index; + unsigned int i; + + if (copy_to_user(indices, kvm_mips_get_one_regs, + sizeof(kvm_mips_get_one_regs))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_mips_get_one_regs); + + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) { + if (copy_to_user(indices, kvm_mips_get_one_regs_fpu, + sizeof(kvm_mips_get_one_regs_fpu))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_mips_get_one_regs_fpu); + + for (i = 0; i < 32; ++i) { + index = KVM_REG_MIPS_FPR_32(i); + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + + /* skip odd doubles if no F64 */ + if (i & 1 && !(boot_cpu_data.fpu_id & MIPS_FPIR_F64)) + continue; + + index = KVM_REG_MIPS_FPR_64(i); + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + } + + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) { + if (copy_to_user(indices, kvm_mips_get_one_regs_msa, + sizeof(kvm_mips_get_one_regs_msa))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_mips_get_one_regs_msa); + + for (i = 0; i < 32; ++i) { + index = KVM_REG_MIPS_VEC_128(i); + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + } + + return kvm_mips_callbacks->copy_reg_indices(vcpu, indices); +} + +static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + struct mips_fpu_struct *fpu = &vcpu->arch.fpu; + int ret; + s64 v; + s64 vs[2]; + unsigned int idx; + + switch (reg->id) { + /* General purpose registers */ + case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31: + v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0]; + break; +#ifndef CONFIG_CPU_MIPSR6 + case KVM_REG_MIPS_HI: + v = (long)vcpu->arch.hi; + break; + case KVM_REG_MIPS_LO: + v = (long)vcpu->arch.lo; + break; +#endif + case KVM_REG_MIPS_PC: + v = (long)vcpu->arch.pc; + break; + + /* Floating point registers */ + case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_32(0); + /* Odd singles in top of even double when FR=0 */ + if (kvm_read_c0_guest_status(cop0) & ST0_FR) + v = get_fpr32(&fpu->fpr[idx], 0); + else + v = get_fpr32(&fpu->fpr[idx & ~1], idx & 1); + break; + case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_64(0); + /* Can't access odd doubles in FR=0 mode */ + if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + v = get_fpr64(&fpu->fpr[idx], 0); + break; + case KVM_REG_MIPS_FCR_IR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + v = boot_cpu_data.fpu_id; + break; + case KVM_REG_MIPS_FCR_CSR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + v = fpu->fcr31; + break; + + /* MIPS SIMD Architecture (MSA) registers */ + case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31): + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + /* Can't access MSA registers in FR=0 mode */ + if (!(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_VEC_128(0); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* least significant byte first */ + vs[0] = get_fpr64(&fpu->fpr[idx], 0); + vs[1] = get_fpr64(&fpu->fpr[idx], 1); +#else + /* most significant byte first */ + vs[0] = get_fpr64(&fpu->fpr[idx], 1); + vs[1] = get_fpr64(&fpu->fpr[idx], 0); +#endif + break; + case KVM_REG_MIPS_MSA_IR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + v = boot_cpu_data.msa_id; + break; + case KVM_REG_MIPS_MSA_CSR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + v = fpu->msacsr; + break; + + /* registers to be handled specially */ + default: + ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v); + if (ret) + return ret; + break; + } + if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) { + u64 __user *uaddr64 = (u64 __user *)(long)reg->addr; + + return put_user(v, uaddr64); + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) { + u32 __user *uaddr32 = (u32 __user *)(long)reg->addr; + u32 v32 = (u32)v; + + return put_user(v32, uaddr32); + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { + void __user *uaddr = (void __user *)(long)reg->addr; + + return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0; + } else { + return -EINVAL; + } +} + +static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + struct mips_fpu_struct *fpu = &vcpu->arch.fpu; + s64 v; + s64 vs[2]; + unsigned int idx; + + if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) { + u64 __user *uaddr64 = (u64 __user *)(long)reg->addr; + + if (get_user(v, uaddr64) != 0) + return -EFAULT; + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) { + u32 __user *uaddr32 = (u32 __user *)(long)reg->addr; + s32 v32; + + if (get_user(v32, uaddr32) != 0) + return -EFAULT; + v = (s64)v32; + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { + void __user *uaddr = (void __user *)(long)reg->addr; + + return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0; + } else { + return -EINVAL; + } + + switch (reg->id) { + /* General purpose registers */ + case KVM_REG_MIPS_R0: + /* Silently ignore requests to set $0 */ + break; + case KVM_REG_MIPS_R1 ... KVM_REG_MIPS_R31: + vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0] = v; + break; +#ifndef CONFIG_CPU_MIPSR6 + case KVM_REG_MIPS_HI: + vcpu->arch.hi = v; + break; + case KVM_REG_MIPS_LO: + vcpu->arch.lo = v; + break; +#endif + case KVM_REG_MIPS_PC: + vcpu->arch.pc = v; + break; + + /* Floating point registers */ + case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_32(0); + /* Odd singles in top of even double when FR=0 */ + if (kvm_read_c0_guest_status(cop0) & ST0_FR) + set_fpr32(&fpu->fpr[idx], 0, v); + else + set_fpr32(&fpu->fpr[idx & ~1], idx & 1, v); + break; + case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_64(0); + /* Can't access odd doubles in FR=0 mode */ + if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + set_fpr64(&fpu->fpr[idx], 0, v); + break; + case KVM_REG_MIPS_FCR_IR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + /* Read-only */ + break; + case KVM_REG_MIPS_FCR_CSR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + fpu->fcr31 = v; + break; + + /* MIPS SIMD Architecture (MSA) registers */ + case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31): + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_VEC_128(0); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* least significant byte first */ + set_fpr64(&fpu->fpr[idx], 0, vs[0]); + set_fpr64(&fpu->fpr[idx], 1, vs[1]); +#else + /* most significant byte first */ + set_fpr64(&fpu->fpr[idx], 1, vs[0]); + set_fpr64(&fpu->fpr[idx], 0, vs[1]); +#endif + break; + case KVM_REG_MIPS_MSA_IR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + /* Read-only */ + break; + case KVM_REG_MIPS_MSA_CSR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + fpu->msacsr = v; + break; + + /* registers to be handled specially */ + default: + return kvm_mips_callbacks->set_one_reg(vcpu, reg, v); + } + return 0; +} + +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + int r = 0; + + if (!kvm_vm_ioctl_check_extension(vcpu->kvm, cap->cap)) + return -EINVAL; + if (cap->flags) + return -EINVAL; + if (cap->args[0]) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_MIPS_FPU: + vcpu->arch.fpu_enabled = true; + break; + case KVM_CAP_MIPS_MSA: + vcpu->arch.msa_enabled = true; + break; + default: + r = -EINVAL; + break; + } + + return r; +} + +long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + + if (ioctl == KVM_INTERRUPT) { + struct kvm_mips_interrupt irq; + + if (copy_from_user(&irq, argp, sizeof(irq))) + return -EFAULT; + kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__, + irq.irq); + + return kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } + + return -ENOIOCTLCMD; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + long r; + + vcpu_load(vcpu); + + switch (ioctl) { + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + break; + if (ioctl == KVM_SET_ONE_REG) + r = kvm_mips_set_reg(vcpu, ®); + else + r = kvm_mips_get_reg(vcpu, ®); + break; + } + case KVM_GET_REG_LIST: { + struct kvm_reg_list __user *user_list = argp; + struct kvm_reg_list reg_list; + unsigned n; + + r = -EFAULT; + if (copy_from_user(®_list, user_list, sizeof(reg_list))) + break; + n = reg_list.n; + reg_list.n = kvm_mips_num_regs(vcpu); + if (copy_to_user(user_list, ®_list, sizeof(reg_list))) + break; + r = -E2BIG; + if (n < reg_list.n) + break; + r = kvm_mips_copy_reg_indices(vcpu, user_list->reg); + break; + } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + break; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } + default: + r = -ENOIOCTLCMD; + } + + vcpu_put(vcpu); + return r; +} + +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ + +} + +int kvm_arch_flush_remote_tlbs(struct kvm *kvm) +{ + kvm_mips_callbacks->prepare_flush_shadow(kvm); + return 1; +} + +int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +{ + int r; + + switch (ioctl) { + default: + r = -ENOIOCTLCMD; + } + + return r; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -ENOIOCTLCMD; +} + +void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOIOCTLCMD; +} + +vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_ONE_REG: + case KVM_CAP_ENABLE_CAP: + case KVM_CAP_READONLY_MEM: + case KVM_CAP_SYNC_MMU: + case KVM_CAP_IMMEDIATE_EXIT: + r = 1; + break; + case KVM_CAP_NR_VCPUS: + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); + break; + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; + case KVM_CAP_MAX_VCPU_ID: + r = KVM_MAX_VCPU_IDS; + break; + case KVM_CAP_MIPS_FPU: + /* We don't handle systems with inconsistent cpu_has_fpu */ + r = !!raw_cpu_has_fpu; + break; + case KVM_CAP_MIPS_MSA: + /* + * We don't support MSA vector partitioning yet: + * 1) It would require explicit support which can't be tested + * yet due to lack of support in current hardware. + * 2) It extends the state that would need to be saved/restored + * by e.g. QEMU for migration. + * + * When vector partitioning hardware becomes available, support + * could be added by requiring a flag when enabling + * KVM_CAP_MIPS_MSA capability to indicate that userland knows + * to save/restore the appropriate extra state. + */ + r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF); + break; + default: + r = kvm_mips_callbacks->check_extension(kvm, ext); + break; + } + return r; +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return kvm_mips_pending_timer(vcpu) || + kvm_read_c0_guest_cause(&vcpu->arch.cop0) & C_TI; +} + +int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) +{ + int i; + struct mips_coproc *cop0; + + if (!vcpu) + return -1; + + kvm_debug("VCPU Register Dump:\n"); + kvm_debug("\tpc = 0x%08lx\n", vcpu->arch.pc); + kvm_debug("\texceptions: %08lx\n", vcpu->arch.pending_exceptions); + + for (i = 0; i < 32; i += 4) { + kvm_debug("\tgpr%02d: %08lx %08lx %08lx %08lx\n", i, + vcpu->arch.gprs[i], + vcpu->arch.gprs[i + 1], + vcpu->arch.gprs[i + 2], vcpu->arch.gprs[i + 3]); + } + kvm_debug("\thi: 0x%08lx\n", vcpu->arch.hi); + kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo); + + cop0 = &vcpu->arch.cop0; + kvm_debug("\tStatus: 0x%08x, Cause: 0x%08x\n", + kvm_read_c0_guest_status(cop0), + kvm_read_c0_guest_cause(cop0)); + + kvm_debug("\tEPC: 0x%08lx\n", kvm_read_c0_guest_epc(cop0)); + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + vcpu_load(vcpu); + + for (i = 1; i < ARRAY_SIZE(vcpu->arch.gprs); i++) + vcpu->arch.gprs[i] = regs->gpr[i]; + vcpu->arch.gprs[0] = 0; /* zero is special, and cannot be set. */ + vcpu->arch.hi = regs->hi; + vcpu->arch.lo = regs->lo; + vcpu->arch.pc = regs->pc; + + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + vcpu_load(vcpu); + + for (i = 0; i < ARRAY_SIZE(vcpu->arch.gprs); i++) + regs->gpr[i] = vcpu->arch.gprs[i]; + + regs->hi = vcpu->arch.hi; + regs->lo = vcpu->arch.lo; + regs->pc = vcpu->arch.pc; + + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return 0; +} + +static void kvm_mips_set_c0_status(void) +{ + u32 status = read_c0_status(); + + if (cpu_has_dsp) + status |= (ST0_MX); + + write_c0_status(status); + ehb(); +} + +/* + * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) + */ +static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 cause = vcpu->arch.host_cp0_cause; + u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; + enum emulation_result er = EMULATE_DONE; + u32 inst; + int ret = RESUME_GUEST; + + vcpu->mode = OUTSIDE_GUEST_MODE; + + /* Set a default exit reason */ + run->exit_reason = KVM_EXIT_UNKNOWN; + run->ready_for_interrupt_injection = 1; + + /* + * Set the appropriate status bits based on host CPU features, + * before we hit the scheduler + */ + kvm_mips_set_c0_status(); + + local_irq_enable(); + + kvm_debug("kvm_mips_handle_exit: cause: %#x, PC: %p, kvm_run: %p, kvm_vcpu: %p\n", + cause, opc, run, vcpu); + trace_kvm_exit(vcpu, exccode); + + switch (exccode) { + case EXCCODE_INT: + kvm_debug("[%d]EXCCODE_INT @ %p\n", vcpu->vcpu_id, opc); + + ++vcpu->stat.int_exits; + + if (need_resched()) + cond_resched(); + + ret = RESUME_GUEST; + break; + + case EXCCODE_CPU: + kvm_debug("EXCCODE_CPU: @ PC: %p\n", opc); + + ++vcpu->stat.cop_unusable_exits; + ret = kvm_mips_callbacks->handle_cop_unusable(vcpu); + /* XXXKYMA: Might need to return to user space */ + if (run->exit_reason == KVM_EXIT_IRQ_WINDOW_OPEN) + ret = RESUME_HOST; + break; + + case EXCCODE_MOD: + ++vcpu->stat.tlbmod_exits; + ret = kvm_mips_callbacks->handle_tlb_mod(vcpu); + break; + + case EXCCODE_TLBS: + kvm_debug("TLB ST fault: cause %#x, status %#x, PC: %p, BadVaddr: %#lx\n", + cause, kvm_read_c0_guest_status(&vcpu->arch.cop0), opc, + badvaddr); + + ++vcpu->stat.tlbmiss_st_exits; + ret = kvm_mips_callbacks->handle_tlb_st_miss(vcpu); + break; + + case EXCCODE_TLBL: + kvm_debug("TLB LD fault: cause %#x, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); + + ++vcpu->stat.tlbmiss_ld_exits; + ret = kvm_mips_callbacks->handle_tlb_ld_miss(vcpu); + break; + + case EXCCODE_ADES: + ++vcpu->stat.addrerr_st_exits; + ret = kvm_mips_callbacks->handle_addr_err_st(vcpu); + break; + + case EXCCODE_ADEL: + ++vcpu->stat.addrerr_ld_exits; + ret = kvm_mips_callbacks->handle_addr_err_ld(vcpu); + break; + + case EXCCODE_SYS: + ++vcpu->stat.syscall_exits; + ret = kvm_mips_callbacks->handle_syscall(vcpu); + break; + + case EXCCODE_RI: + ++vcpu->stat.resvd_inst_exits; + ret = kvm_mips_callbacks->handle_res_inst(vcpu); + break; + + case EXCCODE_BP: + ++vcpu->stat.break_inst_exits; + ret = kvm_mips_callbacks->handle_break(vcpu); + break; + + case EXCCODE_TR: + ++vcpu->stat.trap_inst_exits; + ret = kvm_mips_callbacks->handle_trap(vcpu); + break; + + case EXCCODE_MSAFPE: + ++vcpu->stat.msa_fpe_exits; + ret = kvm_mips_callbacks->handle_msa_fpe(vcpu); + break; + + case EXCCODE_FPE: + ++vcpu->stat.fpe_exits; + ret = kvm_mips_callbacks->handle_fpe(vcpu); + break; + + case EXCCODE_MSADIS: + ++vcpu->stat.msa_disabled_exits; + ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); + break; + + case EXCCODE_GE: + /* defer exit accounting to handler */ + ret = kvm_mips_callbacks->handle_guest_exit(vcpu); + break; + + default: + if (cause & CAUSEF_BD) + opc += 1; + inst = 0; + kvm_get_badinstr(opc, vcpu, &inst); + kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", + exccode, opc, inst, badvaddr, + kvm_read_c0_guest_status(&vcpu->arch.cop0)); + kvm_arch_vcpu_dump_regs(vcpu); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + break; + + } + + local_irq_disable(); + + if (ret == RESUME_GUEST) + kvm_vz_acquire_htimer(vcpu); + + if (er == EMULATE_DONE && !(ret & RESUME_HOST)) + kvm_mips_deliver_interrupts(vcpu, cause); + + if (!(ret & RESUME_HOST)) { + /* Only check for signals if not already exiting to userspace */ + if (signal_pending(current)) { + run->exit_reason = KVM_EXIT_INTR; + ret = (-EINTR << 2) | RESUME_HOST; + ++vcpu->stat.signal_exits; + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL); + } + } + + if (ret == RESUME_GUEST) { + trace_kvm_reenter(vcpu); + + /* + * Make sure the read of VCPU requests in vcpu_reenter() + * callback is not reordered ahead of the write to vcpu->mode, + * or we could miss a TLB flush request while the requester sees + * the VCPU as outside of guest mode and not needing an IPI. + */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); + + kvm_mips_callbacks->vcpu_reenter(vcpu); + + /* + * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context + * is live), restore FCR31 / MSACSR. + * + * This should be before returning to the guest exception + * vector, as it may well cause an [MSA] FP exception if there + * are pending exception bits unmasked. (see + * kvm_mips_csr_die_notifier() for how that is handled). + */ + if (kvm_mips_guest_has_fpu(&vcpu->arch) && + read_c0_status() & ST0_CU1) + __kvm_restore_fcsr(&vcpu->arch); + + if (kvm_mips_guest_has_msa(&vcpu->arch) && + read_c0_config5() & MIPS_CONF5_MSAEN) + __kvm_restore_msacsr(&vcpu->arch); + } + return ret; +} + +int noinstr kvm_mips_handle_exit(struct kvm_vcpu *vcpu) +{ + int ret; + + guest_state_exit_irqoff(); + ret = __kvm_mips_handle_exit(vcpu); + guest_state_enter_irqoff(); + + return ret; +} + +/* Enable FPU for guest and restore context */ +void kvm_own_fpu(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + unsigned int sr, cfg5; + + preempt_disable(); + + sr = kvm_read_c0_guest_status(cop0); + + /* + * If MSA state is already live, it is undefined how it interacts with + * FR=0 FPU state, and we don't want to hit reserved instruction + * exceptions trying to save the MSA state later when CU=1 && FR=1, so + * play it safe and save it first. + */ + if (cpu_has_msa && sr & ST0_CU1 && !(sr & ST0_FR) && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) + kvm_lose_fpu(vcpu); + + /* + * Enable FPU for guest + * We set FR and FRE according to guest context + */ + change_c0_status(ST0_CU1 | ST0_FR, sr); + if (cpu_has_fre) { + cfg5 = kvm_read_c0_guest_config5(cop0); + change_c0_config5(MIPS_CONF5_FRE, cfg5); + } + enable_fpu_hazard(); + + /* If guest FPU state not active, restore it now */ + if (!(vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)) { + __kvm_restore_fpu(&vcpu->arch); + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_FPU; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU); + } else { + trace_kvm_aux(vcpu, KVM_TRACE_AUX_ENABLE, KVM_TRACE_AUX_FPU); + } + + preempt_enable(); +} + +#ifdef CONFIG_CPU_HAS_MSA +/* Enable MSA for guest and restore context */ +void kvm_own_msa(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + unsigned int sr, cfg5; + + preempt_disable(); + + /* + * Enable FPU if enabled in guest, since we're restoring FPU context + * anyway. We set FR and FRE according to guest context. + */ + if (kvm_mips_guest_has_fpu(&vcpu->arch)) { + sr = kvm_read_c0_guest_status(cop0); + + /* + * If FR=0 FPU state is already live, it is undefined how it + * interacts with MSA state, so play it safe and save it first. + */ + if (!(sr & ST0_FR) && + (vcpu->arch.aux_inuse & (KVM_MIPS_AUX_FPU | + KVM_MIPS_AUX_MSA)) == KVM_MIPS_AUX_FPU) + kvm_lose_fpu(vcpu); + + change_c0_status(ST0_CU1 | ST0_FR, sr); + if (sr & ST0_CU1 && cpu_has_fre) { + cfg5 = kvm_read_c0_guest_config5(cop0); + change_c0_config5(MIPS_CONF5_FRE, cfg5); + } + } + + /* Enable MSA for guest */ + set_c0_config5(MIPS_CONF5_MSAEN); + enable_fpu_hazard(); + + switch (vcpu->arch.aux_inuse & (KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA)) { + case KVM_MIPS_AUX_FPU: + /* + * Guest FPU state already loaded, only restore upper MSA state + */ + __kvm_restore_msa_upper(&vcpu->arch); + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_MSA; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_MSA); + break; + case 0: + /* Neither FPU or MSA already active, restore full MSA state */ + __kvm_restore_msa(&vcpu->arch); + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_MSA; + if (kvm_mips_guest_has_fpu(&vcpu->arch)) + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_FPU; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, + KVM_TRACE_AUX_FPU_MSA); + break; + default: + trace_kvm_aux(vcpu, KVM_TRACE_AUX_ENABLE, KVM_TRACE_AUX_MSA); + break; + } + + preempt_enable(); +} +#endif + +/* Drop FPU & MSA without saving it */ +void kvm_drop_fpu(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { + disable_msa(); + trace_kvm_aux(vcpu, KVM_TRACE_AUX_DISCARD, KVM_TRACE_AUX_MSA); + vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_MSA; + } + if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { + clear_c0_status(ST0_CU1 | ST0_FR); + trace_kvm_aux(vcpu, KVM_TRACE_AUX_DISCARD, KVM_TRACE_AUX_FPU); + vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU; + } + preempt_enable(); +} + +/* Save and disable FPU & MSA */ +void kvm_lose_fpu(struct kvm_vcpu *vcpu) +{ + /* + * With T&E, FPU & MSA get disabled in root context (hardware) when it + * is disabled in guest context (software), but the register state in + * the hardware may still be in use. + * This is why we explicitly re-enable the hardware before saving. + */ + + preempt_disable(); + if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { + __kvm_save_msa(&vcpu->arch); + trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_MSA); + + /* Disable MSA & FPU */ + disable_msa(); + if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { + clear_c0_status(ST0_CU1 | ST0_FR); + disable_fpu_hazard(); + } + vcpu->arch.aux_inuse &= ~(KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA); + } else if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { + __kvm_save_fpu(&vcpu->arch); + vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU); + + /* Disable FPU */ + clear_c0_status(ST0_CU1 | ST0_FR); + disable_fpu_hazard(); + } + preempt_enable(); +} + +/* + * Step over a specific ctc1 to FCSR and a specific ctcmsa to MSACSR which are + * used to restore guest FCSR/MSACSR state and may trigger a "harmless" FP/MSAFP + * exception if cause bits are set in the value being written. + */ +static int kvm_mips_csr_die_notify(struct notifier_block *self, + unsigned long cmd, void *ptr) +{ + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + unsigned long pc; + + /* Only interested in FPE and MSAFPE */ + if (cmd != DIE_FP && cmd != DIE_MSAFP) + return NOTIFY_DONE; + + /* Return immediately if guest context isn't active */ + if (!(current->flags & PF_VCPU)) + return NOTIFY_DONE; + + /* Should never get here from user mode */ + BUG_ON(user_mode(regs)); + + pc = instruction_pointer(regs); + switch (cmd) { + case DIE_FP: + /* match 2nd instruction in __kvm_restore_fcsr */ + if (pc != (unsigned long)&__kvm_restore_fcsr + 4) + return NOTIFY_DONE; + break; + case DIE_MSAFP: + /* match 2nd/3rd instruction in __kvm_restore_msacsr */ + if (!cpu_has_msa || + pc < (unsigned long)&__kvm_restore_msacsr + 4 || + pc > (unsigned long)&__kvm_restore_msacsr + 8) + return NOTIFY_DONE; + break; + } + + /* Move PC forward a little and continue executing */ + instruction_pointer(regs) += 4; + + return NOTIFY_STOP; +} + +static struct notifier_block kvm_mips_csr_die_notifier = { + .notifier_call = kvm_mips_csr_die_notify, +}; + +static u32 kvm_default_priority_to_irq[MIPS_EXC_MAX] = { + [MIPS_EXC_INT_TIMER] = C_IRQ5, + [MIPS_EXC_INT_IO_1] = C_IRQ0, + [MIPS_EXC_INT_IPI_1] = C_IRQ1, + [MIPS_EXC_INT_IPI_2] = C_IRQ2, +}; + +static u32 kvm_loongson3_priority_to_irq[MIPS_EXC_MAX] = { + [MIPS_EXC_INT_TIMER] = C_IRQ5, + [MIPS_EXC_INT_IO_1] = C_IRQ0, + [MIPS_EXC_INT_IO_2] = C_IRQ1, + [MIPS_EXC_INT_IPI_1] = C_IRQ4, +}; + +u32 *kvm_priority_to_irq = kvm_default_priority_to_irq; + +u32 kvm_irq_to_priority(u32 irq) +{ + int i; + + for (i = MIPS_EXC_INT_TIMER; i < MIPS_EXC_MAX; i++) { + if (kvm_priority_to_irq[i] == (1 << (irq + 8))) + return i; + } + + return MIPS_EXC_MAX; +} + +static int __init kvm_mips_init(void) +{ + int ret; + + if (cpu_has_mmid) { + pr_warn("KVM does not yet support MMIDs. KVM Disabled\n"); + return -EOPNOTSUPP; + } + + ret = kvm_mips_entry_setup(); + if (ret) + return ret; + + ret = kvm_mips_emulation_init(); + if (ret) + return ret; + + + if (boot_cpu_type() == CPU_LOONGSON64) + kvm_priority_to_irq = kvm_loongson3_priority_to_irq; + + register_die_notifier(&kvm_mips_csr_die_notifier); + + ret = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (ret) { + unregister_die_notifier(&kvm_mips_csr_die_notifier); + return ret; + } + return 0; +} + +static void __exit kvm_mips_exit(void) +{ + kvm_exit(); + + unregister_die_notifier(&kvm_mips_csr_die_notifier); +} + +module_init(kvm_mips_init); +module_exit(kvm_mips_exit); + +EXPORT_TRACEPOINT_SYMBOL(kvm_exit); diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c new file mode 100644 index 0000000000..467ee6b95a --- /dev/null +++ b/arch/mips/kvm/mmu.c @@ -0,0 +1,757 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS MMU handling in the KVM module. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/highmem.h> +#include <linux/kvm_host.h> +#include <linux/uaccess.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> + +/* + * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels + * for which pages need to be cached. + */ +#if defined(__PAGETABLE_PMD_FOLDED) +#define KVM_MMU_CACHE_MIN_PAGES 1 +#else +#define KVM_MMU_CACHE_MIN_PAGES 2 +#endif + +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) +{ + kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); +} + +/** + * kvm_pgd_init() - Initialise KVM GPA page directory. + * @page: Pointer to page directory (PGD) for KVM GPA. + * + * Initialise a KVM GPA page directory with pointers to the invalid table, i.e. + * representing no mappings. This is similar to pgd_init(), however it + * initialises all the page directory pointers, not just the ones corresponding + * to the userland address space (since it is for the guest physical address + * space rather than a virtual address space). + */ +static void kvm_pgd_init(void *page) +{ + unsigned long *p, *end; + unsigned long entry; + +#ifdef __PAGETABLE_PMD_FOLDED + entry = (unsigned long)invalid_pte_table; +#else + entry = (unsigned long)invalid_pmd_table; +#endif + + p = (unsigned long *)page; + end = p + PTRS_PER_PGD; + + do { + p[0] = entry; + p[1] = entry; + p[2] = entry; + p[3] = entry; + p[4] = entry; + p += 8; + p[-3] = entry; + p[-2] = entry; + p[-1] = entry; + } while (p != end); +} + +/** + * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory. + * + * Allocate a blank KVM GPA page directory (PGD) for representing guest physical + * to host physical page mappings. + * + * Returns: Pointer to new KVM GPA page directory. + * NULL on allocation failure. + */ +pgd_t *kvm_pgd_alloc(void) +{ + pgd_t *ret; + + ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER); + if (ret) + kvm_pgd_init(ret); + + return ret; +} + +/** + * kvm_mips_walk_pgd() - Walk page table with optional allocation. + * @pgd: Page directory pointer. + * @addr: Address to index page table using. + * @cache: MMU page cache to allocate new page tables from, or NULL. + * + * Walk the page tables pointed to by @pgd to find the PTE corresponding to the + * address @addr. If page tables don't exist for @addr, they will be created + * from the MMU cache if @cache is not NULL. + * + * Returns: Pointer to pte_t corresponding to @addr. + * NULL if a page table doesn't exist for @addr and !@cache. + * NULL if a page table allocation failed. + */ +static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, + unsigned long addr) +{ + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd += pgd_index(addr); + if (pgd_none(*pgd)) { + /* Not used on MIPS yet */ + BUG(); + return NULL; + } + p4d = p4d_offset(pgd, addr); + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd_t *new_pmd; + + if (!cache) + return NULL; + new_pmd = kvm_mmu_memory_cache_alloc(cache); + pmd_init(new_pmd); + pud_populate(NULL, pud, new_pmd); + } + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t *new_pte; + + if (!cache) + return NULL; + new_pte = kvm_mmu_memory_cache_alloc(cache); + clear_page(new_pte); + pmd_populate_kernel(NULL, pmd, new_pte); + } + return pte_offset_kernel(pmd, addr); +} + +/* Caller must hold kvm->mm_lock */ +static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm, + struct kvm_mmu_memory_cache *cache, + unsigned long addr) +{ + return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr); +} + +/* + * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}. + * Flush a range of guest physical address space from the VM's GPA page tables. + */ + +static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa, + unsigned long end_gpa) +{ + int i_min = pte_index(start_gpa); + int i_max = pte_index(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1); + int i; + + for (i = i_min; i <= i_max; ++i) { + if (!pte_present(pte[i])) + continue; + + set_pte(pte + i, __pte(0)); + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa, + unsigned long end_gpa) +{ + pte_t *pte; + unsigned long end = ~0ul; + int i_min = pmd_index(start_gpa); + int i_max = pmd_index(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pmd_present(pmd[i])) + continue; + + pte = pte_offset_kernel(pmd + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) { + pmd_clear(pmd + i); + pte_free_kernel(NULL, pte); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa, + unsigned long end_gpa) +{ + pmd_t *pmd; + unsigned long end = ~0ul; + int i_min = pud_index(start_gpa); + int i_max = pud_index(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pud_present(pud[i])) + continue; + + pmd = pmd_offset(pud + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) { + pud_clear(pud + i); + pmd_free(NULL, pmd); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa, + unsigned long end_gpa) +{ + p4d_t *p4d; + pud_t *pud; + unsigned long end = ~0ul; + int i_min = pgd_index(start_gpa); + int i_max = pgd_index(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pgd_present(pgd[i])) + continue; + + p4d = p4d_offset(pgd, 0); + pud = pud_offset(p4d + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) { + pgd_clear(pgd + i); + pud_free(NULL, pud); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +/** + * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses. + * @kvm: KVM pointer. + * @start_gfn: Guest frame number of first page in GPA range to flush. + * @end_gfn: Guest frame number of last page in GPA range to flush. + * + * Flushes a range of GPA mappings from the GPA page tables. + * + * The caller must hold the @kvm->mmu_lock spinlock. + * + * Returns: Whether its safe to remove the top level page directory because + * all lower levels have been removed. + */ +bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn) +{ + return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd, + start_gfn << PAGE_SHIFT, + end_gfn << PAGE_SHIFT); +} + +#define BUILD_PTE_RANGE_OP(name, op) \ +static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + int i_min = pte_index(start); \ + int i_max = pte_index(end); \ + int i; \ + pte_t old, new; \ + \ + for (i = i_min; i <= i_max; ++i) { \ + if (!pte_present(pte[i])) \ + continue; \ + \ + old = pte[i]; \ + new = op(old); \ + if (pte_val(new) == pte_val(old)) \ + continue; \ + set_pte(pte + i, new); \ + ret = 1; \ + } \ + return ret; \ +} \ + \ +/* returns true if anything was done */ \ +static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + pte_t *pte; \ + unsigned long cur_end = ~0ul; \ + int i_min = pmd_index(start); \ + int i_max = pmd_index(end); \ + int i; \ + \ + for (i = i_min; i <= i_max; ++i, start = 0) { \ + if (!pmd_present(pmd[i])) \ + continue; \ + \ + pte = pte_offset_kernel(pmd + i, 0); \ + if (i == i_max) \ + cur_end = end; \ + \ + ret |= kvm_mips_##name##_pte(pte, start, cur_end); \ + } \ + return ret; \ +} \ + \ +static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + pmd_t *pmd; \ + unsigned long cur_end = ~0ul; \ + int i_min = pud_index(start); \ + int i_max = pud_index(end); \ + int i; \ + \ + for (i = i_min; i <= i_max; ++i, start = 0) { \ + if (!pud_present(pud[i])) \ + continue; \ + \ + pmd = pmd_offset(pud + i, 0); \ + if (i == i_max) \ + cur_end = end; \ + \ + ret |= kvm_mips_##name##_pmd(pmd, start, cur_end); \ + } \ + return ret; \ +} \ + \ +static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + p4d_t *p4d; \ + pud_t *pud; \ + unsigned long cur_end = ~0ul; \ + int i_min = pgd_index(start); \ + int i_max = pgd_index(end); \ + int i; \ + \ + for (i = i_min; i <= i_max; ++i, start = 0) { \ + if (!pgd_present(pgd[i])) \ + continue; \ + \ + p4d = p4d_offset(pgd, 0); \ + pud = pud_offset(p4d + i, 0); \ + if (i == i_max) \ + cur_end = end; \ + \ + ret |= kvm_mips_##name##_pud(pud, start, cur_end); \ + } \ + return ret; \ +} + +/* + * kvm_mips_mkclean_gpa_pt. + * Mark a range of guest physical address space clean (writes fault) in the VM's + * GPA page table to allow dirty page tracking. + */ + +BUILD_PTE_RANGE_OP(mkclean, pte_mkclean) + +/** + * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean. + * @kvm: KVM pointer. + * @start_gfn: Guest frame number of first page in GPA range to flush. + * @end_gfn: Guest frame number of last page in GPA range to flush. + * + * Make a range of GPA mappings clean so that guest writes will fault and + * trigger dirty page logging. + * + * The caller must hold the @kvm->mmu_lock spinlock. + * + * Returns: Whether any GPA mappings were modified, which would require + * derived mappings (GVA page tables & TLB enties) to be + * invalidated. + */ +int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn) +{ + return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd, + start_gfn << PAGE_SHIFT, + end_gfn << PAGE_SHIFT); +} + +/** + * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages + * @kvm: The KVM pointer + * @slot: The memory slot associated with mask + * @gfn_offset: The gfn offset in memory slot + * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory + * slot to be write protected + * + * Walks bits set in mask write protects the associated pte's. Caller must + * acquire @kvm->mmu_lock. + */ +void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + gfn_t base_gfn = slot->base_gfn + gfn_offset; + gfn_t start = base_gfn + __ffs(mask); + gfn_t end = base_gfn + __fls(mask); + + kvm_mips_mkclean_gpa_pt(kvm, start, end); +} + +/* + * kvm_mips_mkold_gpa_pt. + * Mark a range of guest physical address space old (all accesses fault) in the + * VM's GPA page table to allow detection of commonly used pages. + */ + +BUILD_PTE_RANGE_OP(mkold, pte_mkold) + +static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn, + gfn_t end_gfn) +{ + return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd, + start_gfn << PAGE_SHIFT, + end_gfn << PAGE_SHIFT); +} + +bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) +{ + kvm_mips_flush_gpa_pt(kvm, range->start, range->end); + return true; +} + +bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) +{ + gpa_t gpa = range->start << PAGE_SHIFT; + pte_t hva_pte = range->arg.pte; + pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); + pte_t old_pte; + + if (!gpa_pte) + return false; + + /* Mapping may need adjusting depending on memslot flags */ + old_pte = *gpa_pte; + if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte)) + hva_pte = pte_mkclean(hva_pte); + else if (range->slot->flags & KVM_MEM_READONLY) + hva_pte = pte_wrprotect(hva_pte); + + set_pte(gpa_pte, hva_pte); + + /* Replacing an absent or old page doesn't need flushes */ + if (!pte_present(old_pte) || !pte_young(old_pte)) + return false; + + /* Pages swapped, aged, moved, or cleaned require flushes */ + return !pte_present(hva_pte) || + !pte_young(hva_pte) || + pte_pfn(old_pte) != pte_pfn(hva_pte) || + (pte_dirty(old_pte) && !pte_dirty(hva_pte)); +} + +bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) +{ + return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end); +} + +bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) +{ + gpa_t gpa = range->start << PAGE_SHIFT; + pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); + + if (!gpa_pte) + return false; + return pte_young(*gpa_pte); +} + +/** + * _kvm_mips_map_page_fast() - Fast path GPA fault handler. + * @vcpu: VCPU pointer. + * @gpa: Guest physical address of fault. + * @write_fault: Whether the fault was due to a write. + * @out_entry: New PTE for @gpa (written on success unless NULL). + * @out_buddy: New PTE for @gpa's buddy (written on success unless + * NULL). + * + * Perform fast path GPA fault handling, doing all that can be done without + * calling into KVM. This handles marking old pages young (for idle page + * tracking), and dirtying of clean pages (for dirty page logging). + * + * Returns: 0 on success, in which case we can update derived mappings and + * resume guest execution. + * -EFAULT on failure due to absent GPA mapping or write to + * read-only page, in which case KVM must be consulted. + */ +static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, + bool write_fault, + pte_t *out_entry, pte_t *out_buddy) +{ + struct kvm *kvm = vcpu->kvm; + gfn_t gfn = gpa >> PAGE_SHIFT; + pte_t *ptep; + kvm_pfn_t pfn = 0; /* silence bogus GCC warning */ + bool pfn_valid = false; + int ret = 0; + + spin_lock(&kvm->mmu_lock); + + /* Fast path - just check GPA page table for an existing entry */ + ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa); + if (!ptep || !pte_present(*ptep)) { + ret = -EFAULT; + goto out; + } + + /* Track access to pages marked old */ + if (!pte_young(*ptep)) { + set_pte(ptep, pte_mkyoung(*ptep)); + pfn = pte_pfn(*ptep); + pfn_valid = true; + /* call kvm_set_pfn_accessed() after unlock */ + } + if (write_fault && !pte_dirty(*ptep)) { + if (!pte_write(*ptep)) { + ret = -EFAULT; + goto out; + } + + /* Track dirtying of writeable pages */ + set_pte(ptep, pte_mkdirty(*ptep)); + pfn = pte_pfn(*ptep); + mark_page_dirty(kvm, gfn); + kvm_set_pfn_dirty(pfn); + } + + if (out_entry) + *out_entry = *ptep; + if (out_buddy) + *out_buddy = *ptep_buddy(ptep); + +out: + spin_unlock(&kvm->mmu_lock); + if (pfn_valid) + kvm_set_pfn_accessed(pfn); + return ret; +} + +/** + * kvm_mips_map_page() - Map a guest physical page. + * @vcpu: VCPU pointer. + * @gpa: Guest physical address of fault. + * @write_fault: Whether the fault was due to a write. + * @out_entry: New PTE for @gpa (written on success unless NULL). + * @out_buddy: New PTE for @gpa's buddy (written on success unless + * NULL). + * + * Handle GPA faults by creating a new GPA mapping (or updating an existing + * one). + * + * This takes care of marking pages young or dirty (idle/dirty page tracking), + * asking KVM for the corresponding PFN, and creating a mapping in the GPA page + * tables. Derived mappings (GVA page tables and TLBs) must be handled by the + * caller. + * + * Returns: 0 on success, in which case the caller may use the @out_entry + * and @out_buddy PTEs to update derived mappings and resume guest + * execution. + * -EFAULT if there is no memory region at @gpa or a write was + * attempted to a read-only memory region. This is usually handled + * as an MMIO access. + */ +static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, + bool write_fault, + pte_t *out_entry, pte_t *out_buddy) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + gfn_t gfn = gpa >> PAGE_SHIFT; + int srcu_idx, err; + kvm_pfn_t pfn; + pte_t *ptep, entry; + bool writeable; + unsigned long prot_bits; + unsigned long mmu_seq; + + /* Try the fast path to handle old / clean pages */ + srcu_idx = srcu_read_lock(&kvm->srcu); + err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry, + out_buddy); + if (!err) + goto out; + + /* We need a minimum of cached pages ready for page table creation */ + err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES); + if (err) + goto out; + +retry: + /* + * Used to check for invalidations in progress, of the pfn that is + * returned by pfn_to_pfn_prot below. + */ + mmu_seq = kvm->mmu_invalidate_seq; + /* + * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads + * in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't + * risk the page we get a reference to getting unmapped before we have a + * chance to grab the mmu_lock without mmu_invalidate_retry() noticing. + * + * This smp_rmb() pairs with the effective smp_wmb() of the combination + * of the pte_unmap_unlock() after the PTE is zapped, and the + * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before + * mmu_invalidate_seq is incremented. + */ + smp_rmb(); + + /* Slow path - ask KVM core whether we can access this GPA */ + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable); + if (is_error_noslot_pfn(pfn)) { + err = -EFAULT; + goto out; + } + + spin_lock(&kvm->mmu_lock); + /* Check if an invalidation has taken place since we got pfn */ + if (mmu_invalidate_retry(kvm, mmu_seq)) { + /* + * This can happen when mappings are changed asynchronously, but + * also synchronously if a COW is triggered by + * gfn_to_pfn_prot(). + */ + spin_unlock(&kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + goto retry; + } + + /* Ensure page tables are allocated */ + ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa); + + /* Set up the PTE */ + prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default; + if (writeable) { + prot_bits |= _PAGE_WRITE; + if (write_fault) { + prot_bits |= __WRITEABLE; + mark_page_dirty(kvm, gfn); + kvm_set_pfn_dirty(pfn); + } + } + entry = pfn_pte(pfn, __pgprot(prot_bits)); + + /* Write the PTE */ + set_pte(ptep, entry); + + err = 0; + if (out_entry) + *out_entry = *ptep; + if (out_buddy) + *out_buddy = *ptep_buddy(ptep); + + spin_unlock(&kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + kvm_set_pfn_accessed(pfn); +out: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return err; +} + +int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr, + struct kvm_vcpu *vcpu, + bool write_fault) +{ + int ret; + + ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL); + if (ret) + return ret; + + /* Invalidate this entry in the TLB */ + return kvm_vz_host_tlb_inv(vcpu, badvaddr); +} + +/** + * kvm_mips_migrate_count() - Migrate timer. + * @vcpu: Virtual CPU. + * + * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it + * if it was running prior to being cancelled. + * + * Must be called when the VCPU is migrated to a different CPU to ensure that + * timer expiry during guest execution interrupts the guest and causes the + * interrupt to be delivered in a timely manner. + */ +static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu) +{ + if (hrtimer_cancel(&vcpu->arch.comparecount_timer)) + hrtimer_restart(&vcpu->arch.comparecount_timer); +} + +/* Restore ASID once we are scheduled back after preemption */ +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + unsigned long flags; + + kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu); + + local_irq_save(flags); + + vcpu->cpu = cpu; + if (vcpu->arch.last_sched_cpu != cpu) { + kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", + vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); + /* + * Migrate the timer interrupt to the current CPU so that it + * always interrupts the guest and synchronously triggers a + * guest timer interrupt. + */ + kvm_mips_migrate_count(vcpu); + } + + /* restore guest state to registers */ + kvm_mips_callbacks->vcpu_load(vcpu, cpu); + + local_irq_restore(flags); +} + +/* ASID can change if another task is scheduled during preemption */ +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + int cpu; + + local_irq_save(flags); + + cpu = smp_processor_id(); + vcpu->arch.last_sched_cpu = cpu; + vcpu->cpu = -1; + + /* save guest state in registers */ + kvm_mips_callbacks->vcpu_put(vcpu, cpu); + + local_irq_restore(flags); +} diff --git a/arch/mips/kvm/msa.S b/arch/mips/kvm/msa.S new file mode 100644 index 0000000000..d02f0c6cc2 --- /dev/null +++ b/arch/mips/kvm/msa.S @@ -0,0 +1,161 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * MIPS SIMD Architecture (MSA) context handling code for KVM. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + */ + +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/asmmacro.h> +#include <asm/regdef.h> + + .set noreorder + .set noat + +LEAF(__kvm_save_msa) + st_d 0, VCPU_FPR0, a0 + st_d 1, VCPU_FPR1, a0 + st_d 2, VCPU_FPR2, a0 + st_d 3, VCPU_FPR3, a0 + st_d 4, VCPU_FPR4, a0 + st_d 5, VCPU_FPR5, a0 + st_d 6, VCPU_FPR6, a0 + st_d 7, VCPU_FPR7, a0 + st_d 8, VCPU_FPR8, a0 + st_d 9, VCPU_FPR9, a0 + st_d 10, VCPU_FPR10, a0 + st_d 11, VCPU_FPR11, a0 + st_d 12, VCPU_FPR12, a0 + st_d 13, VCPU_FPR13, a0 + st_d 14, VCPU_FPR14, a0 + st_d 15, VCPU_FPR15, a0 + st_d 16, VCPU_FPR16, a0 + st_d 17, VCPU_FPR17, a0 + st_d 18, VCPU_FPR18, a0 + st_d 19, VCPU_FPR19, a0 + st_d 20, VCPU_FPR20, a0 + st_d 21, VCPU_FPR21, a0 + st_d 22, VCPU_FPR22, a0 + st_d 23, VCPU_FPR23, a0 + st_d 24, VCPU_FPR24, a0 + st_d 25, VCPU_FPR25, a0 + st_d 26, VCPU_FPR26, a0 + st_d 27, VCPU_FPR27, a0 + st_d 28, VCPU_FPR28, a0 + st_d 29, VCPU_FPR29, a0 + st_d 30, VCPU_FPR30, a0 + st_d 31, VCPU_FPR31, a0 + jr ra + nop + END(__kvm_save_msa) + +LEAF(__kvm_restore_msa) + ld_d 0, VCPU_FPR0, a0 + ld_d 1, VCPU_FPR1, a0 + ld_d 2, VCPU_FPR2, a0 + ld_d 3, VCPU_FPR3, a0 + ld_d 4, VCPU_FPR4, a0 + ld_d 5, VCPU_FPR5, a0 + ld_d 6, VCPU_FPR6, a0 + ld_d 7, VCPU_FPR7, a0 + ld_d 8, VCPU_FPR8, a0 + ld_d 9, VCPU_FPR9, a0 + ld_d 10, VCPU_FPR10, a0 + ld_d 11, VCPU_FPR11, a0 + ld_d 12, VCPU_FPR12, a0 + ld_d 13, VCPU_FPR13, a0 + ld_d 14, VCPU_FPR14, a0 + ld_d 15, VCPU_FPR15, a0 + ld_d 16, VCPU_FPR16, a0 + ld_d 17, VCPU_FPR17, a0 + ld_d 18, VCPU_FPR18, a0 + ld_d 19, VCPU_FPR19, a0 + ld_d 20, VCPU_FPR20, a0 + ld_d 21, VCPU_FPR21, a0 + ld_d 22, VCPU_FPR22, a0 + ld_d 23, VCPU_FPR23, a0 + ld_d 24, VCPU_FPR24, a0 + ld_d 25, VCPU_FPR25, a0 + ld_d 26, VCPU_FPR26, a0 + ld_d 27, VCPU_FPR27, a0 + ld_d 28, VCPU_FPR28, a0 + ld_d 29, VCPU_FPR29, a0 + ld_d 30, VCPU_FPR30, a0 + ld_d 31, VCPU_FPR31, a0 + jr ra + nop + END(__kvm_restore_msa) + + .macro kvm_restore_msa_upper wr, off, base + .set push + .set noat +#ifdef CONFIG_64BIT + ld $1, \off(\base) + insert_d \wr, 1 +#elif defined(CONFIG_CPU_LITTLE_ENDIAN) + lw $1, \off(\base) + insert_w \wr, 2 + lw $1, (\off+4)(\base) + insert_w \wr, 3 +#else /* CONFIG_CPU_BIG_ENDIAN */ + lw $1, (\off+4)(\base) + insert_w \wr, 2 + lw $1, \off(\base) + insert_w \wr, 3 +#endif + .set pop + .endm + +LEAF(__kvm_restore_msa_upper) + kvm_restore_msa_upper 0, VCPU_FPR0 +8, a0 + kvm_restore_msa_upper 1, VCPU_FPR1 +8, a0 + kvm_restore_msa_upper 2, VCPU_FPR2 +8, a0 + kvm_restore_msa_upper 3, VCPU_FPR3 +8, a0 + kvm_restore_msa_upper 4, VCPU_FPR4 +8, a0 + kvm_restore_msa_upper 5, VCPU_FPR5 +8, a0 + kvm_restore_msa_upper 6, VCPU_FPR6 +8, a0 + kvm_restore_msa_upper 7, VCPU_FPR7 +8, a0 + kvm_restore_msa_upper 8, VCPU_FPR8 +8, a0 + kvm_restore_msa_upper 9, VCPU_FPR9 +8, a0 + kvm_restore_msa_upper 10, VCPU_FPR10+8, a0 + kvm_restore_msa_upper 11, VCPU_FPR11+8, a0 + kvm_restore_msa_upper 12, VCPU_FPR12+8, a0 + kvm_restore_msa_upper 13, VCPU_FPR13+8, a0 + kvm_restore_msa_upper 14, VCPU_FPR14+8, a0 + kvm_restore_msa_upper 15, VCPU_FPR15+8, a0 + kvm_restore_msa_upper 16, VCPU_FPR16+8, a0 + kvm_restore_msa_upper 17, VCPU_FPR17+8, a0 + kvm_restore_msa_upper 18, VCPU_FPR18+8, a0 + kvm_restore_msa_upper 19, VCPU_FPR19+8, a0 + kvm_restore_msa_upper 20, VCPU_FPR20+8, a0 + kvm_restore_msa_upper 21, VCPU_FPR21+8, a0 + kvm_restore_msa_upper 22, VCPU_FPR22+8, a0 + kvm_restore_msa_upper 23, VCPU_FPR23+8, a0 + kvm_restore_msa_upper 24, VCPU_FPR24+8, a0 + kvm_restore_msa_upper 25, VCPU_FPR25+8, a0 + kvm_restore_msa_upper 26, VCPU_FPR26+8, a0 + kvm_restore_msa_upper 27, VCPU_FPR27+8, a0 + kvm_restore_msa_upper 28, VCPU_FPR28+8, a0 + kvm_restore_msa_upper 29, VCPU_FPR29+8, a0 + kvm_restore_msa_upper 30, VCPU_FPR30+8, a0 + kvm_restore_msa_upper 31, VCPU_FPR31+8, a0 + jr ra + nop + END(__kvm_restore_msa_upper) + +LEAF(__kvm_restore_msacsr) + lw t0, VCPU_MSA_CSR(a0) + /* + * The ctcmsa must stay at this offset in __kvm_restore_msacsr. + * See kvm_mips_csr_die_notify() which handles t0 containing a value + * which triggers an MSA FP Exception, which must be stepped over and + * ignored since the set cause bits must remain there for the guest. + */ + _ctcmsa MSA_CSR, t0 + jr ra + nop + END(__kvm_restore_msacsr) diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c new file mode 100644 index 0000000000..3e6682018f --- /dev/null +++ b/arch/mips/kvm/stats.c @@ -0,0 +1,63 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: COP0 access histogram + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/kvm_host.h> + +char *kvm_cop0_str[N_MIPS_COPROC_REGS] = { + "Index", + "Random", + "EntryLo0", + "EntryLo1", + "Context", + "PG Mask", + "Wired", + "HWREna", + "BadVAddr", + "Count", + "EntryHI", + "Compare", + "Status", + "Cause", + "EXC PC", + "PRID", + "Config", + "LLAddr", + "Watch Lo", + "Watch Hi", + "X Context", + "Reserved", + "Impl Dep", + "Debug", + "DEPC", + "PerfCnt", + "ErrCtl", + "CacheErr", + "TagLo", + "TagHi", + "ErrorEPC", + "DESAVE" +}; + +void kvm_mips_dump_stats(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS + int i, j; + + kvm_info("\nKVM VCPU[%d] COP0 Access Profile:\n", vcpu->vcpu_id); + for (i = 0; i < N_MIPS_COPROC_REGS; i++) { + for (j = 0; j < N_MIPS_COPROC_SEL; j++) { + if (vcpu->arch.cop0.stat[i][j]) + kvm_info("%s[%d]: %lu\n", kvm_cop0_str[i], j, + vcpu->arch.cop0.stat[i][j]); + } + } +#endif +} diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c new file mode 100644 index 0000000000..4e91971daa --- /dev/null +++ b/arch/mips/kvm/tlb.c @@ -0,0 +1,525 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS TLB handling, this file is part of the Linux host kernel so that + * TLB handlers run from KSEG0 + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <linux/kvm_host.h> +#include <linux/srcu.h> + +#include <asm/cpu.h> +#include <asm/bootinfo.h> +#include <asm/mipsregs.h> +#include <asm/mmu_context.h> +#include <asm/cacheflush.h> +#include <asm/tlb.h> +#include <asm/tlbdebug.h> + +#undef CONFIG_MIPS_MT +#include <asm/r4kcache.h> +#define CONFIG_MIPS_MT + +unsigned long GUESTID_MASK; +EXPORT_SYMBOL_GPL(GUESTID_MASK); +unsigned long GUESTID_FIRST_VERSION; +EXPORT_SYMBOL_GPL(GUESTID_FIRST_VERSION); +unsigned long GUESTID_VERSION_MASK; +EXPORT_SYMBOL_GPL(GUESTID_VERSION_MASK); + +static u32 kvm_mips_get_root_asid(struct kvm_vcpu *vcpu) +{ + struct mm_struct *gpa_mm = &vcpu->kvm->arch.gpa_mm; + + if (cpu_has_guestid) + return 0; + else + return cpu_asid(smp_processor_id(), gpa_mm); +} + +static int _kvm_mips_host_tlb_inv(unsigned long entryhi) +{ + int idx; + + write_c0_entryhi(entryhi); + mtc0_tlbw_hazard(); + + tlb_probe(); + tlb_probe_hazard(); + idx = read_c0_index(); + + BUG_ON(idx >= current_cpu_data.tlbsize); + + if (idx >= 0) { + write_c0_entryhi(UNIQUE_ENTRYHI(idx)); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + mtc0_tlbw_hazard(); + + tlb_write_indexed(); + tlbw_use_hazard(); + } + + return idx; +} + +/* GuestID management */ + +/** + * clear_root_gid() - Set GuestCtl1.RID for normal root operation. + */ +static inline void clear_root_gid(void) +{ + if (cpu_has_guestid) { + clear_c0_guestctl1(MIPS_GCTL1_RID); + mtc0_tlbw_hazard(); + } +} + +/** + * set_root_gid_to_guest_gid() - Set GuestCtl1.RID to match GuestCtl1.ID. + * + * Sets the root GuestID to match the current guest GuestID, for TLB operation + * on the GPA->RPA mappings in the root TLB. + * + * The caller must be sure to disable HTW while the root GID is set, and + * possibly longer if TLB registers are modified. + */ +static inline void set_root_gid_to_guest_gid(void) +{ + unsigned int guestctl1; + + if (cpu_has_guestid) { + back_to_back_c0_hazard(); + guestctl1 = read_c0_guestctl1(); + guestctl1 = (guestctl1 & ~MIPS_GCTL1_RID) | + ((guestctl1 & MIPS_GCTL1_ID) >> MIPS_GCTL1_ID_SHIFT) + << MIPS_GCTL1_RID_SHIFT; + write_c0_guestctl1(guestctl1); + mtc0_tlbw_hazard(); + } +} + +int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) +{ + int idx; + unsigned long flags, old_entryhi; + + local_irq_save(flags); + htw_stop(); + + /* Set root GuestID for root probe and write of guest TLB entry */ + set_root_gid_to_guest_gid(); + + old_entryhi = read_c0_entryhi(); + + idx = _kvm_mips_host_tlb_inv((va & VPN2_MASK) | + kvm_mips_get_root_asid(vcpu)); + + write_c0_entryhi(old_entryhi); + clear_root_gid(); + mtc0_tlbw_hazard(); + + htw_start(); + local_irq_restore(flags); + + /* + * We don't want to get reserved instruction exceptions for missing tlb + * entries. + */ + if (cpu_has_vtag_icache) + flush_icache_all(); + + if (idx > 0) + kvm_debug("%s: Invalidated root entryhi %#lx @ idx %d\n", + __func__, (va & VPN2_MASK) | + kvm_mips_get_root_asid(vcpu), idx); + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_vz_host_tlb_inv); + +/** + * kvm_vz_guest_tlb_lookup() - Lookup a guest VZ TLB mapping. + * @vcpu: KVM VCPU pointer. + * @gpa: Guest virtual address in a TLB mapped guest segment. + * @gpa: Pointer to output guest physical address it maps to. + * + * Converts a guest virtual address in a guest TLB mapped segment to a guest + * physical address, by probing the guest TLB. + * + * Returns: 0 if guest TLB mapping exists for @gva. *@gpa will have been + * written. + * -EFAULT if no guest TLB mapping exists for @gva. *@gpa may not + * have been written. + */ +int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa) +{ + unsigned long o_entryhi, o_entrylo[2], o_pagemask; + unsigned int o_index; + unsigned long entrylo[2], pagemask, pagemaskbit, pa; + unsigned long flags; + int index; + + /* Probe the guest TLB for a mapping */ + local_irq_save(flags); + /* Set root GuestID for root probe of guest TLB entry */ + htw_stop(); + set_root_gid_to_guest_gid(); + + o_entryhi = read_gc0_entryhi(); + o_index = read_gc0_index(); + + write_gc0_entryhi((o_entryhi & 0x3ff) | (gva & ~0xfffl)); + mtc0_tlbw_hazard(); + guest_tlb_probe(); + tlb_probe_hazard(); + + index = read_gc0_index(); + if (index < 0) { + /* No match, fail */ + write_gc0_entryhi(o_entryhi); + write_gc0_index(o_index); + + clear_root_gid(); + htw_start(); + local_irq_restore(flags); + return -EFAULT; + } + + /* Match! read the TLB entry */ + o_entrylo[0] = read_gc0_entrylo0(); + o_entrylo[1] = read_gc0_entrylo1(); + o_pagemask = read_gc0_pagemask(); + + mtc0_tlbr_hazard(); + guest_tlb_read(); + tlb_read_hazard(); + + entrylo[0] = read_gc0_entrylo0(); + entrylo[1] = read_gc0_entrylo1(); + pagemask = ~read_gc0_pagemask() & ~0x1fffl; + + write_gc0_entryhi(o_entryhi); + write_gc0_index(o_index); + write_gc0_entrylo0(o_entrylo[0]); + write_gc0_entrylo1(o_entrylo[1]); + write_gc0_pagemask(o_pagemask); + + clear_root_gid(); + htw_start(); + local_irq_restore(flags); + + /* Select one of the EntryLo values and interpret the GPA */ + pagemaskbit = (pagemask ^ (pagemask & (pagemask - 1))) >> 1; + pa = entrylo[!!(gva & pagemaskbit)]; + + /* + * TLB entry may have become invalid since TLB probe if physical FTLB + * entries are shared between threads (e.g. I6400). + */ + if (!(pa & ENTRYLO_V)) + return -EFAULT; + + /* + * Note, this doesn't take guest MIPS32 XPA into account, where PFN is + * split with XI/RI in the middle. + */ + pa = (pa << 6) & ~0xfffl; + pa |= gva & ~(pagemask | pagemaskbit); + + *gpa = pa; + return 0; +} +EXPORT_SYMBOL_GPL(kvm_vz_guest_tlb_lookup); + +/** + * kvm_vz_local_flush_roottlb_all_guests() - Flush all root TLB entries for + * guests. + * + * Invalidate all entries in root tlb which are GPA mappings. + */ +void kvm_vz_local_flush_roottlb_all_guests(void) +{ + unsigned long flags; + unsigned long old_entryhi, old_pagemask, old_guestctl1; + int entry; + + if (WARN_ON(!cpu_has_guestid)) + return; + + local_irq_save(flags); + htw_stop(); + + /* TLBR may clobber EntryHi.ASID, PageMask, and GuestCtl1.RID */ + old_entryhi = read_c0_entryhi(); + old_pagemask = read_c0_pagemask(); + old_guestctl1 = read_c0_guestctl1(); + + /* + * Invalidate guest entries in root TLB while leaving root entries + * intact when possible. + */ + for (entry = 0; entry < current_cpu_data.tlbsize; entry++) { + write_c0_index(entry); + mtc0_tlbw_hazard(); + tlb_read(); + tlb_read_hazard(); + + /* Don't invalidate non-guest (RVA) mappings in the root TLB */ + if (!(read_c0_guestctl1() & MIPS_GCTL1_RID)) + continue; + + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(entry)); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + write_c0_guestctl1(0); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + } + + write_c0_entryhi(old_entryhi); + write_c0_pagemask(old_pagemask); + write_c0_guestctl1(old_guestctl1); + tlbw_use_hazard(); + + htw_start(); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(kvm_vz_local_flush_roottlb_all_guests); + +/** + * kvm_vz_local_flush_guesttlb_all() - Flush all guest TLB entries. + * + * Invalidate all entries in guest tlb irrespective of guestid. + */ +void kvm_vz_local_flush_guesttlb_all(void) +{ + unsigned long flags; + unsigned long old_index; + unsigned long old_entryhi; + unsigned long old_entrylo[2]; + unsigned long old_pagemask; + int entry; + u64 cvmmemctl2 = 0; + + local_irq_save(flags); + + /* Preserve all clobbered guest registers */ + old_index = read_gc0_index(); + old_entryhi = read_gc0_entryhi(); + old_entrylo[0] = read_gc0_entrylo0(); + old_entrylo[1] = read_gc0_entrylo1(); + old_pagemask = read_gc0_pagemask(); + + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Inhibit machine check due to multiple matching TLB entries */ + cvmmemctl2 = read_c0_cvmmemctl2(); + cvmmemctl2 |= CVMMEMCTL2_INHIBITTS; + write_c0_cvmmemctl2(cvmmemctl2); + break; + } + + /* Invalidate guest entries in guest TLB */ + write_gc0_entrylo0(0); + write_gc0_entrylo1(0); + write_gc0_pagemask(0); + for (entry = 0; entry < current_cpu_data.guest.tlbsize; entry++) { + /* Make sure all entries differ. */ + write_gc0_index(entry); + write_gc0_entryhi(UNIQUE_GUEST_ENTRYHI(entry)); + mtc0_tlbw_hazard(); + guest_tlb_write_indexed(); + } + + if (cvmmemctl2) { + cvmmemctl2 &= ~CVMMEMCTL2_INHIBITTS; + write_c0_cvmmemctl2(cvmmemctl2); + } + + write_gc0_index(old_index); + write_gc0_entryhi(old_entryhi); + write_gc0_entrylo0(old_entrylo[0]); + write_gc0_entrylo1(old_entrylo[1]); + write_gc0_pagemask(old_pagemask); + tlbw_use_hazard(); + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(kvm_vz_local_flush_guesttlb_all); + +/** + * kvm_vz_save_guesttlb() - Save a range of guest TLB entries. + * @buf: Buffer to write TLB entries into. + * @index: Start index. + * @count: Number of entries to save. + * + * Save a range of guest TLB entries. The caller must ensure interrupts are + * disabled. + */ +void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index, + unsigned int count) +{ + unsigned int end = index + count; + unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask; + unsigned int guestctl1 = 0; + int old_index, i; + + /* Save registers we're about to clobber */ + old_index = read_gc0_index(); + old_entryhi = read_gc0_entryhi(); + old_entrylo0 = read_gc0_entrylo0(); + old_entrylo1 = read_gc0_entrylo1(); + old_pagemask = read_gc0_pagemask(); + + /* Set root GuestID for root probe */ + htw_stop(); + set_root_gid_to_guest_gid(); + if (cpu_has_guestid) + guestctl1 = read_c0_guestctl1(); + + /* Read each entry from guest TLB */ + for (i = index; i < end; ++i, ++buf) { + write_gc0_index(i); + + mtc0_tlbr_hazard(); + guest_tlb_read(); + tlb_read_hazard(); + + if (cpu_has_guestid && + (read_c0_guestctl1() ^ guestctl1) & MIPS_GCTL1_RID) { + /* Entry invalid or belongs to another guest */ + buf->tlb_hi = UNIQUE_GUEST_ENTRYHI(i); + buf->tlb_lo[0] = 0; + buf->tlb_lo[1] = 0; + buf->tlb_mask = 0; + } else { + /* Entry belongs to the right guest */ + buf->tlb_hi = read_gc0_entryhi(); + buf->tlb_lo[0] = read_gc0_entrylo0(); + buf->tlb_lo[1] = read_gc0_entrylo1(); + buf->tlb_mask = read_gc0_pagemask(); + } + } + + /* Clear root GuestID again */ + clear_root_gid(); + htw_start(); + + /* Restore clobbered registers */ + write_gc0_index(old_index); + write_gc0_entryhi(old_entryhi); + write_gc0_entrylo0(old_entrylo0); + write_gc0_entrylo1(old_entrylo1); + write_gc0_pagemask(old_pagemask); + + tlbw_use_hazard(); +} +EXPORT_SYMBOL_GPL(kvm_vz_save_guesttlb); + +/** + * kvm_vz_load_guesttlb() - Save a range of guest TLB entries. + * @buf: Buffer to read TLB entries from. + * @index: Start index. + * @count: Number of entries to load. + * + * Load a range of guest TLB entries. The caller must ensure interrupts are + * disabled. + */ +void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index, + unsigned int count) +{ + unsigned int end = index + count; + unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask; + int old_index, i; + + /* Save registers we're about to clobber */ + old_index = read_gc0_index(); + old_entryhi = read_gc0_entryhi(); + old_entrylo0 = read_gc0_entrylo0(); + old_entrylo1 = read_gc0_entrylo1(); + old_pagemask = read_gc0_pagemask(); + + /* Set root GuestID for root probe */ + htw_stop(); + set_root_gid_to_guest_gid(); + + /* Write each entry to guest TLB */ + for (i = index; i < end; ++i, ++buf) { + write_gc0_index(i); + write_gc0_entryhi(buf->tlb_hi); + write_gc0_entrylo0(buf->tlb_lo[0]); + write_gc0_entrylo1(buf->tlb_lo[1]); + write_gc0_pagemask(buf->tlb_mask); + + mtc0_tlbw_hazard(); + guest_tlb_write_indexed(); + } + + /* Clear root GuestID again */ + clear_root_gid(); + htw_start(); + + /* Restore clobbered registers */ + write_gc0_index(old_index); + write_gc0_entryhi(old_entryhi); + write_gc0_entrylo0(old_entrylo0); + write_gc0_entrylo1(old_entrylo1); + write_gc0_pagemask(old_pagemask); + + tlbw_use_hazard(); +} +EXPORT_SYMBOL_GPL(kvm_vz_load_guesttlb); + +#ifdef CONFIG_CPU_LOONGSON64 +void kvm_loongson_clear_guest_vtlb(void) +{ + int idx = read_gc0_index(); + + /* Set root GuestID for root probe and write of guest TLB entry */ + set_root_gid_to_guest_gid(); + + write_gc0_index(0); + guest_tlbinvf(); + write_gc0_index(idx); + + clear_root_gid(); + set_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB); +} +EXPORT_SYMBOL_GPL(kvm_loongson_clear_guest_vtlb); + +void kvm_loongson_clear_guest_ftlb(void) +{ + int i; + int idx = read_gc0_index(); + + /* Set root GuestID for root probe and write of guest TLB entry */ + set_root_gid_to_guest_gid(); + + for (i = current_cpu_data.tlbsizevtlb; + i < (current_cpu_data.tlbsizevtlb + + current_cpu_data.tlbsizeftlbsets); + i++) { + write_gc0_index(i); + guest_tlbinvf(); + } + write_gc0_index(idx); + + clear_root_gid(); + set_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB); +} +EXPORT_SYMBOL_GPL(kvm_loongson_clear_guest_ftlb); +#endif diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h new file mode 100644 index 0000000000..136c3535a1 --- /dev/null +++ b/arch/mips/kvm/trace.h @@ -0,0 +1,346 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace + +/* + * arch/mips/kvm/mips.c + */ +extern bool kvm_trace_guest_mode_change; +int kvm_guest_mode_change_trace_reg(void); +void kvm_guest_mode_change_trace_unreg(void); + +/* + * Tracepoints for VM enters + */ +DECLARE_EVENT_CLASS(kvm_transition, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + TP_STRUCT__entry( + __field(unsigned long, pc) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + ), + + TP_printk("PC: 0x%08lx", + __entry->pc) +); + +DEFINE_EVENT(kvm_transition, kvm_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu)); + +DEFINE_EVENT(kvm_transition, kvm_reenter, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu)); + +DEFINE_EVENT(kvm_transition, kvm_out, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu)); + +/* The first 32 exit reasons correspond to Cause.ExcCode */ +#define KVM_TRACE_EXIT_INT 0 +#define KVM_TRACE_EXIT_TLBMOD 1 +#define KVM_TRACE_EXIT_TLBMISS_LD 2 +#define KVM_TRACE_EXIT_TLBMISS_ST 3 +#define KVM_TRACE_EXIT_ADDRERR_LD 4 +#define KVM_TRACE_EXIT_ADDRERR_ST 5 +#define KVM_TRACE_EXIT_SYSCALL 8 +#define KVM_TRACE_EXIT_BREAK_INST 9 +#define KVM_TRACE_EXIT_RESVD_INST 10 +#define KVM_TRACE_EXIT_COP_UNUSABLE 11 +#define KVM_TRACE_EXIT_TRAP_INST 13 +#define KVM_TRACE_EXIT_MSA_FPE 14 +#define KVM_TRACE_EXIT_FPE 15 +#define KVM_TRACE_EXIT_MSA_DISABLED 21 +#define KVM_TRACE_EXIT_GUEST_EXIT 27 +/* Further exit reasons */ +#define KVM_TRACE_EXIT_WAIT 32 +#define KVM_TRACE_EXIT_CACHE 33 +#define KVM_TRACE_EXIT_SIGNAL 34 +/* 32 exit reasons correspond to GuestCtl0.GExcCode (VZ) */ +#define KVM_TRACE_EXIT_GEXCCODE_BASE 64 +#define KVM_TRACE_EXIT_GPSI 64 /* 0 */ +#define KVM_TRACE_EXIT_GSFC 65 /* 1 */ +#define KVM_TRACE_EXIT_HC 66 /* 2 */ +#define KVM_TRACE_EXIT_GRR 67 /* 3 */ +#define KVM_TRACE_EXIT_GVA 72 /* 8 */ +#define KVM_TRACE_EXIT_GHFC 73 /* 9 */ +#define KVM_TRACE_EXIT_GPA 74 /* 10 */ + +/* Tracepoints for VM exits */ +#define kvm_trace_symbol_exit_types \ + { KVM_TRACE_EXIT_INT, "Interrupt" }, \ + { KVM_TRACE_EXIT_TLBMOD, "TLB Mod" }, \ + { KVM_TRACE_EXIT_TLBMISS_LD, "TLB Miss (LD)" }, \ + { KVM_TRACE_EXIT_TLBMISS_ST, "TLB Miss (ST)" }, \ + { KVM_TRACE_EXIT_ADDRERR_LD, "Address Error (LD)" }, \ + { KVM_TRACE_EXIT_ADDRERR_ST, "Address Err (ST)" }, \ + { KVM_TRACE_EXIT_SYSCALL, "System Call" }, \ + { KVM_TRACE_EXIT_BREAK_INST, "Break Inst" }, \ + { KVM_TRACE_EXIT_RESVD_INST, "Reserved Inst" }, \ + { KVM_TRACE_EXIT_COP_UNUSABLE, "COP0/1 Unusable" }, \ + { KVM_TRACE_EXIT_TRAP_INST, "Trap Inst" }, \ + { KVM_TRACE_EXIT_MSA_FPE, "MSA FPE" }, \ + { KVM_TRACE_EXIT_FPE, "FPE" }, \ + { KVM_TRACE_EXIT_MSA_DISABLED, "MSA Disabled" }, \ + { KVM_TRACE_EXIT_GUEST_EXIT, "Guest Exit" }, \ + { KVM_TRACE_EXIT_WAIT, "WAIT" }, \ + { KVM_TRACE_EXIT_CACHE, "CACHE" }, \ + { KVM_TRACE_EXIT_SIGNAL, "Signal" }, \ + { KVM_TRACE_EXIT_GPSI, "GPSI" }, \ + { KVM_TRACE_EXIT_GSFC, "GSFC" }, \ + { KVM_TRACE_EXIT_HC, "HC" }, \ + { KVM_TRACE_EXIT_GRR, "GRR" }, \ + { KVM_TRACE_EXIT_GVA, "GVA" }, \ + { KVM_TRACE_EXIT_GHFC, "GHFC" }, \ + { KVM_TRACE_EXIT_GPA, "GPA" } + +TRACE_EVENT(kvm_exit, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), + TP_ARGS(vcpu, reason), + TP_STRUCT__entry( + __field(unsigned long, pc) + __field(unsigned int, reason) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + __entry->reason = reason; + ), + + TP_printk("[%s]PC: 0x%08lx", + __print_symbolic(__entry->reason, + kvm_trace_symbol_exit_types), + __entry->pc) +); + +#define KVM_TRACE_MFC0 0 +#define KVM_TRACE_MTC0 1 +#define KVM_TRACE_DMFC0 2 +#define KVM_TRACE_DMTC0 3 +#define KVM_TRACE_RDHWR 4 + +#define KVM_TRACE_HWR_COP0 0 +#define KVM_TRACE_HWR_HWR 1 + +#define KVM_TRACE_COP0(REG, SEL) ((KVM_TRACE_HWR_COP0 << 8) | \ + ((REG) << 3) | (SEL)) +#define KVM_TRACE_HWR(REG, SEL) ((KVM_TRACE_HWR_HWR << 8) | \ + ((REG) << 3) | (SEL)) + +#define kvm_trace_symbol_hwr_ops \ + { KVM_TRACE_MFC0, "MFC0" }, \ + { KVM_TRACE_MTC0, "MTC0" }, \ + { KVM_TRACE_DMFC0, "DMFC0" }, \ + { KVM_TRACE_DMTC0, "DMTC0" }, \ + { KVM_TRACE_RDHWR, "RDHWR" } + +#define kvm_trace_symbol_hwr_cop \ + { KVM_TRACE_HWR_COP0, "COP0" }, \ + { KVM_TRACE_HWR_HWR, "HWR" } + +#define kvm_trace_symbol_hwr_regs \ + { KVM_TRACE_COP0( 0, 0), "Index" }, \ + { KVM_TRACE_COP0( 2, 0), "EntryLo0" }, \ + { KVM_TRACE_COP0( 3, 0), "EntryLo1" }, \ + { KVM_TRACE_COP0( 4, 0), "Context" }, \ + { KVM_TRACE_COP0( 4, 2), "UserLocal" }, \ + { KVM_TRACE_COP0( 5, 0), "PageMask" }, \ + { KVM_TRACE_COP0( 6, 0), "Wired" }, \ + { KVM_TRACE_COP0( 7, 0), "HWREna" }, \ + { KVM_TRACE_COP0( 8, 0), "BadVAddr" }, \ + { KVM_TRACE_COP0( 9, 0), "Count" }, \ + { KVM_TRACE_COP0(10, 0), "EntryHi" }, \ + { KVM_TRACE_COP0(11, 0), "Compare" }, \ + { KVM_TRACE_COP0(12, 0), "Status" }, \ + { KVM_TRACE_COP0(12, 1), "IntCtl" }, \ + { KVM_TRACE_COP0(12, 2), "SRSCtl" }, \ + { KVM_TRACE_COP0(13, 0), "Cause" }, \ + { KVM_TRACE_COP0(14, 0), "EPC" }, \ + { KVM_TRACE_COP0(15, 0), "PRId" }, \ + { KVM_TRACE_COP0(15, 1), "EBase" }, \ + { KVM_TRACE_COP0(16, 0), "Config" }, \ + { KVM_TRACE_COP0(16, 1), "Config1" }, \ + { KVM_TRACE_COP0(16, 2), "Config2" }, \ + { KVM_TRACE_COP0(16, 3), "Config3" }, \ + { KVM_TRACE_COP0(16, 4), "Config4" }, \ + { KVM_TRACE_COP0(16, 5), "Config5" }, \ + { KVM_TRACE_COP0(16, 7), "Config7" }, \ + { KVM_TRACE_COP0(17, 1), "MAAR" }, \ + { KVM_TRACE_COP0(17, 2), "MAARI" }, \ + { KVM_TRACE_COP0(26, 0), "ECC" }, \ + { KVM_TRACE_COP0(30, 0), "ErrorEPC" }, \ + { KVM_TRACE_COP0(31, 2), "KScratch1" }, \ + { KVM_TRACE_COP0(31, 3), "KScratch2" }, \ + { KVM_TRACE_COP0(31, 4), "KScratch3" }, \ + { KVM_TRACE_COP0(31, 5), "KScratch4" }, \ + { KVM_TRACE_COP0(31, 6), "KScratch5" }, \ + { KVM_TRACE_COP0(31, 7), "KScratch6" }, \ + { KVM_TRACE_HWR( 0, 0), "CPUNum" }, \ + { KVM_TRACE_HWR( 1, 0), "SYNCI_Step" }, \ + { KVM_TRACE_HWR( 2, 0), "CC" }, \ + { KVM_TRACE_HWR( 3, 0), "CCRes" }, \ + { KVM_TRACE_HWR(29, 0), "ULR" } + +TRACE_EVENT(kvm_hwr, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op, unsigned int reg, + unsigned long val), + TP_ARGS(vcpu, op, reg, val), + TP_STRUCT__entry( + __field(unsigned long, val) + __field(u16, reg) + __field(u8, op) + ), + + TP_fast_assign( + __entry->val = val; + __entry->reg = reg; + __entry->op = op; + ), + + TP_printk("%s %s (%s:%u:%u) 0x%08lx", + __print_symbolic(__entry->op, + kvm_trace_symbol_hwr_ops), + __print_symbolic(__entry->reg, + kvm_trace_symbol_hwr_regs), + __print_symbolic(__entry->reg >> 8, + kvm_trace_symbol_hwr_cop), + (__entry->reg >> 3) & 0x1f, + __entry->reg & 0x7, + __entry->val) +); + +#define KVM_TRACE_AUX_RESTORE 0 +#define KVM_TRACE_AUX_SAVE 1 +#define KVM_TRACE_AUX_ENABLE 2 +#define KVM_TRACE_AUX_DISABLE 3 +#define KVM_TRACE_AUX_DISCARD 4 + +#define KVM_TRACE_AUX_FPU 1 +#define KVM_TRACE_AUX_MSA 2 +#define KVM_TRACE_AUX_FPU_MSA 3 + +#define kvm_trace_symbol_aux_op \ + { KVM_TRACE_AUX_RESTORE, "restore" }, \ + { KVM_TRACE_AUX_SAVE, "save" }, \ + { KVM_TRACE_AUX_ENABLE, "enable" }, \ + { KVM_TRACE_AUX_DISABLE, "disable" }, \ + { KVM_TRACE_AUX_DISCARD, "discard" } + +#define kvm_trace_symbol_aux_state \ + { KVM_TRACE_AUX_FPU, "FPU" }, \ + { KVM_TRACE_AUX_MSA, "MSA" }, \ + { KVM_TRACE_AUX_FPU_MSA, "FPU & MSA" } + +TRACE_EVENT(kvm_aux, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op, + unsigned int state), + TP_ARGS(vcpu, op, state), + TP_STRUCT__entry( + __field(unsigned long, pc) + __field(u8, op) + __field(u8, state) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + __entry->op = op; + __entry->state = state; + ), + + TP_printk("%s %s PC: 0x%08lx", + __print_symbolic(__entry->op, + kvm_trace_symbol_aux_op), + __print_symbolic(__entry->state, + kvm_trace_symbol_aux_state), + __entry->pc) +); + +TRACE_EVENT(kvm_asid_change, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int old_asid, + unsigned int new_asid), + TP_ARGS(vcpu, old_asid, new_asid), + TP_STRUCT__entry( + __field(unsigned long, pc) + __field(u8, old_asid) + __field(u8, new_asid) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + __entry->old_asid = old_asid; + __entry->new_asid = new_asid; + ), + + TP_printk("PC: 0x%08lx old: 0x%02x new: 0x%02x", + __entry->pc, + __entry->old_asid, + __entry->new_asid) +); + +TRACE_EVENT(kvm_guestid_change, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int guestid), + TP_ARGS(vcpu, guestid), + TP_STRUCT__entry( + __field(unsigned int, guestid) + ), + + TP_fast_assign( + __entry->guestid = guestid; + ), + + TP_printk("GuestID: 0x%02x", + __entry->guestid) +); + +TRACE_EVENT_FN(kvm_guest_mode_change, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + TP_STRUCT__entry( + __field(unsigned long, epc) + __field(unsigned long, pc) + __field(unsigned long, badvaddr) + __field(unsigned int, status) + __field(unsigned int, cause) + ), + + TP_fast_assign( + __entry->epc = kvm_read_c0_guest_epc(&vcpu->arch.cop0); + __entry->pc = vcpu->arch.pc; + __entry->badvaddr = kvm_read_c0_guest_badvaddr(&vcpu->arch.cop0); + __entry->status = kvm_read_c0_guest_status(&vcpu->arch.cop0); + __entry->cause = kvm_read_c0_guest_cause(&vcpu->arch.cop0); + ), + + TP_printk("EPC: 0x%08lx PC: 0x%08lx Status: 0x%08x Cause: 0x%08x BadVAddr: 0x%08lx", + __entry->epc, + __entry->pc, + __entry->status, + __entry->cause, + __entry->badvaddr), + + kvm_guest_mode_change_trace_reg, + kvm_guest_mode_change_trace_unreg +); + +#endif /* _TRACE_KVM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c new file mode 100644 index 0000000000..99d5a71e43 --- /dev/null +++ b/arch/mips/kvm/vz.c @@ -0,0 +1,3325 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Support for hardware virtualization extensions + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Yann Le Du <ledu@kymasys.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/preempt.h> +#include <linux/vmalloc.h> +#include <asm/cacheflush.h> +#include <asm/cacheops.h> +#include <asm/cmpxchg.h> +#include <asm/fpu.h> +#include <asm/hazards.h> +#include <asm/inst.h> +#include <asm/mmu_context.h> +#include <asm/r4kcache.h> +#include <asm/time.h> +#include <asm/tlb.h> +#include <asm/tlbex.h> + +#include <linux/kvm_host.h> + +#include "interrupt.h" +#ifdef CONFIG_CPU_LOONGSON64 +#include "loongson_regs.h" +#endif + +#include "trace.h" + +/* Pointers to last VCPU loaded on each physical CPU */ +static struct kvm_vcpu *last_vcpu[NR_CPUS]; +/* Pointers to last VCPU executed on each physical CPU */ +static struct kvm_vcpu *last_exec_vcpu[NR_CPUS]; + +/* + * Number of guest VTLB entries to use, so we can catch inconsistency between + * CPUs. + */ +static unsigned int kvm_vz_guest_vtlb_size; + +static inline long kvm_vz_read_gc0_ebase(void) +{ + if (sizeof(long) == 8 && cpu_has_ebase_wg) + return read_gc0_ebase_64(); + else + return read_gc0_ebase(); +} + +static inline void kvm_vz_write_gc0_ebase(long v) +{ + /* + * First write with WG=1 to write upper bits, then write again in case + * WG should be left at 0. + * write_gc0_ebase_64() is no longer UNDEFINED since R6. + */ + if (sizeof(long) == 8 && + (cpu_has_mips64r6 || cpu_has_ebase_wg)) { + write_gc0_ebase_64(v | MIPS_EBASE_WG); + write_gc0_ebase_64(v); + } else { + write_gc0_ebase(v | MIPS_EBASE_WG); + write_gc0_ebase(v); + } +} + +/* + * These Config bits may be writable by the guest: + * Config: [K23, KU] (!TLB), K0 + * Config1: (none) + * Config2: [TU, SU] (impl) + * Config3: ISAOnExc + * Config4: FTLBPageSize + * Config5: K, CV, MSAEn, UFE, FRE, SBRI, UFR + */ + +static inline unsigned int kvm_vz_config_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return CONF_CM_CMASK; +} + +static inline unsigned int kvm_vz_config1_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline unsigned int kvm_vz_config2_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline unsigned int kvm_vz_config3_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return MIPS_CONF3_ISA_OE; +} + +static inline unsigned int kvm_vz_config4_guest_wrmask(struct kvm_vcpu *vcpu) +{ + /* no need to be exact */ + return MIPS_CONF4_VFTLBPAGESIZE; +} + +static inline unsigned int kvm_vz_config5_guest_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = MIPS_CONF5_K | MIPS_CONF5_CV | MIPS_CONF5_SBRI; + + /* Permit MSAEn changes if MSA supported and enabled */ + if (kvm_mips_guest_has_msa(&vcpu->arch)) + mask |= MIPS_CONF5_MSAEN; + + /* + * Permit guest FPU mode changes if FPU is enabled and the relevant + * feature exists according to FIR register. + */ + if (kvm_mips_guest_has_fpu(&vcpu->arch)) { + if (cpu_has_ufr) + mask |= MIPS_CONF5_UFR; + if (cpu_has_fre) + mask |= MIPS_CONF5_FRE | MIPS_CONF5_UFE; + } + + return mask; +} + +static inline unsigned int kvm_vz_config6_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return LOONGSON_CONF6_INTIMER | LOONGSON_CONF6_EXTIMER; +} + +/* + * VZ optionally allows these additional Config bits to be written by root: + * Config: M, [MT] + * Config1: M, [MMUSize-1, C2, MD, PC, WR, CA], FP + * Config2: M + * Config3: M, MSAP, [BPG], ULRI, [DSP2P, DSPP], CTXTC, [ITL, LPA, VEIC, + * VInt, SP, CDMM, MT, SM, TL] + * Config4: M, [VTLBSizeExt, MMUSizeExt] + * Config5: MRP + */ + +static inline unsigned int kvm_vz_config_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config_guest_wrmask(vcpu) | MIPS_CONF_M; +} + +static inline unsigned int kvm_vz_config1_user_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = kvm_vz_config1_guest_wrmask(vcpu) | MIPS_CONF_M; + + /* Permit FPU to be present if FPU is supported */ + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) + mask |= MIPS_CONF1_FP; + + return mask; +} + +static inline unsigned int kvm_vz_config2_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config2_guest_wrmask(vcpu) | MIPS_CONF_M; +} + +static inline unsigned int kvm_vz_config3_user_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = kvm_vz_config3_guest_wrmask(vcpu) | MIPS_CONF_M | + MIPS_CONF3_ULRI | MIPS_CONF3_CTXTC; + + /* Permit MSA to be present if MSA is supported */ + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) + mask |= MIPS_CONF3_MSA; + + return mask; +} + +static inline unsigned int kvm_vz_config4_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config4_guest_wrmask(vcpu) | MIPS_CONF_M; +} + +static inline unsigned int kvm_vz_config5_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config5_guest_wrmask(vcpu) | MIPS_CONF5_MRP; +} + +static inline unsigned int kvm_vz_config6_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config6_guest_wrmask(vcpu) | + LOONGSON_CONF6_SFBEN | LOONGSON_CONF6_FTLBDIS; +} + +static gpa_t kvm_vz_gva_to_gpa_cb(gva_t gva) +{ + /* VZ guest has already converted gva to gpa */ + return gva; +} + +static void kvm_vz_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority) +{ + set_bit(priority, &vcpu->arch.pending_exceptions); + clear_bit(priority, &vcpu->arch.pending_exceptions_clr); +} + +static void kvm_vz_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority) +{ + clear_bit(priority, &vcpu->arch.pending_exceptions); + set_bit(priority, &vcpu->arch.pending_exceptions_clr); +} + +static void kvm_vz_queue_timer_int_cb(struct kvm_vcpu *vcpu) +{ + /* + * timer expiry is asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); +} + +static void kvm_vz_dequeue_timer_int_cb(struct kvm_vcpu *vcpu) +{ + /* + * timer expiry is asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_TIMER); +} + +static void kvm_vz_queue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) +{ + int intr = (int)irq->irq; + + /* + * interrupts are asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + kvm_vz_queue_irq(vcpu, kvm_irq_to_priority(intr)); +} + +static void kvm_vz_dequeue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) +{ + int intr = (int)irq->irq; + + /* + * interrupts are asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + kvm_vz_dequeue_irq(vcpu, kvm_irq_to_priority(-intr)); +} + +static int kvm_vz_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, + u32 cause) +{ + u32 irq = (priority < MIPS_EXC_MAX) ? + kvm_priority_to_irq[priority] : 0; + + switch (priority) { + case MIPS_EXC_INT_TIMER: + set_gc0_cause(C_TI); + break; + + case MIPS_EXC_INT_IO_1: + case MIPS_EXC_INT_IO_2: + case MIPS_EXC_INT_IPI_1: + case MIPS_EXC_INT_IPI_2: + if (cpu_has_guestctl2) + set_c0_guestctl2(irq); + else + set_gc0_cause(irq); + break; + + default: + break; + } + + clear_bit(priority, &vcpu->arch.pending_exceptions); + return 1; +} + +static int kvm_vz_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, + u32 cause) +{ + u32 irq = (priority < MIPS_EXC_MAX) ? + kvm_priority_to_irq[priority] : 0; + + switch (priority) { + case MIPS_EXC_INT_TIMER: + /* + * Explicitly clear irq associated with Cause.IP[IPTI] + * if GuestCtl2 virtual interrupt register not + * supported or if not using GuestCtl2 Hardware Clear. + */ + if (cpu_has_guestctl2) { + if (!(read_c0_guestctl2() & (irq << 14))) + clear_c0_guestctl2(irq); + } else { + clear_gc0_cause(irq); + } + break; + + case MIPS_EXC_INT_IO_1: + case MIPS_EXC_INT_IO_2: + case MIPS_EXC_INT_IPI_1: + case MIPS_EXC_INT_IPI_2: + /* Clear GuestCtl2.VIP irq if not using Hardware Clear */ + if (cpu_has_guestctl2) { + if (!(read_c0_guestctl2() & (irq << 14))) + clear_c0_guestctl2(irq); + } else { + clear_gc0_cause(irq); + } + break; + + default: + break; + } + + clear_bit(priority, &vcpu->arch.pending_exceptions_clr); + return 1; +} + +/* + * VZ guest timer handling. + */ + +/** + * kvm_vz_should_use_htimer() - Find whether to use the VZ hard guest timer. + * @vcpu: Virtual CPU. + * + * Returns: true if the VZ GTOffset & real guest CP0_Count should be used + * instead of software emulation of guest timer. + * false otherwise. + */ +static bool kvm_vz_should_use_htimer(struct kvm_vcpu *vcpu) +{ + if (kvm_mips_count_disabled(vcpu)) + return false; + + /* Chosen frequency must match real frequency */ + if (mips_hpt_frequency != vcpu->arch.count_hz) + return false; + + /* We don't support a CP0_GTOffset with fewer bits than CP0_Count */ + if (current_cpu_data.gtoffset_mask != 0xffffffff) + return false; + + return true; +} + +/** + * _kvm_vz_restore_stimer() - Restore soft timer state. + * @vcpu: Virtual CPU. + * @compare: CP0_Compare register value, restored by caller. + * @cause: CP0_Cause register to restore. + * + * Restore VZ state relating to the soft timer. The hard timer can be enabled + * later. + */ +static void _kvm_vz_restore_stimer(struct kvm_vcpu *vcpu, u32 compare, + u32 cause) +{ + /* + * Avoid spurious counter interrupts by setting Guest CP0_Count to just + * after Guest CP0_Compare. + */ + write_c0_gtoffset(compare - read_c0_count()); + + back_to_back_c0_hazard(); + write_gc0_cause(cause); +} + +/** + * _kvm_vz_restore_htimer() - Restore hard timer state. + * @vcpu: Virtual CPU. + * @compare: CP0_Compare register value, restored by caller. + * @cause: CP0_Cause register to restore. + * + * Restore hard timer Guest.Count & Guest.Cause taking care to preserve the + * value of Guest.CP0_Cause.TI while restoring Guest.CP0_Cause. + */ +static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu, + u32 compare, u32 cause) +{ + u32 start_count, after_count; + unsigned long flags; + + /* + * Freeze the soft-timer and sync the guest CP0_Count with it. We do + * this with interrupts disabled to avoid latency. + */ + local_irq_save(flags); + kvm_mips_freeze_hrtimer(vcpu, &start_count); + write_c0_gtoffset(start_count - read_c0_count()); + local_irq_restore(flags); + + /* restore guest CP0_Cause, as TI may already be set */ + back_to_back_c0_hazard(); + write_gc0_cause(cause); + + /* + * The above sequence isn't atomic and would result in lost timer + * interrupts if we're not careful. Detect if a timer interrupt is due + * and assert it. + */ + back_to_back_c0_hazard(); + after_count = read_gc0_count(); + if (after_count - start_count > compare - start_count - 1) + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); +} + +/** + * kvm_vz_restore_timer() - Restore timer state. + * @vcpu: Virtual CPU. + * + * Restore soft timer state from saved context. + */ +static void kvm_vz_restore_timer(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + u32 cause, compare; + + compare = kvm_read_sw_gc0_compare(cop0); + cause = kvm_read_sw_gc0_cause(cop0); + + write_gc0_compare(compare); + _kvm_vz_restore_stimer(vcpu, compare, cause); +} + +/** + * kvm_vz_acquire_htimer() - Switch to hard timer state. + * @vcpu: Virtual CPU. + * + * Restore hard timer state on top of existing soft timer state if possible. + * + * Since hard timer won't remain active over preemption, preemption should be + * disabled by the caller. + */ +void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) +{ + u32 gctl0; + + gctl0 = read_c0_guestctl0(); + if (!(gctl0 & MIPS_GCTL0_GT) && kvm_vz_should_use_htimer(vcpu)) { + /* enable guest access to hard timer */ + write_c0_guestctl0(gctl0 | MIPS_GCTL0_GT); + + _kvm_vz_restore_htimer(vcpu, read_gc0_compare(), + read_gc0_cause()); + } +} + +/** + * _kvm_vz_save_htimer() - Switch to software emulation of guest timer. + * @vcpu: Virtual CPU. + * @out_compare: Pointer to write compare value to. + * @out_cause: Pointer to write cause value to. + * + * Save VZ guest timer state and switch to software emulation of guest CP0 + * timer. The hard timer must already be in use, so preemption should be + * disabled. + */ +static void _kvm_vz_save_htimer(struct kvm_vcpu *vcpu, + u32 *out_compare, u32 *out_cause) +{ + u32 cause, compare, before_count, end_count; + ktime_t before_time; + + compare = read_gc0_compare(); + *out_compare = compare; + + before_time = ktime_get(); + + /* + * Record the CP0_Count *prior* to saving CP0_Cause, so we have a time + * at which no pending timer interrupt is missing. + */ + before_count = read_gc0_count(); + back_to_back_c0_hazard(); + cause = read_gc0_cause(); + *out_cause = cause; + + /* + * Record a final CP0_Count which we will transfer to the soft-timer. + * This is recorded *after* saving CP0_Cause, so we don't get any timer + * interrupts from just after the final CP0_Count point. + */ + back_to_back_c0_hazard(); + end_count = read_gc0_count(); + + /* + * The above sequence isn't atomic, so we could miss a timer interrupt + * between reading CP0_Cause and end_count. Detect and record any timer + * interrupt due between before_count and end_count. + */ + if (end_count - before_count > compare - before_count - 1) + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); + + /* + * Restore soft-timer, ignoring a small amount of negative drift due to + * delay between freeze_hrtimer and setting CP0_GTOffset. + */ + kvm_mips_restore_hrtimer(vcpu, before_time, end_count, -0x10000); +} + +/** + * kvm_vz_save_timer() - Save guest timer state. + * @vcpu: Virtual CPU. + * + * Save VZ guest timer state and switch to soft guest timer if hard timer was in + * use. + */ +static void kvm_vz_save_timer(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + u32 gctl0, compare, cause; + + gctl0 = read_c0_guestctl0(); + if (gctl0 & MIPS_GCTL0_GT) { + /* disable guest use of hard timer */ + write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT); + + /* save hard timer state */ + _kvm_vz_save_htimer(vcpu, &compare, &cause); + } else { + compare = read_gc0_compare(); + cause = read_gc0_cause(); + } + + /* save timer-related state to VCPU context */ + kvm_write_sw_gc0_cause(cop0, cause); + kvm_write_sw_gc0_compare(cop0, compare); +} + +/** + * kvm_vz_lose_htimer() - Ensure hard guest timer is not in use. + * @vcpu: Virtual CPU. + * + * Transfers the state of the hard guest timer to the soft guest timer, leaving + * guest state intact so it can continue to be used with the soft timer. + */ +void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu) +{ + u32 gctl0, compare, cause; + + preempt_disable(); + gctl0 = read_c0_guestctl0(); + if (gctl0 & MIPS_GCTL0_GT) { + /* disable guest use of timer */ + write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT); + + /* switch to soft timer */ + _kvm_vz_save_htimer(vcpu, &compare, &cause); + + /* leave soft timer in usable state */ + _kvm_vz_restore_stimer(vcpu, compare, cause); + } + preempt_enable(); +} + +/** + * is_eva_access() - Find whether an instruction is an EVA memory accessor. + * @inst: 32-bit instruction encoding. + * + * Finds whether @inst encodes an EVA memory access instruction, which would + * indicate that emulation of it should access the user mode address space + * instead of the kernel mode address space. This matters for MUSUK segments + * which are TLB mapped for user mode but unmapped for kernel mode. + * + * Returns: Whether @inst encodes an EVA accessor instruction. + */ +static bool is_eva_access(union mips_instruction inst) +{ + if (inst.spec3_format.opcode != spec3_op) + return false; + + switch (inst.spec3_format.func) { + case lwle_op: + case lwre_op: + case cachee_op: + case sbe_op: + case she_op: + case sce_op: + case swe_op: + case swle_op: + case swre_op: + case prefe_op: + case lbue_op: + case lhue_op: + case lbe_op: + case lhe_op: + case lle_op: + case lwe_op: + return true; + default: + return false; + } +} + +/** + * is_eva_am_mapped() - Find whether an access mode is mapped. + * @vcpu: KVM VCPU state. + * @am: 3-bit encoded access mode. + * @eu: Segment becomes unmapped and uncached when Status.ERL=1. + * + * Decode @am to find whether it encodes a mapped segment for the current VCPU + * state. Where necessary @eu and the actual instruction causing the fault are + * taken into account to make the decision. + * + * Returns: Whether the VCPU faulted on a TLB mapped address. + */ +static bool is_eva_am_mapped(struct kvm_vcpu *vcpu, unsigned int am, bool eu) +{ + u32 am_lookup; + int err; + + /* + * Interpret access control mode. We assume address errors will already + * have been caught by the guest, leaving us with: + * AM UM SM KM 31..24 23..16 + * UK 0 000 Unm 0 0 + * MK 1 001 TLB 1 + * MSK 2 010 TLB TLB 1 + * MUSK 3 011 TLB TLB TLB 1 + * MUSUK 4 100 TLB TLB Unm 0 1 + * USK 5 101 Unm Unm 0 0 + * - 6 110 0 0 + * UUSK 7 111 Unm Unm Unm 0 0 + * + * We shift a magic value by AM across the sign bit to find if always + * TLB mapped, and if not shift by 8 again to find if it depends on KM. + */ + am_lookup = 0x70080000 << am; + if ((s32)am_lookup < 0) { + /* + * MK, MSK, MUSK + * Always TLB mapped, unless SegCtl.EU && ERL + */ + if (!eu || !(read_gc0_status() & ST0_ERL)) + return true; + } else { + am_lookup <<= 8; + if ((s32)am_lookup < 0) { + union mips_instruction inst; + unsigned int status; + u32 *opc; + + /* + * MUSUK + * TLB mapped if not in kernel mode + */ + status = read_gc0_status(); + if (!(status & (ST0_EXL | ST0_ERL)) && + (status & ST0_KSU)) + return true; + /* + * EVA access instructions in kernel + * mode access user address space. + */ + opc = (u32 *)vcpu->arch.pc; + if (vcpu->arch.host_cp0_cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (!err && is_eva_access(inst)) + return true; + } + } + + return false; +} + +/** + * kvm_vz_gva_to_gpa() - Convert valid GVA to GPA. + * @vcpu: KVM VCPU state. + * @gva: Guest virtual address to convert. + * @gpa: Output guest physical address. + * + * Convert a guest virtual address (GVA) which is valid according to the guest + * context, to a guest physical address (GPA). + * + * Returns: 0 on success. + * -errno on failure. + */ +static int kvm_vz_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa) +{ + u32 gva32 = gva; + unsigned long segctl; + + if ((long)gva == (s32)gva32) { + /* Handle canonical 32-bit virtual address */ + if (cpu_guest_has_segments) { + unsigned long mask, pa; + + switch (gva32 >> 29) { + case 0: + case 1: /* CFG5 (1GB) */ + segctl = read_gc0_segctl2() >> 16; + mask = (unsigned long)0xfc0000000ull; + break; + case 2: + case 3: /* CFG4 (1GB) */ + segctl = read_gc0_segctl2(); + mask = (unsigned long)0xfc0000000ull; + break; + case 4: /* CFG3 (512MB) */ + segctl = read_gc0_segctl1() >> 16; + mask = (unsigned long)0xfe0000000ull; + break; + case 5: /* CFG2 (512MB) */ + segctl = read_gc0_segctl1(); + mask = (unsigned long)0xfe0000000ull; + break; + case 6: /* CFG1 (512MB) */ + segctl = read_gc0_segctl0() >> 16; + mask = (unsigned long)0xfe0000000ull; + break; + case 7: /* CFG0 (512MB) */ + segctl = read_gc0_segctl0(); + mask = (unsigned long)0xfe0000000ull; + break; + default: + /* + * GCC 4.9 isn't smart enough to figure out that + * segctl and mask are always initialised. + */ + unreachable(); + } + + if (is_eva_am_mapped(vcpu, (segctl >> 4) & 0x7, + segctl & 0x0008)) + goto tlb_mapped; + + /* Unmapped, find guest physical address */ + pa = (segctl << 20) & mask; + pa |= gva32 & ~mask; + *gpa = pa; + return 0; + } else if ((s32)gva32 < (s32)0xc0000000) { + /* legacy unmapped KSeg0 or KSeg1 */ + *gpa = gva32 & 0x1fffffff; + return 0; + } +#ifdef CONFIG_64BIT + } else if ((gva & 0xc000000000000000) == 0x8000000000000000) { + /* XKPHYS */ + if (cpu_guest_has_segments) { + /* + * Each of the 8 regions can be overridden by SegCtl2.XR + * to use SegCtl1.XAM. + */ + segctl = read_gc0_segctl2(); + if (segctl & (1ull << (56 + ((gva >> 59) & 0x7)))) { + segctl = read_gc0_segctl1(); + if (is_eva_am_mapped(vcpu, (segctl >> 59) & 0x7, + 0)) + goto tlb_mapped; + } + + } + /* + * Traditionally fully unmapped. + * Bits 61:59 specify the CCA, which we can just mask off here. + * Bits 58:PABITS should be zero, but we shouldn't have got here + * if it wasn't. + */ + *gpa = gva & 0x07ffffffffffffff; + return 0; +#endif + } + +tlb_mapped: + return kvm_vz_guest_tlb_lookup(vcpu, gva, gpa); +} + +/** + * kvm_vz_badvaddr_to_gpa() - Convert GVA BadVAddr from root exception to GPA. + * @vcpu: KVM VCPU state. + * @badvaddr: Root BadVAddr. + * @gpa: Output guest physical address. + * + * VZ implementations are permitted to report guest virtual addresses (GVA) in + * BadVAddr on a root exception during guest execution, instead of the more + * convenient guest physical addresses (GPA). When we get a GVA, this function + * converts it to a GPA, taking into account guest segmentation and guest TLB + * state. + * + * Returns: 0 on success. + * -errno on failure. + */ +static int kvm_vz_badvaddr_to_gpa(struct kvm_vcpu *vcpu, unsigned long badvaddr, + unsigned long *gpa) +{ + unsigned int gexccode = (vcpu->arch.host_cp0_guestctl0 & + MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT; + + /* If BadVAddr is GPA, then all is well in the world */ + if (likely(gexccode == MIPS_GCTL0_GEXC_GPA)) { + *gpa = badvaddr; + return 0; + } + + /* Otherwise we'd expect it to be GVA ... */ + if (WARN(gexccode != MIPS_GCTL0_GEXC_GVA, + "Unexpected gexccode %#x\n", gexccode)) + return -EINVAL; + + /* ... and we need to perform the GVA->GPA translation in software */ + return kvm_vz_gva_to_gpa(vcpu, badvaddr, gpa); +} + +static int kvm_trap_vz_no_handler(struct kvm_vcpu *vcpu) +{ + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; + unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; + u32 inst = 0; + + /* + * Fetch the instruction. + */ + if (cause & CAUSEF_BD) + opc += 1; + kvm_get_badinstr(opc, vcpu, &inst); + + kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", + exccode, opc, inst, badvaddr, + read_gc0_status()); + kvm_arch_vcpu_dump_regs(vcpu); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; +} + +static unsigned long mips_process_maar(unsigned int op, unsigned long val) +{ + /* Mask off unused bits */ + unsigned long mask = 0xfffff000 | MIPS_MAAR_S | MIPS_MAAR_VL; + + if (read_gc0_pagegrain() & PG_ELPA) + mask |= 0x00ffffff00000000ull; + if (cpu_guest_has_mvh) + mask |= MIPS_MAAR_VH; + + /* Set or clear VH */ + if (op == mtc_op) { + /* clear VH */ + val &= ~MIPS_MAAR_VH; + } else if (op == dmtc_op) { + /* set VH to match VL */ + val &= ~MIPS_MAAR_VH; + if (val & MIPS_MAAR_VL) + val |= MIPS_MAAR_VH; + } + + return val & mask; +} + +static void kvm_write_maari(struct kvm_vcpu *vcpu, unsigned long val) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + + val &= MIPS_MAARI_INDEX; + if (val == MIPS_MAARI_INDEX) + kvm_write_sw_gc0_maari(cop0, ARRAY_SIZE(vcpu->arch.maar) - 1); + else if (val < ARRAY_SIZE(vcpu->arch.maar)) + kvm_write_sw_gc0_maari(cop0, val); +} + +static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst, + u32 *opc, u32 cause, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + enum emulation_result er = EMULATE_DONE; + u32 rt, rd, sel; + unsigned long curr_pc; + unsigned long val; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + if (inst.co_format.co) { + switch (inst.co_format.func) { + case wait_op: + er = kvm_mips_emul_wait(vcpu); + break; + default: + er = EMULATE_FAIL; + } + } else { + rt = inst.c0r_format.rt; + rd = inst.c0r_format.rd; + sel = inst.c0r_format.sel; + + switch (inst.c0r_format.rs) { + case dmfc_op: + case mfc_op: +#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS + cop0->stat[rd][sel]++; +#endif + if (rd == MIPS_CP0_COUNT && + sel == 0) { /* Count */ + val = kvm_mips_read_count(vcpu); + } else if (rd == MIPS_CP0_COMPARE && + sel == 0) { /* Compare */ + val = read_gc0_compare(); + } else if (rd == MIPS_CP0_LLADDR && + sel == 0) { /* LLAddr */ + if (cpu_guest_has_rw_llb) + val = read_gc0_lladdr() & + MIPS_LLADDR_LLB; + else + val = 0; + } else if (rd == MIPS_CP0_LLADDR && + sel == 1 && /* MAAR */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) { + /* MAARI must be in range */ + BUG_ON(kvm_read_sw_gc0_maari(cop0) >= + ARRAY_SIZE(vcpu->arch.maar)); + val = vcpu->arch.maar[ + kvm_read_sw_gc0_maari(cop0)]; + } else if ((rd == MIPS_CP0_PRID && + (sel == 0 || /* PRid */ + sel == 2 || /* CDMMBase */ + sel == 3)) || /* CMGCRBase */ + (rd == MIPS_CP0_STATUS && + (sel == 2 || /* SRSCtl */ + sel == 3)) || /* SRSMap */ + (rd == MIPS_CP0_CONFIG && + (sel == 6 || /* Config6 */ + sel == 7)) || /* Config7 */ + (rd == MIPS_CP0_LLADDR && + (sel == 2) && /* MAARI */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) || + (rd == MIPS_CP0_ERRCTL && + (sel == 0))) { /* ErrCtl */ + val = cop0->reg[rd][sel]; +#ifdef CONFIG_CPU_LOONGSON64 + } else if (rd == MIPS_CP0_DIAG && + (sel == 0)) { /* Diag */ + val = cop0->reg[rd][sel]; +#endif + } else { + val = 0; + er = EMULATE_FAIL; + } + + if (er != EMULATE_FAIL) { + /* Sign extend */ + if (inst.c0r_format.rs == mfc_op) + val = (int)val; + vcpu->arch.gprs[rt] = val; + } + + trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mfc_op) ? + KVM_TRACE_MFC0 : KVM_TRACE_DMFC0, + KVM_TRACE_COP0(rd, sel), val); + break; + + case dmtc_op: + case mtc_op: +#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS + cop0->stat[rd][sel]++; +#endif + val = vcpu->arch.gprs[rt]; + trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mtc_op) ? + KVM_TRACE_MTC0 : KVM_TRACE_DMTC0, + KVM_TRACE_COP0(rd, sel), val); + + if (rd == MIPS_CP0_COUNT && + sel == 0) { /* Count */ + kvm_vz_lose_htimer(vcpu); + kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]); + } else if (rd == MIPS_CP0_COMPARE && + sel == 0) { /* Compare */ + kvm_mips_write_compare(vcpu, + vcpu->arch.gprs[rt], + true); + } else if (rd == MIPS_CP0_LLADDR && + sel == 0) { /* LLAddr */ + /* + * P5600 generates GPSI on guest MTC0 LLAddr. + * Only allow the guest to clear LLB. + */ + if (cpu_guest_has_rw_llb && + !(val & MIPS_LLADDR_LLB)) + write_gc0_lladdr(0); + } else if (rd == MIPS_CP0_LLADDR && + sel == 1 && /* MAAR */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) { + val = mips_process_maar(inst.c0r_format.rs, + val); + + /* MAARI must be in range */ + BUG_ON(kvm_read_sw_gc0_maari(cop0) >= + ARRAY_SIZE(vcpu->arch.maar)); + vcpu->arch.maar[kvm_read_sw_gc0_maari(cop0)] = + val; + } else if (rd == MIPS_CP0_LLADDR && + (sel == 2) && /* MAARI */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) { + kvm_write_maari(vcpu, val); + } else if (rd == MIPS_CP0_CONFIG && + (sel == 6)) { + cop0->reg[rd][sel] = (int)val; + } else if (rd == MIPS_CP0_ERRCTL && + (sel == 0)) { /* ErrCtl */ + /* ignore the written value */ +#ifdef CONFIG_CPU_LOONGSON64 + } else if (rd == MIPS_CP0_DIAG && + (sel == 0)) { /* Diag */ + unsigned long flags; + + local_irq_save(flags); + if (val & LOONGSON_DIAG_BTB) { + /* Flush BTB */ + set_c0_diag(LOONGSON_DIAG_BTB); + } + if (val & LOONGSON_DIAG_ITLB) { + /* Flush ITLB */ + set_c0_diag(LOONGSON_DIAG_ITLB); + } + if (val & LOONGSON_DIAG_DTLB) { + /* Flush DTLB */ + set_c0_diag(LOONGSON_DIAG_DTLB); + } + if (val & LOONGSON_DIAG_VTLB) { + /* Flush VTLB */ + kvm_loongson_clear_guest_vtlb(); + } + if (val & LOONGSON_DIAG_FTLB) { + /* Flush FTLB */ + kvm_loongson_clear_guest_ftlb(); + } + local_irq_restore(flags); +#endif + } else { + er = EMULATE_FAIL; + } + break; + + default: + er = EMULATE_FAIL; + break; + } + } + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) { + kvm_err("[%#lx]%s: unsupported cop0 instruction 0x%08x\n", + curr_pc, __func__, inst.word); + + vcpu->arch.pc = curr_pc; + } + + return er; +} + +static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst, + u32 *opc, u32 cause, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + u32 cache, op_inst, op, base; + s16 offset; + struct kvm_vcpu_arch *arch = &vcpu->arch; + unsigned long va, curr_pc; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + base = inst.i_format.rs; + op_inst = inst.i_format.rt; + if (cpu_has_mips_r6) + offset = inst.spec3_format.simmediate; + else + offset = inst.i_format.simmediate; + cache = op_inst & CacheOp_Cache; + op = op_inst & CacheOp_Op; + + va = arch->gprs[base] + offset; + + kvm_debug("CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + cache, op, base, arch->gprs[base], offset); + + /* Secondary or tirtiary cache ops ignored */ + if (cache != Cache_I && cache != Cache_D) + return EMULATE_DONE; + + switch (op_inst) { + case Index_Invalidate_I: + flush_icache_line_indexed(va); + return EMULATE_DONE; + case Index_Writeback_Inv_D: + flush_dcache_line_indexed(va); + return EMULATE_DONE; + case Hit_Invalidate_I: + case Hit_Invalidate_D: + case Hit_Writeback_Inv_D: + if (boot_cpu_type() == CPU_CAVIUM_OCTEON3) { + /* We can just flush entire icache */ + local_flush_icache_range(0, 0); + return EMULATE_DONE; + } + + /* So far, other platforms support guest hit cache ops */ + break; + default: + break; + } + + kvm_err("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + curr_pc, vcpu->arch.gprs[31], cache, op, base, arch->gprs[base], + offset); + /* Rollback PC */ + vcpu->arch.pc = curr_pc; + + return EMULATE_FAIL; +} + +#ifdef CONFIG_CPU_LOONGSON64 +static enum emulation_result kvm_vz_gpsi_lwc2(union mips_instruction inst, + u32 *opc, u32 cause, + struct kvm_vcpu *vcpu) +{ + unsigned int rs, rd; + unsigned int hostcfg; + unsigned long curr_pc; + enum emulation_result er = EMULATE_DONE; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + rs = inst.loongson3_lscsr_format.rs; + rd = inst.loongson3_lscsr_format.rd; + switch (inst.loongson3_lscsr_format.fr) { + case 0x8: /* Read CPUCFG */ + ++vcpu->stat.vz_cpucfg_exits; + hostcfg = read_cpucfg(vcpu->arch.gprs[rs]); + + switch (vcpu->arch.gprs[rs]) { + case LOONGSON_CFG0: + vcpu->arch.gprs[rd] = 0x14c000; + break; + case LOONGSON_CFG1: + hostcfg &= (LOONGSON_CFG1_FP | LOONGSON_CFG1_MMI | + LOONGSON_CFG1_MSA1 | LOONGSON_CFG1_MSA2 | + LOONGSON_CFG1_SFBP); + vcpu->arch.gprs[rd] = hostcfg; + break; + case LOONGSON_CFG2: + hostcfg &= (LOONGSON_CFG2_LEXT1 | LOONGSON_CFG2_LEXT2 | + LOONGSON_CFG2_LEXT3 | LOONGSON_CFG2_LSPW); + vcpu->arch.gprs[rd] = hostcfg; + break; + case LOONGSON_CFG3: + vcpu->arch.gprs[rd] = hostcfg; + break; + default: + /* Don't export any other advanced features to guest */ + vcpu->arch.gprs[rd] = 0; + break; + } + break; + + default: + kvm_err("lwc2 emulate not impl %d rs %lx @%lx\n", + inst.loongson3_lscsr_format.fr, vcpu->arch.gprs[rs], curr_pc); + er = EMULATE_FAIL; + break; + } + + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) { + kvm_err("[%#lx]%s: unsupported lwc2 instruction 0x%08x 0x%08x\n", + curr_pc, __func__, inst.word, inst.loongson3_lscsr_format.fr); + + vcpu->arch.pc = curr_pc; + } + + return er; +} +#endif + +static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + struct kvm_vcpu_arch *arch = &vcpu->arch; + union mips_instruction inst; + int rd, rt, sel; + int err; + + /* + * Fetch the instruction. + */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) + return EMULATE_FAIL; + + switch (inst.r_format.opcode) { + case cop0_op: + er = kvm_vz_gpsi_cop0(inst, opc, cause, vcpu); + break; +#ifndef CONFIG_CPU_MIPSR6 + case cache_op: + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); + er = kvm_vz_gpsi_cache(inst, opc, cause, vcpu); + break; +#endif +#ifdef CONFIG_CPU_LOONGSON64 + case lwc2_op: + er = kvm_vz_gpsi_lwc2(inst, opc, cause, vcpu); + break; +#endif + case spec3_op: + switch (inst.spec3_format.func) { +#ifdef CONFIG_CPU_MIPSR6 + case cache6_op: + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); + er = kvm_vz_gpsi_cache(inst, opc, cause, vcpu); + break; +#endif + case rdhwr_op: + if (inst.r_format.rs || (inst.r_format.re >> 3)) + goto unknown; + + rd = inst.r_format.rd; + rt = inst.r_format.rt; + sel = inst.r_format.re & 0x7; + + switch (rd) { + case MIPS_HWR_CC: /* Read count register */ + arch->gprs[rt] = + (long)(int)kvm_mips_read_count(vcpu); + break; + default: + trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, + KVM_TRACE_HWR(rd, sel), 0); + goto unknown; + } + + trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, + KVM_TRACE_HWR(rd, sel), arch->gprs[rt]); + + er = update_pc(vcpu, cause); + break; + default: + goto unknown; + } + break; +unknown: + + default: + kvm_err("GPSI exception not supported (%p/%#x)\n", + opc, inst.word); + kvm_arch_vcpu_dump_regs(vcpu); + er = EMULATE_FAIL; + break; + } + + return er; +} + +static enum emulation_result kvm_trap_vz_handle_gsfc(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + struct kvm_vcpu_arch *arch = &vcpu->arch; + union mips_instruction inst; + int err; + + /* + * Fetch the instruction. + */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) + return EMULATE_FAIL; + + /* complete MTC0 on behalf of guest and advance EPC */ + if (inst.c0r_format.opcode == cop0_op && + inst.c0r_format.rs == mtc_op && + inst.c0r_format.z == 0) { + int rt = inst.c0r_format.rt; + int rd = inst.c0r_format.rd; + int sel = inst.c0r_format.sel; + unsigned int val = arch->gprs[rt]; + unsigned int old_val, change; + + trace_kvm_hwr(vcpu, KVM_TRACE_MTC0, KVM_TRACE_COP0(rd, sel), + val); + + if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { + /* FR bit should read as zero if no FPU */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + val &= ~(ST0_CU1 | ST0_FR); + + /* + * Also don't allow FR to be set if host doesn't support + * it. + */ + if (!(boot_cpu_data.fpu_id & MIPS_FPIR_F64)) + val &= ~ST0_FR; + + old_val = read_gc0_status(); + change = val ^ old_val; + + if (change & ST0_FR) { + /* + * FPU and Vector register state is made + * UNPREDICTABLE by a change of FR, so don't + * even bother saving it. + */ + kvm_drop_fpu(vcpu); + } + + /* + * If MSA state is already live, it is undefined how it + * interacts with FR=0 FPU state, and we don't want to + * hit reserved instruction exceptions trying to save + * the MSA state later when CU=1 && FR=1, so play it + * safe and save it first. + */ + if (change & ST0_CU1 && !(val & ST0_FR) && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) + kvm_lose_fpu(vcpu); + + write_gc0_status(val); + } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) { + u32 old_cause = read_gc0_cause(); + u32 change = old_cause ^ val; + + /* DC bit enabling/disabling timer? */ + if (change & CAUSEF_DC) { + if (val & CAUSEF_DC) { + kvm_vz_lose_htimer(vcpu); + kvm_mips_count_disable_cause(vcpu); + } else { + kvm_mips_count_enable_cause(vcpu); + } + } + + /* Only certain bits are RW to the guest */ + change &= (CAUSEF_DC | CAUSEF_IV | CAUSEF_WP | + CAUSEF_IP0 | CAUSEF_IP1); + + /* WP can only be cleared */ + change &= ~CAUSEF_WP | old_cause; + + write_gc0_cause(old_cause ^ change); + } else if ((rd == MIPS_CP0_STATUS) && (sel == 1)) { /* IntCtl */ + write_gc0_intctl(val); + } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) { + old_val = read_gc0_config5(); + change = val ^ old_val; + /* Handle changes in FPU/MSA modes */ + preempt_disable(); + + /* + * Propagate FRE changes immediately if the FPU + * context is already loaded. + */ + if (change & MIPS_CONF5_FRE && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) + change_c0_config5(MIPS_CONF5_FRE, val); + + preempt_enable(); + + val = old_val ^ + (change & kvm_vz_config5_guest_wrmask(vcpu)); + write_gc0_config5(val); + } else { + kvm_err("Handle GSFC, unsupported field change @ %p: %#x\n", + opc, inst.word); + er = EMULATE_FAIL; + } + + if (er != EMULATE_FAIL) + er = update_pc(vcpu, cause); + } else { + kvm_err("Handle GSFC, unrecognized instruction @ %p: %#x\n", + opc, inst.word); + er = EMULATE_FAIL; + } + + return er; +} + +static enum emulation_result kvm_trap_vz_handle_ghfc(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + /* + * Presumably this is due to MC (guest mode change), so lets trace some + * relevant info. + */ + trace_kvm_guest_mode_change(vcpu); + + return EMULATE_DONE; +} + +static enum emulation_result kvm_trap_vz_handle_hc(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + union mips_instruction inst; + unsigned long curr_pc; + int err; + + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) + return EMULATE_FAIL; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + er = kvm_mips_emul_hypcall(vcpu, inst); + if (er == EMULATE_FAIL) + vcpu->arch.pc = curr_pc; + + return er; +} + +static enum emulation_result kvm_trap_vz_no_handler_guest_exit(u32 gexccode, + u32 cause, + u32 *opc, + struct kvm_vcpu *vcpu) +{ + u32 inst; + + /* + * Fetch the instruction. + */ + if (cause & CAUSEF_BD) + opc += 1; + kvm_get_badinstr(opc, vcpu, &inst); + + kvm_err("Guest Exception Code: %d not yet handled @ PC: %p, inst: 0x%08x Status: %#x\n", + gexccode, opc, inst, read_gc0_status()); + + return EMULATE_FAIL; +} + +static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu) +{ + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + u32 gexccode = (vcpu->arch.host_cp0_guestctl0 & + MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT; + int ret = RESUME_GUEST; + + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_GEXCCODE_BASE + gexccode); + switch (gexccode) { + case MIPS_GCTL0_GEXC_GPSI: + ++vcpu->stat.vz_gpsi_exits; + er = kvm_trap_vz_handle_gpsi(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_GSFC: + ++vcpu->stat.vz_gsfc_exits; + er = kvm_trap_vz_handle_gsfc(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_HC: + ++vcpu->stat.vz_hc_exits; + er = kvm_trap_vz_handle_hc(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_GRR: + ++vcpu->stat.vz_grr_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + case MIPS_GCTL0_GEXC_GVA: + ++vcpu->stat.vz_gva_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + case MIPS_GCTL0_GEXC_GHFC: + ++vcpu->stat.vz_ghfc_exits; + er = kvm_trap_vz_handle_ghfc(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_GPA: + ++vcpu->stat.vz_gpa_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + default: + ++vcpu->stat.vz_resvd_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + + } + + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else if (er == EMULATE_HYPERCALL) { + ret = kvm_mips_handle_hypcall(vcpu); + } else { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +/** + * kvm_trap_vz_handle_cop_unusable() - Guest used unusable coprocessor. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use a coprocessor which hasn't been allowed + * by the root context. + * + * Return: value indicating whether to resume the host or the guest + * (RESUME_HOST or RESUME_GUEST) + */ +static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu) +{ + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_FAIL; + int ret = RESUME_GUEST; + + if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) { + /* + * If guest FPU not present, the FPU operation should have been + * treated as a reserved instruction! + * If FPU already in use, we shouldn't get this at all. + */ + if (WARN_ON(!kvm_mips_guest_has_fpu(&vcpu->arch) || + vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)) { + preempt_enable(); + return EMULATE_FAIL; + } + + kvm_own_fpu(vcpu); + er = EMULATE_DONE; + } + /* other coprocessors not handled */ + + switch (er) { + case EMULATE_DONE: + ret = RESUME_GUEST; + break; + + case EMULATE_FAIL: + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + break; + + default: + BUG(); + } + return ret; +} + +/** + * kvm_trap_vz_handle_msa_disabled() - Guest used MSA while disabled in root. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use MSA when it is disabled in the root + * context. + * + * Return: value indicating whether to resume the host or the guest + * (RESUME_HOST or RESUME_GUEST) + */ +static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu) +{ + /* + * If MSA not present or not exposed to guest or FR=0, the MSA operation + * should have been treated as a reserved instruction! + * Same if CU1=1, FR=0. + * If MSA already in use, we shouldn't get this at all. + */ + if (!kvm_mips_guest_has_msa(&vcpu->arch) || + (read_gc0_status() & (ST0_CU1 | ST0_FR)) == ST0_CU1 || + !(read_gc0_config5() & MIPS_CONF5_MSAEN) || + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + kvm_own_msa(vcpu); + + return RESUME_GUEST; +} + +static int kvm_trap_vz_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + ulong badvaddr = vcpu->arch.host_cp0_badvaddr; + union mips_instruction inst; + enum emulation_result er = EMULATE_DONE; + int err, ret = RESUME_GUEST; + + if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, false)) { + /* A code fetch fault doesn't count as an MMIO */ + if (kvm_is_ifetch_fault(&vcpu->arch)) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Fetch the instruction */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Treat as MMIO */ + er = kvm_mips_emulate_load(inst, cause, vcpu); + if (er == EMULATE_FAIL) { + kvm_err("Guest Emulate Load from MMIO space failed: PC: %p, BadVaddr: %#lx\n", + opc, badvaddr); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } + } + + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else if (er == EMULATE_DO_MMIO) { + run->exit_reason = KVM_EXIT_MMIO; + ret = RESUME_HOST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_vz_handle_tlb_st_miss(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + ulong badvaddr = vcpu->arch.host_cp0_badvaddr; + union mips_instruction inst; + enum emulation_result er = EMULATE_DONE; + int err; + int ret = RESUME_GUEST; + + /* Just try the access again if we couldn't do the translation */ + if (kvm_vz_badvaddr_to_gpa(vcpu, badvaddr, &badvaddr)) + return RESUME_GUEST; + vcpu->arch.host_cp0_badvaddr = badvaddr; + + if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, true)) { + /* Fetch the instruction */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Treat as MMIO */ + er = kvm_mips_emulate_store(inst, cause, vcpu); + if (er == EMULATE_FAIL) { + kvm_err("Guest Emulate Store to MMIO space failed: PC: %p, BadVaddr: %#lx\n", + opc, badvaddr); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } + } + + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else if (er == EMULATE_DO_MMIO) { + run->exit_reason = KVM_EXIT_MMIO; + ret = RESUME_HOST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static u64 kvm_vz_get_one_regs[] = { + KVM_REG_MIPS_CP0_INDEX, + KVM_REG_MIPS_CP0_ENTRYLO0, + KVM_REG_MIPS_CP0_ENTRYLO1, + KVM_REG_MIPS_CP0_CONTEXT, + KVM_REG_MIPS_CP0_PAGEMASK, + KVM_REG_MIPS_CP0_PAGEGRAIN, + KVM_REG_MIPS_CP0_WIRED, + KVM_REG_MIPS_CP0_HWRENA, + KVM_REG_MIPS_CP0_BADVADDR, + KVM_REG_MIPS_CP0_COUNT, + KVM_REG_MIPS_CP0_ENTRYHI, + KVM_REG_MIPS_CP0_COMPARE, + KVM_REG_MIPS_CP0_STATUS, + KVM_REG_MIPS_CP0_INTCTL, + KVM_REG_MIPS_CP0_CAUSE, + KVM_REG_MIPS_CP0_EPC, + KVM_REG_MIPS_CP0_PRID, + KVM_REG_MIPS_CP0_EBASE, + KVM_REG_MIPS_CP0_CONFIG, + KVM_REG_MIPS_CP0_CONFIG1, + KVM_REG_MIPS_CP0_CONFIG2, + KVM_REG_MIPS_CP0_CONFIG3, + KVM_REG_MIPS_CP0_CONFIG4, + KVM_REG_MIPS_CP0_CONFIG5, + KVM_REG_MIPS_CP0_CONFIG6, +#ifdef CONFIG_64BIT + KVM_REG_MIPS_CP0_XCONTEXT, +#endif + KVM_REG_MIPS_CP0_ERROREPC, + + KVM_REG_MIPS_COUNT_CTL, + KVM_REG_MIPS_COUNT_RESUME, + KVM_REG_MIPS_COUNT_HZ, +}; + +static u64 kvm_vz_get_one_regs_contextconfig[] = { + KVM_REG_MIPS_CP0_CONTEXTCONFIG, +#ifdef CONFIG_64BIT + KVM_REG_MIPS_CP0_XCONTEXTCONFIG, +#endif +}; + +static u64 kvm_vz_get_one_regs_segments[] = { + KVM_REG_MIPS_CP0_SEGCTL0, + KVM_REG_MIPS_CP0_SEGCTL1, + KVM_REG_MIPS_CP0_SEGCTL2, +}; + +static u64 kvm_vz_get_one_regs_htw[] = { + KVM_REG_MIPS_CP0_PWBASE, + KVM_REG_MIPS_CP0_PWFIELD, + KVM_REG_MIPS_CP0_PWSIZE, + KVM_REG_MIPS_CP0_PWCTL, +}; + +static u64 kvm_vz_get_one_regs_kscratch[] = { + KVM_REG_MIPS_CP0_KSCRATCH1, + KVM_REG_MIPS_CP0_KSCRATCH2, + KVM_REG_MIPS_CP0_KSCRATCH3, + KVM_REG_MIPS_CP0_KSCRATCH4, + KVM_REG_MIPS_CP0_KSCRATCH5, + KVM_REG_MIPS_CP0_KSCRATCH6, +}; + +static unsigned long kvm_vz_num_regs(struct kvm_vcpu *vcpu) +{ + unsigned long ret; + + ret = ARRAY_SIZE(kvm_vz_get_one_regs); + if (cpu_guest_has_userlocal) + ++ret; + if (cpu_guest_has_badinstr) + ++ret; + if (cpu_guest_has_badinstrp) + ++ret; + if (cpu_guest_has_contextconfig) + ret += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig); + if (cpu_guest_has_segments) + ret += ARRAY_SIZE(kvm_vz_get_one_regs_segments); + if (cpu_guest_has_htw || cpu_guest_has_ldpte) + ret += ARRAY_SIZE(kvm_vz_get_one_regs_htw); + if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) + ret += 1 + ARRAY_SIZE(vcpu->arch.maar); + ret += __arch_hweight8(cpu_data[0].guest.kscratch_mask); + + return ret; +} + +static int kvm_vz_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) +{ + u64 index; + unsigned int i; + + if (copy_to_user(indices, kvm_vz_get_one_regs, + sizeof(kvm_vz_get_one_regs))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs); + + if (cpu_guest_has_userlocal) { + index = KVM_REG_MIPS_CP0_USERLOCAL; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + if (cpu_guest_has_badinstr) { + index = KVM_REG_MIPS_CP0_BADINSTR; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + if (cpu_guest_has_badinstrp) { + index = KVM_REG_MIPS_CP0_BADINSTRP; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + if (cpu_guest_has_contextconfig) { + if (copy_to_user(indices, kvm_vz_get_one_regs_contextconfig, + sizeof(kvm_vz_get_one_regs_contextconfig))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig); + } + if (cpu_guest_has_segments) { + if (copy_to_user(indices, kvm_vz_get_one_regs_segments, + sizeof(kvm_vz_get_one_regs_segments))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs_segments); + } + if (cpu_guest_has_htw || cpu_guest_has_ldpte) { + if (copy_to_user(indices, kvm_vz_get_one_regs_htw, + sizeof(kvm_vz_get_one_regs_htw))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs_htw); + } + if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) { + for (i = 0; i < ARRAY_SIZE(vcpu->arch.maar); ++i) { + index = KVM_REG_MIPS_CP0_MAAR(i); + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + + index = KVM_REG_MIPS_CP0_MAARI; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + for (i = 0; i < 6; ++i) { + if (!cpu_guest_has_kscr(i + 2)) + continue; + + if (copy_to_user(indices, &kvm_vz_get_one_regs_kscratch[i], + sizeof(kvm_vz_get_one_regs_kscratch[i]))) + return -EFAULT; + ++indices; + } + + return 0; +} + +static inline s64 entrylo_kvm_to_user(unsigned long v) +{ + s64 mask, ret = v; + + if (BITS_PER_LONG == 32) { + /* + * KVM API exposes 64-bit version of the register, so move the + * RI/XI bits up into place. + */ + mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI; + ret &= ~mask; + ret |= ((s64)v & mask) << 32; + } + return ret; +} + +static inline unsigned long entrylo_user_to_kvm(s64 v) +{ + unsigned long mask, ret = v; + + if (BITS_PER_LONG == 32) { + /* + * KVM API exposes 64-bit versiono of the register, so move the + * RI/XI bits down into place. + */ + mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI; + ret &= ~mask; + ret |= (v >> 32) & mask; + } + return ret; +} + +static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + s64 *v) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + unsigned int idx; + + switch (reg->id) { + case KVM_REG_MIPS_CP0_INDEX: + *v = (long)read_gc0_index(); + break; + case KVM_REG_MIPS_CP0_ENTRYLO0: + *v = entrylo_kvm_to_user(read_gc0_entrylo0()); + break; + case KVM_REG_MIPS_CP0_ENTRYLO1: + *v = entrylo_kvm_to_user(read_gc0_entrylo1()); + break; + case KVM_REG_MIPS_CP0_CONTEXT: + *v = (long)read_gc0_context(); + break; + case KVM_REG_MIPS_CP0_CONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + *v = read_gc0_contextconfig(); + break; + case KVM_REG_MIPS_CP0_USERLOCAL: + if (!cpu_guest_has_userlocal) + return -EINVAL; + *v = read_gc0_userlocal(); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + *v = read_gc0_xcontextconfig(); + break; +#endif + case KVM_REG_MIPS_CP0_PAGEMASK: + *v = (long)read_gc0_pagemask(); + break; + case KVM_REG_MIPS_CP0_PAGEGRAIN: + *v = (long)read_gc0_pagegrain(); + break; + case KVM_REG_MIPS_CP0_SEGCTL0: + if (!cpu_guest_has_segments) + return -EINVAL; + *v = read_gc0_segctl0(); + break; + case KVM_REG_MIPS_CP0_SEGCTL1: + if (!cpu_guest_has_segments) + return -EINVAL; + *v = read_gc0_segctl1(); + break; + case KVM_REG_MIPS_CP0_SEGCTL2: + if (!cpu_guest_has_segments) + return -EINVAL; + *v = read_gc0_segctl2(); + break; + case KVM_REG_MIPS_CP0_PWBASE: + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) + return -EINVAL; + *v = read_gc0_pwbase(); + break; + case KVM_REG_MIPS_CP0_PWFIELD: + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) + return -EINVAL; + *v = read_gc0_pwfield(); + break; + case KVM_REG_MIPS_CP0_PWSIZE: + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) + return -EINVAL; + *v = read_gc0_pwsize(); + break; + case KVM_REG_MIPS_CP0_WIRED: + *v = (long)read_gc0_wired(); + break; + case KVM_REG_MIPS_CP0_PWCTL: + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) + return -EINVAL; + *v = read_gc0_pwctl(); + break; + case KVM_REG_MIPS_CP0_HWRENA: + *v = (long)read_gc0_hwrena(); + break; + case KVM_REG_MIPS_CP0_BADVADDR: + *v = (long)read_gc0_badvaddr(); + break; + case KVM_REG_MIPS_CP0_BADINSTR: + if (!cpu_guest_has_badinstr) + return -EINVAL; + *v = read_gc0_badinstr(); + break; + case KVM_REG_MIPS_CP0_BADINSTRP: + if (!cpu_guest_has_badinstrp) + return -EINVAL; + *v = read_gc0_badinstrp(); + break; + case KVM_REG_MIPS_CP0_COUNT: + *v = kvm_mips_read_count(vcpu); + break; + case KVM_REG_MIPS_CP0_ENTRYHI: + *v = (long)read_gc0_entryhi(); + break; + case KVM_REG_MIPS_CP0_COMPARE: + *v = (long)read_gc0_compare(); + break; + case KVM_REG_MIPS_CP0_STATUS: + *v = (long)read_gc0_status(); + break; + case KVM_REG_MIPS_CP0_INTCTL: + *v = read_gc0_intctl(); + break; + case KVM_REG_MIPS_CP0_CAUSE: + *v = (long)read_gc0_cause(); + break; + case KVM_REG_MIPS_CP0_EPC: + *v = (long)read_gc0_epc(); + break; + case KVM_REG_MIPS_CP0_PRID: + switch (boot_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Octeon III has a read-only guest.PRid */ + *v = read_gc0_prid(); + break; + default: + *v = (long)kvm_read_c0_guest_prid(cop0); + break; + } + break; + case KVM_REG_MIPS_CP0_EBASE: + *v = kvm_vz_read_gc0_ebase(); + break; + case KVM_REG_MIPS_CP0_CONFIG: + *v = read_gc0_config(); + break; + case KVM_REG_MIPS_CP0_CONFIG1: + if (!cpu_guest_has_conf1) + return -EINVAL; + *v = read_gc0_config1(); + break; + case KVM_REG_MIPS_CP0_CONFIG2: + if (!cpu_guest_has_conf2) + return -EINVAL; + *v = read_gc0_config2(); + break; + case KVM_REG_MIPS_CP0_CONFIG3: + if (!cpu_guest_has_conf3) + return -EINVAL; + *v = read_gc0_config3(); + break; + case KVM_REG_MIPS_CP0_CONFIG4: + if (!cpu_guest_has_conf4) + return -EINVAL; + *v = read_gc0_config4(); + break; + case KVM_REG_MIPS_CP0_CONFIG5: + if (!cpu_guest_has_conf5) + return -EINVAL; + *v = read_gc0_config5(); + break; + case KVM_REG_MIPS_CP0_CONFIG6: + *v = kvm_read_sw_gc0_config6(cop0); + break; + case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f): + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0); + if (idx >= ARRAY_SIZE(vcpu->arch.maar)) + return -EINVAL; + *v = vcpu->arch.maar[idx]; + break; + case KVM_REG_MIPS_CP0_MAARI: + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + *v = kvm_read_sw_gc0_maari(&vcpu->arch.cop0); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXT: + *v = read_gc0_xcontext(); + break; +#endif + case KVM_REG_MIPS_CP0_ERROREPC: + *v = (long)read_gc0_errorepc(); + break; + case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: + idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; + if (!cpu_guest_has_kscr(idx)) + return -EINVAL; + switch (idx) { + case 2: + *v = (long)read_gc0_kscratch1(); + break; + case 3: + *v = (long)read_gc0_kscratch2(); + break; + case 4: + *v = (long)read_gc0_kscratch3(); + break; + case 5: + *v = (long)read_gc0_kscratch4(); + break; + case 6: + *v = (long)read_gc0_kscratch5(); + break; + case 7: + *v = (long)read_gc0_kscratch6(); + break; + } + break; + case KVM_REG_MIPS_COUNT_CTL: + *v = vcpu->arch.count_ctl; + break; + case KVM_REG_MIPS_COUNT_RESUME: + *v = ktime_to_ns(vcpu->arch.count_resume); + break; + case KVM_REG_MIPS_COUNT_HZ: + *v = vcpu->arch.count_hz; + break; + default: + return -EINVAL; + } + return 0; +} + +static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + s64 v) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + unsigned int idx; + int ret = 0; + unsigned int cur, change; + + switch (reg->id) { + case KVM_REG_MIPS_CP0_INDEX: + write_gc0_index(v); + break; + case KVM_REG_MIPS_CP0_ENTRYLO0: + write_gc0_entrylo0(entrylo_user_to_kvm(v)); + break; + case KVM_REG_MIPS_CP0_ENTRYLO1: + write_gc0_entrylo1(entrylo_user_to_kvm(v)); + break; + case KVM_REG_MIPS_CP0_CONTEXT: + write_gc0_context(v); + break; + case KVM_REG_MIPS_CP0_CONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + write_gc0_contextconfig(v); + break; + case KVM_REG_MIPS_CP0_USERLOCAL: + if (!cpu_guest_has_userlocal) + return -EINVAL; + write_gc0_userlocal(v); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + write_gc0_xcontextconfig(v); + break; +#endif + case KVM_REG_MIPS_CP0_PAGEMASK: + write_gc0_pagemask(v); + break; + case KVM_REG_MIPS_CP0_PAGEGRAIN: + write_gc0_pagegrain(v); + break; + case KVM_REG_MIPS_CP0_SEGCTL0: + if (!cpu_guest_has_segments) + return -EINVAL; + write_gc0_segctl0(v); + break; + case KVM_REG_MIPS_CP0_SEGCTL1: + if (!cpu_guest_has_segments) + return -EINVAL; + write_gc0_segctl1(v); + break; + case KVM_REG_MIPS_CP0_SEGCTL2: + if (!cpu_guest_has_segments) + return -EINVAL; + write_gc0_segctl2(v); + break; + case KVM_REG_MIPS_CP0_PWBASE: + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) + return -EINVAL; + write_gc0_pwbase(v); + break; + case KVM_REG_MIPS_CP0_PWFIELD: + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) + return -EINVAL; + write_gc0_pwfield(v); + break; + case KVM_REG_MIPS_CP0_PWSIZE: + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) + return -EINVAL; + write_gc0_pwsize(v); + break; + case KVM_REG_MIPS_CP0_WIRED: + change_gc0_wired(MIPSR6_WIRED_WIRED, v); + break; + case KVM_REG_MIPS_CP0_PWCTL: + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) + return -EINVAL; + write_gc0_pwctl(v); + break; + case KVM_REG_MIPS_CP0_HWRENA: + write_gc0_hwrena(v); + break; + case KVM_REG_MIPS_CP0_BADVADDR: + write_gc0_badvaddr(v); + break; + case KVM_REG_MIPS_CP0_BADINSTR: + if (!cpu_guest_has_badinstr) + return -EINVAL; + write_gc0_badinstr(v); + break; + case KVM_REG_MIPS_CP0_BADINSTRP: + if (!cpu_guest_has_badinstrp) + return -EINVAL; + write_gc0_badinstrp(v); + break; + case KVM_REG_MIPS_CP0_COUNT: + kvm_mips_write_count(vcpu, v); + break; + case KVM_REG_MIPS_CP0_ENTRYHI: + write_gc0_entryhi(v); + break; + case KVM_REG_MIPS_CP0_COMPARE: + kvm_mips_write_compare(vcpu, v, false); + break; + case KVM_REG_MIPS_CP0_STATUS: + write_gc0_status(v); + break; + case KVM_REG_MIPS_CP0_INTCTL: + write_gc0_intctl(v); + break; + case KVM_REG_MIPS_CP0_CAUSE: + /* + * If the timer is stopped or started (DC bit) it must look + * atomic with changes to the timer interrupt pending bit (TI). + * A timer interrupt should not happen in between. + */ + if ((read_gc0_cause() ^ v) & CAUSEF_DC) { + if (v & CAUSEF_DC) { + /* disable timer first */ + kvm_mips_count_disable_cause(vcpu); + change_gc0_cause((u32)~CAUSEF_DC, v); + } else { + /* enable timer last */ + change_gc0_cause((u32)~CAUSEF_DC, v); + kvm_mips_count_enable_cause(vcpu); + } + } else { + write_gc0_cause(v); + } + break; + case KVM_REG_MIPS_CP0_EPC: + write_gc0_epc(v); + break; + case KVM_REG_MIPS_CP0_PRID: + switch (boot_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Octeon III has a guest.PRid, but its read-only */ + break; + default: + kvm_write_c0_guest_prid(cop0, v); + break; + } + break; + case KVM_REG_MIPS_CP0_EBASE: + kvm_vz_write_gc0_ebase(v); + break; + case KVM_REG_MIPS_CP0_CONFIG: + cur = read_gc0_config(); + change = (cur ^ v) & kvm_vz_config_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG1: + if (!cpu_guest_has_conf1) + break; + cur = read_gc0_config1(); + change = (cur ^ v) & kvm_vz_config1_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config1(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG2: + if (!cpu_guest_has_conf2) + break; + cur = read_gc0_config2(); + change = (cur ^ v) & kvm_vz_config2_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config2(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG3: + if (!cpu_guest_has_conf3) + break; + cur = read_gc0_config3(); + change = (cur ^ v) & kvm_vz_config3_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config3(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG4: + if (!cpu_guest_has_conf4) + break; + cur = read_gc0_config4(); + change = (cur ^ v) & kvm_vz_config4_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config4(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG5: + if (!cpu_guest_has_conf5) + break; + cur = read_gc0_config5(); + change = (cur ^ v) & kvm_vz_config5_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config5(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG6: + cur = kvm_read_sw_gc0_config6(cop0); + change = (cur ^ v) & kvm_vz_config6_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_sw_gc0_config6(cop0, (int)v); + } + break; + case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f): + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0); + if (idx >= ARRAY_SIZE(vcpu->arch.maar)) + return -EINVAL; + vcpu->arch.maar[idx] = mips_process_maar(dmtc_op, v); + break; + case KVM_REG_MIPS_CP0_MAARI: + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + kvm_write_maari(vcpu, v); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXT: + write_gc0_xcontext(v); + break; +#endif + case KVM_REG_MIPS_CP0_ERROREPC: + write_gc0_errorepc(v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: + idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; + if (!cpu_guest_has_kscr(idx)) + return -EINVAL; + switch (idx) { + case 2: + write_gc0_kscratch1(v); + break; + case 3: + write_gc0_kscratch2(v); + break; + case 4: + write_gc0_kscratch3(v); + break; + case 5: + write_gc0_kscratch4(v); + break; + case 6: + write_gc0_kscratch5(v); + break; + case 7: + write_gc0_kscratch6(v); + break; + } + break; + case KVM_REG_MIPS_COUNT_CTL: + ret = kvm_mips_set_count_ctl(vcpu, v); + break; + case KVM_REG_MIPS_COUNT_RESUME: + ret = kvm_mips_set_count_resume(vcpu, v); + break; + case KVM_REG_MIPS_COUNT_HZ: + ret = kvm_mips_set_count_hz(vcpu, v); + break; + default: + return -EINVAL; + } + return ret; +} + +#define guestid_cache(cpu) (cpu_data[cpu].guestid_cache) +static void kvm_vz_get_new_guestid(unsigned long cpu, struct kvm_vcpu *vcpu) +{ + unsigned long guestid = guestid_cache(cpu); + + if (!(++guestid & GUESTID_MASK)) { + if (cpu_has_vtag_icache) + flush_icache_all(); + + if (!guestid) /* fix version if needed */ + guestid = GUESTID_FIRST_VERSION; + + ++guestid; /* guestid 0 reserved for root */ + + /* start new guestid cycle */ + kvm_vz_local_flush_roottlb_all_guests(); + kvm_vz_local_flush_guesttlb_all(); + } + + guestid_cache(cpu) = guestid; +} + +/* Returns 1 if the guest TLB may be clobbered */ +static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu) +{ + int ret = 0; + int i; + + if (!kvm_request_pending(vcpu)) + return 0; + + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + if (cpu_has_guestid) { + /* Drop all GuestIDs for this VCPU */ + for_each_possible_cpu(i) + vcpu->arch.vzguestid[i] = 0; + /* This will clobber guest TLB contents too */ + ret = 1; + } + /* + * For Root ASID Dealias (RAD) we don't do anything here, but we + * still need the request to ensure we recheck asid_flush_mask. + * We can still return 0 as only the root TLB will be affected + * by a root ASID flush. + */ + } + + return ret; +} + +static void kvm_vz_vcpu_save_wired(struct kvm_vcpu *vcpu) +{ + unsigned int wired = read_gc0_wired(); + struct kvm_mips_tlb *tlbs; + int i; + + /* Expand the wired TLB array if necessary */ + wired &= MIPSR6_WIRED_WIRED; + if (wired > vcpu->arch.wired_tlb_limit) { + tlbs = krealloc(vcpu->arch.wired_tlb, wired * + sizeof(*vcpu->arch.wired_tlb), GFP_ATOMIC); + if (WARN_ON(!tlbs)) { + /* Save whatever we can */ + wired = vcpu->arch.wired_tlb_limit; + } else { + vcpu->arch.wired_tlb = tlbs; + vcpu->arch.wired_tlb_limit = wired; + } + } + + if (wired) + /* Save wired entries from the guest TLB */ + kvm_vz_save_guesttlb(vcpu->arch.wired_tlb, 0, wired); + /* Invalidate any dropped entries since last time */ + for (i = wired; i < vcpu->arch.wired_tlb_used; ++i) { + vcpu->arch.wired_tlb[i].tlb_hi = UNIQUE_GUEST_ENTRYHI(i); + vcpu->arch.wired_tlb[i].tlb_lo[0] = 0; + vcpu->arch.wired_tlb[i].tlb_lo[1] = 0; + vcpu->arch.wired_tlb[i].tlb_mask = 0; + } + vcpu->arch.wired_tlb_used = wired; +} + +static void kvm_vz_vcpu_load_wired(struct kvm_vcpu *vcpu) +{ + /* Load wired entries into the guest TLB */ + if (vcpu->arch.wired_tlb) + kvm_vz_load_guesttlb(vcpu->arch.wired_tlb, 0, + vcpu->arch.wired_tlb_used); +} + +static void kvm_vz_vcpu_load_tlb(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvm *kvm = vcpu->kvm; + struct mm_struct *gpa_mm = &kvm->arch.gpa_mm; + bool migrated; + + /* + * Are we entering guest context on a different CPU to last time? + * If so, the VCPU's guest TLB state on this CPU may be stale. + */ + migrated = (vcpu->arch.last_exec_cpu != cpu); + vcpu->arch.last_exec_cpu = cpu; + + /* + * A vcpu's GuestID is set in GuestCtl1.ID when the vcpu is loaded and + * remains set until another vcpu is loaded in. As a rule GuestRID + * remains zeroed when in root context unless the kernel is busy + * manipulating guest tlb entries. + */ + if (cpu_has_guestid) { + /* + * Check if our GuestID is of an older version and thus invalid. + * + * We also discard the stored GuestID if we've executed on + * another CPU, as the guest mappings may have changed without + * hypervisor knowledge. + */ + if (migrated || + (vcpu->arch.vzguestid[cpu] ^ guestid_cache(cpu)) & + GUESTID_VERSION_MASK) { + kvm_vz_get_new_guestid(cpu, vcpu); + vcpu->arch.vzguestid[cpu] = guestid_cache(cpu); + trace_kvm_guestid_change(vcpu, + vcpu->arch.vzguestid[cpu]); + } + + /* Restore GuestID */ + change_c0_guestctl1(GUESTID_MASK, vcpu->arch.vzguestid[cpu]); + } else { + /* + * The Guest TLB only stores a single guest's TLB state, so + * flush it if another VCPU has executed on this CPU. + * + * We also flush if we've executed on another CPU, as the guest + * mappings may have changed without hypervisor knowledge. + */ + if (migrated || last_exec_vcpu[cpu] != vcpu) + kvm_vz_local_flush_guesttlb_all(); + last_exec_vcpu[cpu] = vcpu; + + /* + * Root ASID dealiases guest GPA mappings in the root TLB. + * Allocate new root ASID if needed. + */ + if (cpumask_test_and_clear_cpu(cpu, &kvm->arch.asid_flush_mask)) + get_new_mmu_context(gpa_mm); + else + check_mmu_context(gpa_mm); + } +} + +static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + bool migrated, all; + + /* + * Have we migrated to a different CPU? + * If so, any old guest TLB state may be stale. + */ + migrated = (vcpu->arch.last_sched_cpu != cpu); + + /* + * Was this the last VCPU to run on this CPU? + * If not, any old guest state from this VCPU will have been clobbered. + */ + all = migrated || (last_vcpu[cpu] != vcpu); + last_vcpu[cpu] = vcpu; + + /* + * Restore CP0_Wired unconditionally as we clear it after use, and + * restore wired guest TLB entries (while in guest context). + */ + kvm_restore_gc0_wired(cop0); + if (current->flags & PF_VCPU) { + tlbw_use_hazard(); + kvm_vz_vcpu_load_tlb(vcpu, cpu); + kvm_vz_vcpu_load_wired(vcpu); + } + + /* + * Restore timer state regardless, as e.g. Cause.TI can change over time + * if left unmaintained. + */ + kvm_vz_restore_timer(vcpu); + + /* Set MC bit if we want to trace guest mode changes */ + if (kvm_trace_guest_mode_change) + set_c0_guestctl0(MIPS_GCTL0_MC); + else + clear_c0_guestctl0(MIPS_GCTL0_MC); + + /* Don't bother restoring registers multiple times unless necessary */ + if (!all) + return 0; + + /* + * Restore config registers first, as some implementations restrict + * writes to other registers when the corresponding feature bits aren't + * set. For example Status.CU1 cannot be set unless Config1.FP is set. + */ + kvm_restore_gc0_config(cop0); + if (cpu_guest_has_conf1) + kvm_restore_gc0_config1(cop0); + if (cpu_guest_has_conf2) + kvm_restore_gc0_config2(cop0); + if (cpu_guest_has_conf3) + kvm_restore_gc0_config3(cop0); + if (cpu_guest_has_conf4) + kvm_restore_gc0_config4(cop0); + if (cpu_guest_has_conf5) + kvm_restore_gc0_config5(cop0); + if (cpu_guest_has_conf6) + kvm_restore_gc0_config6(cop0); + if (cpu_guest_has_conf7) + kvm_restore_gc0_config7(cop0); + + kvm_restore_gc0_index(cop0); + kvm_restore_gc0_entrylo0(cop0); + kvm_restore_gc0_entrylo1(cop0); + kvm_restore_gc0_context(cop0); + if (cpu_guest_has_contextconfig) + kvm_restore_gc0_contextconfig(cop0); +#ifdef CONFIG_64BIT + kvm_restore_gc0_xcontext(cop0); + if (cpu_guest_has_contextconfig) + kvm_restore_gc0_xcontextconfig(cop0); +#endif + kvm_restore_gc0_pagemask(cop0); + kvm_restore_gc0_pagegrain(cop0); + kvm_restore_gc0_hwrena(cop0); + kvm_restore_gc0_badvaddr(cop0); + kvm_restore_gc0_entryhi(cop0); + kvm_restore_gc0_status(cop0); + kvm_restore_gc0_intctl(cop0); + kvm_restore_gc0_epc(cop0); + kvm_vz_write_gc0_ebase(kvm_read_sw_gc0_ebase(cop0)); + if (cpu_guest_has_userlocal) + kvm_restore_gc0_userlocal(cop0); + + kvm_restore_gc0_errorepc(cop0); + + /* restore KScratch registers if enabled in guest */ + if (cpu_guest_has_conf4) { + if (cpu_guest_has_kscr(2)) + kvm_restore_gc0_kscratch1(cop0); + if (cpu_guest_has_kscr(3)) + kvm_restore_gc0_kscratch2(cop0); + if (cpu_guest_has_kscr(4)) + kvm_restore_gc0_kscratch3(cop0); + if (cpu_guest_has_kscr(5)) + kvm_restore_gc0_kscratch4(cop0); + if (cpu_guest_has_kscr(6)) + kvm_restore_gc0_kscratch5(cop0); + if (cpu_guest_has_kscr(7)) + kvm_restore_gc0_kscratch6(cop0); + } + + if (cpu_guest_has_badinstr) + kvm_restore_gc0_badinstr(cop0); + if (cpu_guest_has_badinstrp) + kvm_restore_gc0_badinstrp(cop0); + + if (cpu_guest_has_segments) { + kvm_restore_gc0_segctl0(cop0); + kvm_restore_gc0_segctl1(cop0); + kvm_restore_gc0_segctl2(cop0); + } + + /* restore HTW registers */ + if (cpu_guest_has_htw || cpu_guest_has_ldpte) { + kvm_restore_gc0_pwbase(cop0); + kvm_restore_gc0_pwfield(cop0); + kvm_restore_gc0_pwsize(cop0); + kvm_restore_gc0_pwctl(cop0); + } + + /* restore Root.GuestCtl2 from unused Guest guestctl2 register */ + if (cpu_has_guestctl2) + write_c0_guestctl2( + cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL]); + + /* + * We should clear linked load bit to break interrupted atomics. This + * prevents a SC on the next VCPU from succeeding by matching a LL on + * the previous VCPU. + */ + if (vcpu->kvm->created_vcpus > 1) + write_gc0_lladdr(0); + + return 0; +} + +static int kvm_vz_vcpu_put(struct kvm_vcpu *vcpu, int cpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + + if (current->flags & PF_VCPU) + kvm_vz_vcpu_save_wired(vcpu); + + kvm_lose_fpu(vcpu); + + kvm_save_gc0_index(cop0); + kvm_save_gc0_entrylo0(cop0); + kvm_save_gc0_entrylo1(cop0); + kvm_save_gc0_context(cop0); + if (cpu_guest_has_contextconfig) + kvm_save_gc0_contextconfig(cop0); +#ifdef CONFIG_64BIT + kvm_save_gc0_xcontext(cop0); + if (cpu_guest_has_contextconfig) + kvm_save_gc0_xcontextconfig(cop0); +#endif + kvm_save_gc0_pagemask(cop0); + kvm_save_gc0_pagegrain(cop0); + kvm_save_gc0_wired(cop0); + /* allow wired TLB entries to be overwritten */ + clear_gc0_wired(MIPSR6_WIRED_WIRED); + kvm_save_gc0_hwrena(cop0); + kvm_save_gc0_badvaddr(cop0); + kvm_save_gc0_entryhi(cop0); + kvm_save_gc0_status(cop0); + kvm_save_gc0_intctl(cop0); + kvm_save_gc0_epc(cop0); + kvm_write_sw_gc0_ebase(cop0, kvm_vz_read_gc0_ebase()); + if (cpu_guest_has_userlocal) + kvm_save_gc0_userlocal(cop0); + + /* only save implemented config registers */ + kvm_save_gc0_config(cop0); + if (cpu_guest_has_conf1) + kvm_save_gc0_config1(cop0); + if (cpu_guest_has_conf2) + kvm_save_gc0_config2(cop0); + if (cpu_guest_has_conf3) + kvm_save_gc0_config3(cop0); + if (cpu_guest_has_conf4) + kvm_save_gc0_config4(cop0); + if (cpu_guest_has_conf5) + kvm_save_gc0_config5(cop0); + if (cpu_guest_has_conf6) + kvm_save_gc0_config6(cop0); + if (cpu_guest_has_conf7) + kvm_save_gc0_config7(cop0); + + kvm_save_gc0_errorepc(cop0); + + /* save KScratch registers if enabled in guest */ + if (cpu_guest_has_conf4) { + if (cpu_guest_has_kscr(2)) + kvm_save_gc0_kscratch1(cop0); + if (cpu_guest_has_kscr(3)) + kvm_save_gc0_kscratch2(cop0); + if (cpu_guest_has_kscr(4)) + kvm_save_gc0_kscratch3(cop0); + if (cpu_guest_has_kscr(5)) + kvm_save_gc0_kscratch4(cop0); + if (cpu_guest_has_kscr(6)) + kvm_save_gc0_kscratch5(cop0); + if (cpu_guest_has_kscr(7)) + kvm_save_gc0_kscratch6(cop0); + } + + if (cpu_guest_has_badinstr) + kvm_save_gc0_badinstr(cop0); + if (cpu_guest_has_badinstrp) + kvm_save_gc0_badinstrp(cop0); + + if (cpu_guest_has_segments) { + kvm_save_gc0_segctl0(cop0); + kvm_save_gc0_segctl1(cop0); + kvm_save_gc0_segctl2(cop0); + } + + /* save HTW registers if enabled in guest */ + if (cpu_guest_has_ldpte || (cpu_guest_has_htw && + kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW)) { + kvm_save_gc0_pwbase(cop0); + kvm_save_gc0_pwfield(cop0); + kvm_save_gc0_pwsize(cop0); + kvm_save_gc0_pwctl(cop0); + } + + kvm_vz_save_timer(vcpu); + + /* save Root.GuestCtl2 in unused Guest guestctl2 register */ + if (cpu_has_guestctl2) + cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] = + read_c0_guestctl2(); + + return 0; +} + +/** + * kvm_vz_resize_guest_vtlb() - Attempt to resize guest VTLB. + * @size: Number of guest VTLB entries (0 < @size <= root VTLB entries). + * + * Attempt to resize the guest VTLB by writing guest Config registers. This is + * necessary for cores with a shared root/guest TLB to avoid overlap with wired + * entries in the root VTLB. + * + * Returns: The resulting guest VTLB size. + */ +static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size) +{ + unsigned int config4 = 0, ret = 0, limit; + + /* Write MMUSize - 1 into guest Config registers */ + if (cpu_guest_has_conf1) + change_gc0_config1(MIPS_CONF1_TLBS, + (size - 1) << MIPS_CONF1_TLBS_SHIFT); + if (cpu_guest_has_conf4) { + config4 = read_gc0_config4(); + if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT) { + config4 &= ~MIPS_CONF4_VTLBSIZEEXT; + config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) << + MIPS_CONF4_VTLBSIZEEXT_SHIFT; + } else if ((config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT) { + config4 &= ~MIPS_CONF4_MMUSIZEEXT; + config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) << + MIPS_CONF4_MMUSIZEEXT_SHIFT; + } + write_gc0_config4(config4); + } + + /* + * Set Guest.Wired.Limit = 0 (no limit up to Guest.MMUSize-1), unless it + * would exceed Root.Wired.Limit (clearing Guest.Wired.Wired so write + * not dropped) + */ + if (cpu_has_mips_r6) { + limit = (read_c0_wired() & MIPSR6_WIRED_LIMIT) >> + MIPSR6_WIRED_LIMIT_SHIFT; + if (size - 1 <= limit) + limit = 0; + write_gc0_wired(limit << MIPSR6_WIRED_LIMIT_SHIFT); + } + + /* Read back MMUSize - 1 */ + back_to_back_c0_hazard(); + if (cpu_guest_has_conf1) + ret = (read_gc0_config1() & MIPS_CONF1_TLBS) >> + MIPS_CONF1_TLBS_SHIFT; + if (config4) { + if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT) + ret |= ((config4 & MIPS_CONF4_VTLBSIZEEXT) >> + MIPS_CONF4_VTLBSIZEEXT_SHIFT) << + MIPS_CONF1_TLBS_SIZE; + else if ((config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT) + ret |= ((config4 & MIPS_CONF4_MMUSIZEEXT) >> + MIPS_CONF4_MMUSIZEEXT_SHIFT) << + MIPS_CONF1_TLBS_SIZE; + } + return ret + 1; +} + +static int kvm_vz_hardware_enable(void) +{ + unsigned int mmu_size, guest_mmu_size, ftlb_size; + u64 guest_cvmctl, cvmvmconfig; + + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Set up guest timer/perfcount IRQ lines */ + guest_cvmctl = read_gc0_cvmctl(); + guest_cvmctl &= ~CVMCTL_IPTI; + guest_cvmctl |= 7ull << CVMCTL_IPTI_SHIFT; + guest_cvmctl &= ~CVMCTL_IPPCI; + guest_cvmctl |= 6ull << CVMCTL_IPPCI_SHIFT; + write_gc0_cvmctl(guest_cvmctl); + + cvmvmconfig = read_c0_cvmvmconfig(); + /* No I/O hole translation. */ + cvmvmconfig |= CVMVMCONF_DGHT; + /* Halve the root MMU size */ + mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1) + >> CVMVMCONF_MMUSIZEM1_S) + 1; + guest_mmu_size = mmu_size / 2; + mmu_size -= guest_mmu_size; + cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1; + cvmvmconfig |= mmu_size - 1; + write_c0_cvmvmconfig(cvmvmconfig); + + /* Update our records */ + current_cpu_data.tlbsize = mmu_size; + current_cpu_data.tlbsizevtlb = mmu_size; + current_cpu_data.guest.tlbsize = guest_mmu_size; + + /* Flush moved entries in new (guest) context */ + kvm_vz_local_flush_guesttlb_all(); + break; + default: + /* + * ImgTec cores tend to use a shared root/guest TLB. To avoid + * overlap of root wired and guest entries, the guest TLB may + * need resizing. + */ + mmu_size = current_cpu_data.tlbsizevtlb; + ftlb_size = current_cpu_data.tlbsize - mmu_size; + + /* Try switching to maximum guest VTLB size for flush */ + guest_mmu_size = kvm_vz_resize_guest_vtlb(mmu_size); + current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size; + kvm_vz_local_flush_guesttlb_all(); + + /* + * Reduce to make space for root wired entries and at least 2 + * root non-wired entries. This does assume that long-term wired + * entries won't be added later. + */ + guest_mmu_size = mmu_size - num_wired_entries() - 2; + guest_mmu_size = kvm_vz_resize_guest_vtlb(guest_mmu_size); + current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size; + + /* + * Write the VTLB size, but if another CPU has already written, + * check it matches or we won't provide a consistent view to the + * guest. If this ever happens it suggests an asymmetric number + * of wired entries. + */ + if (cmpxchg(&kvm_vz_guest_vtlb_size, 0, guest_mmu_size) && + WARN(guest_mmu_size != kvm_vz_guest_vtlb_size, + "Available guest VTLB size mismatch")) + return -EINVAL; + break; + } + + /* + * Enable virtualization features granting guest direct control of + * certain features: + * CP0=1: Guest coprocessor 0 context. + * AT=Guest: Guest MMU. + * CG=1: Hit (virtual address) CACHE operations (optional). + * CF=1: Guest Config registers. + * CGI=1: Indexed flush CACHE operations (optional). + */ + write_c0_guestctl0(MIPS_GCTL0_CP0 | + (MIPS_GCTL0_AT_GUEST << MIPS_GCTL0_AT_SHIFT) | + MIPS_GCTL0_CG | MIPS_GCTL0_CF); + if (cpu_has_guestctl0ext) { + if (current_cpu_type() != CPU_LOONGSON64) + set_c0_guestctl0ext(MIPS_GCTL0EXT_CGI); + else + clear_c0_guestctl0ext(MIPS_GCTL0EXT_CGI); + } + + if (cpu_has_guestid) { + write_c0_guestctl1(0); + kvm_vz_local_flush_roottlb_all_guests(); + + GUESTID_MASK = current_cpu_data.guestid_mask; + GUESTID_FIRST_VERSION = GUESTID_MASK + 1; + GUESTID_VERSION_MASK = ~GUESTID_MASK; + + current_cpu_data.guestid_cache = GUESTID_FIRST_VERSION; + } + + /* clear any pending injected virtual guest interrupts */ + if (cpu_has_guestctl2) + clear_c0_guestctl2(0x3f << 10); + +#ifdef CONFIG_CPU_LOONGSON64 + /* Control guest CCA attribute */ + if (cpu_has_csr()) + csr_writel(csr_readl(0xffffffec) | 0x1, 0xffffffec); +#endif + + return 0; +} + +static void kvm_vz_hardware_disable(void) +{ + u64 cvmvmconfig; + unsigned int mmu_size; + + /* Flush any remaining guest TLB entries */ + kvm_vz_local_flush_guesttlb_all(); + + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* + * Allocate whole TLB for root. Existing guest TLB entries will + * change ownership to the root TLB. We should be safe though as + * they've already been flushed above while in guest TLB. + */ + cvmvmconfig = read_c0_cvmvmconfig(); + mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1) + >> CVMVMCONF_MMUSIZEM1_S) + 1; + cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1; + cvmvmconfig |= mmu_size - 1; + write_c0_cvmvmconfig(cvmvmconfig); + + /* Update our records */ + current_cpu_data.tlbsize = mmu_size; + current_cpu_data.tlbsizevtlb = mmu_size; + current_cpu_data.guest.tlbsize = 0; + + /* Flush moved entries in new (root) context */ + local_flush_tlb_all(); + break; + } + + if (cpu_has_guestid) { + write_c0_guestctl1(0); + kvm_vz_local_flush_roottlb_all_guests(); + } +} + +static int kvm_vz_check_extension(struct kvm *kvm, long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_MIPS_VZ: + /* we wouldn't be here unless cpu_has_vz */ + r = 1; + break; +#ifdef CONFIG_64BIT + case KVM_CAP_MIPS_64BIT: + /* We support 64-bit registers/operations and addresses */ + r = 2; + break; +#endif + case KVM_CAP_IOEVENTFD: + r = 1; + break; + default: + r = 0; + break; + } + + return r; +} + +static int kvm_vz_vcpu_init(struct kvm_vcpu *vcpu) +{ + int i; + + for_each_possible_cpu(i) + vcpu->arch.vzguestid[i] = 0; + + return 0; +} + +static void kvm_vz_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + int cpu; + + /* + * If the VCPU is freed and reused as another VCPU, we don't want the + * matching pointer wrongly hanging around in last_vcpu[] or + * last_exec_vcpu[]. + */ + for_each_possible_cpu(cpu) { + if (last_vcpu[cpu] == vcpu) + last_vcpu[cpu] = NULL; + if (last_exec_vcpu[cpu] == vcpu) + last_exec_vcpu[cpu] = NULL; + } +} + +static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = &vcpu->arch.cop0; + unsigned long count_hz = 100*1000*1000; /* default to 100 MHz */ + + /* + * Start off the timer at the same frequency as the host timer, but the + * soft timer doesn't handle frequencies greater than 1GHz yet. + */ + if (mips_hpt_frequency && mips_hpt_frequency <= NSEC_PER_SEC) + count_hz = mips_hpt_frequency; + kvm_mips_init_count(vcpu, count_hz); + + /* + * Initialize guest register state to valid architectural reset state. + */ + + /* PageGrain */ + if (cpu_has_mips_r5 || cpu_has_mips_r6) + kvm_write_sw_gc0_pagegrain(cop0, PG_RIE | PG_XIE | PG_IEC); + /* Wired */ + if (cpu_has_mips_r6) + kvm_write_sw_gc0_wired(cop0, + read_gc0_wired() & MIPSR6_WIRED_LIMIT); + /* Status */ + kvm_write_sw_gc0_status(cop0, ST0_BEV | ST0_ERL); + if (cpu_has_mips_r5 || cpu_has_mips_r6) + kvm_change_sw_gc0_status(cop0, ST0_FR, read_gc0_status()); + /* IntCtl */ + kvm_write_sw_gc0_intctl(cop0, read_gc0_intctl() & + (INTCTLF_IPFDC | INTCTLF_IPPCI | INTCTLF_IPTI)); + /* PRId */ + kvm_write_sw_gc0_prid(cop0, boot_cpu_data.processor_id); + /* EBase */ + kvm_write_sw_gc0_ebase(cop0, (s32)0x80000000 | vcpu->vcpu_id); + /* Config */ + kvm_save_gc0_config(cop0); + /* architecturally writable (e.g. from guest) */ + kvm_change_sw_gc0_config(cop0, CONF_CM_CMASK, + _page_cachable_default >> _CACHE_SHIFT); + /* architecturally read only, but maybe writable from root */ + kvm_change_sw_gc0_config(cop0, MIPS_CONF_MT, read_c0_config()); + if (cpu_guest_has_conf1) { + kvm_set_sw_gc0_config(cop0, MIPS_CONF_M); + /* Config1 */ + kvm_save_gc0_config1(cop0); + /* architecturally read only, but maybe writable from root */ + kvm_clear_sw_gc0_config1(cop0, MIPS_CONF1_C2 | + MIPS_CONF1_MD | + MIPS_CONF1_PC | + MIPS_CONF1_WR | + MIPS_CONF1_CA | + MIPS_CONF1_FP); + } + if (cpu_guest_has_conf2) { + kvm_set_sw_gc0_config1(cop0, MIPS_CONF_M); + /* Config2 */ + kvm_save_gc0_config2(cop0); + } + if (cpu_guest_has_conf3) { + kvm_set_sw_gc0_config2(cop0, MIPS_CONF_M); + /* Config3 */ + kvm_save_gc0_config3(cop0); + /* architecturally writable (e.g. from guest) */ + kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_ISA_OE); + /* architecturally read only, but maybe writable from root */ + kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_MSA | + MIPS_CONF3_BPG | + MIPS_CONF3_ULRI | + MIPS_CONF3_DSP | + MIPS_CONF3_CTXTC | + MIPS_CONF3_ITL | + MIPS_CONF3_LPA | + MIPS_CONF3_VEIC | + MIPS_CONF3_VINT | + MIPS_CONF3_SP | + MIPS_CONF3_CDMM | + MIPS_CONF3_MT | + MIPS_CONF3_SM | + MIPS_CONF3_TL); + } + if (cpu_guest_has_conf4) { + kvm_set_sw_gc0_config3(cop0, MIPS_CONF_M); + /* Config4 */ + kvm_save_gc0_config4(cop0); + } + if (cpu_guest_has_conf5) { + kvm_set_sw_gc0_config4(cop0, MIPS_CONF_M); + /* Config5 */ + kvm_save_gc0_config5(cop0); + /* architecturally writable (e.g. from guest) */ + kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_K | + MIPS_CONF5_CV | + MIPS_CONF5_MSAEN | + MIPS_CONF5_UFE | + MIPS_CONF5_FRE | + MIPS_CONF5_SBRI | + MIPS_CONF5_UFR); + /* architecturally read only, but maybe writable from root */ + kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_MRP); + } + + if (cpu_guest_has_contextconfig) { + /* ContextConfig */ + kvm_write_sw_gc0_contextconfig(cop0, 0x007ffff0); +#ifdef CONFIG_64BIT + /* XContextConfig */ + /* bits SEGBITS-13+3:4 set */ + kvm_write_sw_gc0_xcontextconfig(cop0, + ((1ull << (cpu_vmbits - 13)) - 1) << 4); +#endif + } + + /* Implementation dependent, use the legacy layout */ + if (cpu_guest_has_segments) { + /* SegCtl0, SegCtl1, SegCtl2 */ + kvm_write_sw_gc0_segctl0(cop0, 0x00200010); + kvm_write_sw_gc0_segctl1(cop0, 0x00000002 | + (_page_cachable_default >> _CACHE_SHIFT) << + (16 + MIPS_SEGCFG_C_SHIFT)); + kvm_write_sw_gc0_segctl2(cop0, 0x00380438); + } + + /* reset HTW registers */ + if (cpu_guest_has_htw && (cpu_has_mips_r5 || cpu_has_mips_r6)) { + /* PWField */ + kvm_write_sw_gc0_pwfield(cop0, 0x0c30c302); + /* PWSize */ + kvm_write_sw_gc0_pwsize(cop0, 1 << MIPS_PWSIZE_PTW_SHIFT); + } + + /* start with no pending virtual guest interrupts */ + if (cpu_has_guestctl2) + cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] = 0; + + /* Put PC at reset vector */ + vcpu->arch.pc = CKSEG1ADDR(0x1fc00000); + + return 0; +} + +static void kvm_vz_prepare_flush_shadow(struct kvm *kvm) +{ + if (!cpu_has_guestid) { + /* + * For each CPU there is a single GPA ASID used by all VCPUs in + * the VM, so it doesn't make sense for the VCPUs to handle + * invalidation of these ASIDs individually. + * + * Instead mark all CPUs as needing ASID invalidation in + * asid_flush_mask, and kvm_flush_remote_tlbs(kvm) will + * kick any running VCPUs so they check asid_flush_mask. + */ + cpumask_setall(&kvm->arch.asid_flush_mask); + } +} + +static void kvm_vz_vcpu_reenter(struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + int preserve_guest_tlb; + + preserve_guest_tlb = kvm_vz_check_requests(vcpu, cpu); + + if (preserve_guest_tlb) + kvm_vz_vcpu_save_wired(vcpu); + + kvm_vz_vcpu_load_tlb(vcpu, cpu); + + if (preserve_guest_tlb) + kvm_vz_vcpu_load_wired(vcpu); +} + +static int kvm_vz_vcpu_run(struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + int r; + + kvm_vz_acquire_htimer(vcpu); + /* Check if we have any exceptions/interrupts pending */ + kvm_mips_deliver_interrupts(vcpu, read_gc0_cause()); + + kvm_vz_check_requests(vcpu, cpu); + kvm_vz_vcpu_load_tlb(vcpu, cpu); + kvm_vz_vcpu_load_wired(vcpu); + + r = vcpu->arch.vcpu_run(vcpu); + + kvm_vz_vcpu_save_wired(vcpu); + + return r; +} + +static struct kvm_mips_callbacks kvm_vz_callbacks = { + .handle_cop_unusable = kvm_trap_vz_handle_cop_unusable, + .handle_tlb_mod = kvm_trap_vz_handle_tlb_st_miss, + .handle_tlb_ld_miss = kvm_trap_vz_handle_tlb_ld_miss, + .handle_tlb_st_miss = kvm_trap_vz_handle_tlb_st_miss, + .handle_addr_err_st = kvm_trap_vz_no_handler, + .handle_addr_err_ld = kvm_trap_vz_no_handler, + .handle_syscall = kvm_trap_vz_no_handler, + .handle_res_inst = kvm_trap_vz_no_handler, + .handle_break = kvm_trap_vz_no_handler, + .handle_msa_disabled = kvm_trap_vz_handle_msa_disabled, + .handle_guest_exit = kvm_trap_vz_handle_guest_exit, + + .hardware_enable = kvm_vz_hardware_enable, + .hardware_disable = kvm_vz_hardware_disable, + .check_extension = kvm_vz_check_extension, + .vcpu_init = kvm_vz_vcpu_init, + .vcpu_uninit = kvm_vz_vcpu_uninit, + .vcpu_setup = kvm_vz_vcpu_setup, + .prepare_flush_shadow = kvm_vz_prepare_flush_shadow, + .gva_to_gpa = kvm_vz_gva_to_gpa_cb, + .queue_timer_int = kvm_vz_queue_timer_int_cb, + .dequeue_timer_int = kvm_vz_dequeue_timer_int_cb, + .queue_io_int = kvm_vz_queue_io_int_cb, + .dequeue_io_int = kvm_vz_dequeue_io_int_cb, + .irq_deliver = kvm_vz_irq_deliver_cb, + .irq_clear = kvm_vz_irq_clear_cb, + .num_regs = kvm_vz_num_regs, + .copy_reg_indices = kvm_vz_copy_reg_indices, + .get_one_reg = kvm_vz_get_one_reg, + .set_one_reg = kvm_vz_set_one_reg, + .vcpu_load = kvm_vz_vcpu_load, + .vcpu_put = kvm_vz_vcpu_put, + .vcpu_run = kvm_vz_vcpu_run, + .vcpu_reenter = kvm_vz_vcpu_reenter, +}; + +/* FIXME: Get rid of the callbacks now that trap-and-emulate is gone. */ +const struct kvm_mips_callbacks * const kvm_mips_callbacks = &kvm_vz_callbacks; + +int kvm_mips_emulation_init(void) +{ + if (!cpu_has_vz) + return -ENODEV; + + /* + * VZ requires at least 2 KScratch registers, so it should have been + * possible to allocate pgd_reg. + */ + if (WARN(pgd_reg == -1, + "pgd_reg not allocated even though cpu_has_vz\n")) + return -ENODEV; + + pr_info("Starting KVM with MIPS VZ extensions\n"); + return 0; +} |