summaryrefslogtreecommitdiffstats
path: root/arch/loongarch
diff options
context:
space:
mode:
Diffstat (limited to 'arch/loongarch')
-rw-r--r--arch/loongarch/Kbuild2
-rw-r--r--arch/loongarch/Kconfig30
-rw-r--r--arch/loongarch/Makefile7
-rw-r--r--arch/loongarch/configs/loongson3_defconfig2
-rw-r--r--arch/loongarch/crypto/crc32-loongarch.c2
-rw-r--r--arch/loongarch/include/asm/acpi.h4
-rw-r--r--arch/loongarch/include/asm/atomic.h88
-rw-r--r--arch/loongarch/include/asm/inst.h29
-rw-r--r--arch/loongarch/include/asm/jump_label.h4
-rw-r--r--arch/loongarch/include/asm/kvm_csr.h211
-rw-r--r--arch/loongarch/include/asm/kvm_host.h237
-rw-r--r--arch/loongarch/include/asm/kvm_mmu.h139
-rw-r--r--arch/loongarch/include/asm/kvm_types.h11
-rw-r--r--arch/loongarch/include/asm/kvm_vcpu.h93
-rw-r--r--arch/loongarch/include/asm/local.h27
-rw-r--r--arch/loongarch/include/asm/loongarch.h24
-rw-r--r--arch/loongarch/include/asm/percpu.h11
-rw-r--r--arch/loongarch/include/asm/pgalloc.h1
-rw-r--r--arch/loongarch/include/uapi/asm/kvm.h108
-rw-r--r--arch/loongarch/kernel/Makefile2
-rw-r--r--arch/loongarch/kernel/acpi.c4
-rw-r--r--arch/loongarch/kernel/asm-offsets.c56
-rw-r--r--arch/loongarch/kernel/efi.c8
-rw-r--r--arch/loongarch/kernel/image-vars.h2
-rw-r--r--arch/loongarch/kernel/setup.c11
-rw-r--r--arch/loongarch/kernel/smp.c123
-rw-r--r--arch/loongarch/kvm/Kconfig40
-rw-r--r--arch/loongarch/kvm/Makefile22
-rw-r--r--arch/loongarch/kvm/exit.c696
-rw-r--r--arch/loongarch/kvm/interrupt.c183
-rw-r--r--arch/loongarch/kvm/main.c420
-rw-r--r--arch/loongarch/kvm/mmu.c914
-rw-r--r--arch/loongarch/kvm/switch.S250
-rw-r--r--arch/loongarch/kvm/timer.c197
-rw-r--r--arch/loongarch/kvm/tlb.c32
-rw-r--r--arch/loongarch/kvm/trace.h162
-rw-r--r--arch/loongarch/kvm/vcpu.c939
-rw-r--r--arch/loongarch/kvm/vm.c94
-rw-r--r--arch/loongarch/mm/kasan_init.c3
-rw-r--r--arch/loongarch/mm/tlb.c16
-rw-r--r--arch/loongarch/net/bpf_jit.c149
-rw-r--r--arch/loongarch/vdso/Makefile11
42 files changed, 5164 insertions, 200 deletions
diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild
index b01f5cdb27..beb8499dd8 100644
--- a/arch/loongarch/Kbuild
+++ b/arch/loongarch/Kbuild
@@ -3,5 +3,7 @@ obj-y += mm/
obj-y += net/
obj-y += vdso/
+obj-$(CONFIG_KVM) += kvm/
+
# for cleaning
subdir- += boot
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e14396a2dd..205956041d 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -11,6 +11,7 @@ config LOONGARCH
select ARCH_DISABLE_KASAN_INLINE
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_FORTIFY_SOURCE
@@ -97,6 +98,7 @@ config LOONGARCH
select HAVE_ARCH_KFENCE
select HAVE_ARCH_KGDB if PERF_EVENTS
select HAVE_ARCH_MMAP_RND_BITS if MMU
+ select HAVE_ARCH_SECCOMP
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
@@ -129,12 +131,14 @@ config LOONGARCH
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
select HAVE_KRETPROBES
+ select HAVE_KVM
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_DYNAMIC_KEY
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RETHOOK
select HAVE_RSEQ
@@ -263,6 +267,9 @@ config AS_HAS_LASX_EXTENSION
config AS_HAS_LBT_EXTENSION
def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0)
+config AS_HAS_LVZ_EXTENSION
+ def_bool $(as-instr,hvcl 0)
+
menu "Kernel type and options"
source "kernel/Kconfig.hz"
@@ -603,23 +610,6 @@ config RANDOMIZE_BASE_MAX_OFFSET
This is limited by the size of the lower address memory, 256MB.
-config SECCOMP
- bool "Enable seccomp to safely compute untrusted bytecode"
- depends on PROC_FS
- default y
- help
- This kernel feature is useful for number crunching applications
- that may need to compute untrusted bytecode during their
- execution. By using pipes or other transports made available to
- the process as file descriptors supporting the read/write
- syscalls, it's possible to isolate those applications in
- their own address space using seccomp. Once seccomp is
- enabled via /proc/<pid>/seccomp, it cannot be disabled
- and the task is only allowed to execute a few safe syscalls
- defined by each seccomp mode.
-
- If unsure, say Y. Only embedded should say N here.
-
endmenu
config ARCH_SELECT_MEMORY_MODEL
@@ -638,10 +628,6 @@ config ARCH_SPARSEMEM_ENABLE
or have huge holes in the physical address space for other reasons.
See <file:Documentation/mm/numa.rst> for more.
-config ARCH_ENABLE_THP_MIGRATION
- def_bool y
- depends on TRANSPARENT_HUGEPAGE
-
config ARCH_MEMORY_PROBE
def_bool y
depends on MEMORY_HOTPLUG
@@ -676,3 +662,5 @@ source "kernel/power/Kconfig"
source "drivers/acpi/Kconfig"
endmenu
+
+source "arch/loongarch/kvm/Kconfig"
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index d423fba7c4..4ba8d67ddb 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -68,6 +68,9 @@ LDFLAGS_vmlinux += -static -n -nostdlib
ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
cflags-y += $(call cc-option,-mexplicit-relocs)
KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access)
+KBUILD_CFLAGS_KERNEL += $(call cc-option,-fdirect-access-external-data)
+KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data)
+KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data)
KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
else
@@ -136,9 +139,7 @@ vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/loongarch/vdso include/generated/vdso-offsets.h
endif
-PHONY += vdso_install
-vdso_install:
- $(Q)$(MAKE) $(build)=arch/loongarch/vdso $@
+vdso-install-y += arch/loongarch/vdso/vdso.so.dbg
all: $(notdir $(KBUILD_IMAGE))
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index a3b52aaa83..33795e4a5b 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -66,6 +66,8 @@ CONFIG_EFI_ZBOOT=y
CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
CONFIG_EFI_CAPSULE_LOADER=m
CONFIG_EFI_TEST=m
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=m
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c
index 1f2a2c3839..a49e507af3 100644
--- a/arch/loongarch/crypto/crc32-loongarch.c
+++ b/arch/loongarch/crypto/crc32-loongarch.c
@@ -239,7 +239,6 @@ static struct shash_alg crc32_alg = {
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_alignmask = 0,
.cra_ctxsize = sizeof(struct chksum_ctx),
.cra_module = THIS_MODULE,
.cra_init = chksum_cra_init,
@@ -261,7 +260,6 @@ static struct shash_alg crc32c_alg = {
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_alignmask = 0,
.cra_ctxsize = sizeof(struct chksum_ctx),
.cra_module = THIS_MODULE,
.cra_init = chksumc_cra_init,
diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
index 8de6c4b83a..49e29b2999 100644
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -32,8 +32,10 @@ static inline bool acpi_has_cpu_in_madt(void)
return true;
}
+#define MAX_CORE_PIC 256
+
extern struct list_head acpi_wakeup_device_list;
-extern struct acpi_madt_core_pic acpi_core_pic[NR_CPUS];
+extern struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC];
extern int __init parse_acpi_topology(void);
diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h
index e27f0c72d3..99af8b3160 100644
--- a/arch/loongarch/include/asm/atomic.h
+++ b/arch/loongarch/include/asm/atomic.h
@@ -36,19 +36,19 @@
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
__asm__ __volatile__( \
- "am"#asm_op"_db.w" " $zero, %1, %0 \n" \
+ "am"#asm_op".w" " $zero, %1, %0 \n" \
: "+ZB" (v->counter) \
: "r" (I) \
: "memory"); \
}
-#define ATOMIC_OP_RETURN(op, I, asm_op, c_op) \
-static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
+#define ATOMIC_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \
+static inline int arch_atomic_##op##_return##suffix(int i, atomic_t *v) \
{ \
int result; \
\
__asm__ __volatile__( \
- "am"#asm_op"_db.w" " %1, %2, %0 \n" \
+ "am"#asm_op#mb".w" " %1, %2, %0 \n" \
: "+ZB" (v->counter), "=&r" (result) \
: "r" (I) \
: "memory"); \
@@ -56,13 +56,13 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
return result c_op I; \
}
-#define ATOMIC_FETCH_OP(op, I, asm_op) \
-static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+#define ATOMIC_FETCH_OP(op, I, asm_op, mb, suffix) \
+static inline int arch_atomic_fetch_##op##suffix(int i, atomic_t *v) \
{ \
int result; \
\
__asm__ __volatile__( \
- "am"#asm_op"_db.w" " %1, %2, %0 \n" \
+ "am"#asm_op#mb".w" " %1, %2, %0 \n" \
: "+ZB" (v->counter), "=&r" (result) \
: "r" (I) \
: "memory"); \
@@ -72,29 +72,53 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
#define ATOMIC_OPS(op, I, asm_op, c_op) \
ATOMIC_OP(op, I, asm_op) \
- ATOMIC_OP_RETURN(op, I, asm_op, c_op) \
- ATOMIC_FETCH_OP(op, I, asm_op)
+ ATOMIC_OP_RETURN(op, I, asm_op, c_op, _db, ) \
+ ATOMIC_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \
+ ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \
+ ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed)
ATOMIC_OPS(add, i, add, +)
ATOMIC_OPS(sub, -i, add, +)
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_add_return_acquire arch_atomic_add_return
+#define arch_atomic_add_return_release arch_atomic_add_return
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return
+#define arch_atomic_sub_return_release arch_atomic_sub_return
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add
+#define arch_atomic_fetch_add_release arch_atomic_fetch_add
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, I, asm_op) \
ATOMIC_OP(op, I, asm_op) \
- ATOMIC_FETCH_OP(op, I, asm_op)
+ ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \
+ ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed)
ATOMIC_OPS(and, i, and)
ATOMIC_OPS(or, i, or)
ATOMIC_OPS(xor, i, xor)
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and
+#define arch_atomic_fetch_and_release arch_atomic_fetch_and
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or
+#define arch_atomic_fetch_or_release arch_atomic_fetch_or
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor
+#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
#undef ATOMIC_OPS
@@ -172,18 +196,18 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v)
static inline void arch_atomic64_##op(long i, atomic64_t *v) \
{ \
__asm__ __volatile__( \
- "am"#asm_op"_db.d " " $zero, %1, %0 \n" \
+ "am"#asm_op".d " " $zero, %1, %0 \n" \
: "+ZB" (v->counter) \
: "r" (I) \
: "memory"); \
}
-#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \
-static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \
+#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \
+static inline long arch_atomic64_##op##_return##suffix(long i, atomic64_t *v) \
{ \
long result; \
__asm__ __volatile__( \
- "am"#asm_op"_db.d " " %1, %2, %0 \n" \
+ "am"#asm_op#mb".d " " %1, %2, %0 \n" \
: "+ZB" (v->counter), "=&r" (result) \
: "r" (I) \
: "memory"); \
@@ -191,13 +215,13 @@ static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \
return result c_op I; \
}
-#define ATOMIC64_FETCH_OP(op, I, asm_op) \
-static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \
+#define ATOMIC64_FETCH_OP(op, I, asm_op, mb, suffix) \
+static inline long arch_atomic64_fetch_##op##suffix(long i, atomic64_t *v) \
{ \
long result; \
\
__asm__ __volatile__( \
- "am"#asm_op"_db.d " " %1, %2, %0 \n" \
+ "am"#asm_op#mb".d " " %1, %2, %0 \n" \
: "+ZB" (v->counter), "=&r" (result) \
: "r" (I) \
: "memory"); \
@@ -207,29 +231,53 @@ static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \
#define ATOMIC64_OPS(op, I, asm_op, c_op) \
ATOMIC64_OP(op, I, asm_op) \
- ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \
- ATOMIC64_FETCH_OP(op, I, asm_op)
+ ATOMIC64_OP_RETURN(op, I, asm_op, c_op, _db, ) \
+ ATOMIC64_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \
+ ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \
+ ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed)
ATOMIC64_OPS(add, i, add, +)
ATOMIC64_OPS(sub, -i, add, +)
+#define arch_atomic64_add_return arch_atomic64_add_return
+#define arch_atomic64_add_return_acquire arch_atomic64_add_return
+#define arch_atomic64_add_return_release arch_atomic64_add_return
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return arch_atomic64_sub_return
+#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return
+#define arch_atomic64_sub_return_release arch_atomic64_sub_return
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
+#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add
+#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
#undef ATOMIC64_OPS
#define ATOMIC64_OPS(op, I, asm_op) \
ATOMIC64_OP(op, I, asm_op) \
- ATOMIC64_FETCH_OP(op, I, asm_op)
+ ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \
+ ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed)
ATOMIC64_OPS(and, i, and)
ATOMIC64_OPS(or, i, or)
ATOMIC64_OPS(xor, i, xor)
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
+#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and
+#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
+#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or
+#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
+#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor
+#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
#undef ATOMIC64_OPS
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 71e1ed4165..d8f637f9e4 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -65,6 +65,16 @@ enum reg2_op {
revbd_op = 0x0f,
revh2w_op = 0x10,
revhd_op = 0x11,
+ extwh_op = 0x16,
+ extwb_op = 0x17,
+ iocsrrdb_op = 0x19200,
+ iocsrrdh_op = 0x19201,
+ iocsrrdw_op = 0x19202,
+ iocsrrdd_op = 0x19203,
+ iocsrwrb_op = 0x19204,
+ iocsrwrh_op = 0x19205,
+ iocsrwrw_op = 0x19206,
+ iocsrwrd_op = 0x19207,
};
enum reg2i5_op {
@@ -318,6 +328,13 @@ struct reg2bstrd_format {
unsigned int opcode : 10;
};
+struct reg2csr_format {
+ unsigned int rd : 5;
+ unsigned int rj : 5;
+ unsigned int csr : 14;
+ unsigned int opcode : 8;
+};
+
struct reg3_format {
unsigned int rd : 5;
unsigned int rj : 5;
@@ -346,6 +363,7 @@ union loongarch_instruction {
struct reg2i14_format reg2i14_format;
struct reg2i16_format reg2i16_format;
struct reg2bstrd_format reg2bstrd_format;
+ struct reg2csr_format reg2csr_format;
struct reg3_format reg3_format;
struct reg3sa2_format reg3sa2_format;
};
@@ -556,6 +574,8 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \
DEF_EMIT_REG2_FORMAT(revb2h, revb2h_op)
DEF_EMIT_REG2_FORMAT(revb2w, revb2w_op)
DEF_EMIT_REG2_FORMAT(revbd, revbd_op)
+DEF_EMIT_REG2_FORMAT(extwh, extwh_op)
+DEF_EMIT_REG2_FORMAT(extwb, extwb_op)
#define DEF_EMIT_REG2I5_FORMAT(NAME, OP) \
static inline void emit_##NAME(union loongarch_instruction *insn, \
@@ -607,6 +627,9 @@ DEF_EMIT_REG2I12_FORMAT(lu52id, lu52id_op)
DEF_EMIT_REG2I12_FORMAT(andi, andi_op)
DEF_EMIT_REG2I12_FORMAT(ori, ori_op)
DEF_EMIT_REG2I12_FORMAT(xori, xori_op)
+DEF_EMIT_REG2I12_FORMAT(ldb, ldb_op)
+DEF_EMIT_REG2I12_FORMAT(ldh, ldh_op)
+DEF_EMIT_REG2I12_FORMAT(ldw, ldw_op)
DEF_EMIT_REG2I12_FORMAT(ldbu, ldbu_op)
DEF_EMIT_REG2I12_FORMAT(ldhu, ldhu_op)
DEF_EMIT_REG2I12_FORMAT(ldwu, ldwu_op)
@@ -685,9 +708,12 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \
insn->reg3_format.rk = rk; \
}
+DEF_EMIT_REG3_FORMAT(addw, addw_op)
DEF_EMIT_REG3_FORMAT(addd, addd_op)
DEF_EMIT_REG3_FORMAT(subd, subd_op)
DEF_EMIT_REG3_FORMAT(muld, muld_op)
+DEF_EMIT_REG3_FORMAT(divd, divd_op)
+DEF_EMIT_REG3_FORMAT(modd, modd_op)
DEF_EMIT_REG3_FORMAT(divdu, divdu_op)
DEF_EMIT_REG3_FORMAT(moddu, moddu_op)
DEF_EMIT_REG3_FORMAT(and, and_op)
@@ -699,6 +725,9 @@ DEF_EMIT_REG3_FORMAT(srlw, srlw_op)
DEF_EMIT_REG3_FORMAT(srld, srld_op)
DEF_EMIT_REG3_FORMAT(sraw, sraw_op)
DEF_EMIT_REG3_FORMAT(srad, srad_op)
+DEF_EMIT_REG3_FORMAT(ldxb, ldxb_op)
+DEF_EMIT_REG3_FORMAT(ldxh, ldxh_op)
+DEF_EMIT_REG3_FORMAT(ldxw, ldxw_op)
DEF_EMIT_REG3_FORMAT(ldxbu, ldxbu_op)
DEF_EMIT_REG3_FORMAT(ldxhu, ldxhu_op)
DEF_EMIT_REG3_FORMAT(ldxwu, ldxwu_op)
diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h
index 3cea299a5e..29acfe3de3 100644
--- a/arch/loongarch/include/asm/jump_label.h
+++ b/arch/loongarch/include/asm/jump_label.h
@@ -22,7 +22,7 @@
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: nop \n\t"
JUMP_TABLE_ENTRY
: : "i"(&((char *)key)[branch]) : : l_yes);
@@ -35,7 +35,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: b %l[l_yes] \n\t"
JUMP_TABLE_ENTRY
: : "i"(&((char *)key)[branch]) : : l_yes);
diff --git a/arch/loongarch/include/asm/kvm_csr.h b/arch/loongarch/include/asm/kvm_csr.h
new file mode 100644
index 0000000000..724ca8b7b4
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_csr.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef __ASM_LOONGARCH_KVM_CSR_H__
+#define __ASM_LOONGARCH_KVM_CSR_H__
+
+#include <linux/uaccess.h>
+#include <linux/kvm_host.h>
+#include <asm/loongarch.h>
+#include <asm/kvm_vcpu.h>
+
+#define gcsr_read(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__( \
+ " gcsrrd %[val], %[reg]\n\t" \
+ : [val] "=r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+#define gcsr_write(v, csr) \
+({ \
+ register unsigned long __v = v; \
+ __asm__ __volatile__ ( \
+ " gcsrwr %[val], %[reg]\n\t" \
+ : [val] "+r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+})
+
+#define gcsr_xchg(v, m, csr) \
+({ \
+ register unsigned long __v = v; \
+ __asm__ __volatile__( \
+ " gcsrxchg %[val], %[mask], %[reg]\n\t" \
+ : [val] "+r" (__v) \
+ : [mask] "r" (m), [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+/* Guest CSRS read and write */
+#define read_gcsr_crmd() gcsr_read(LOONGARCH_CSR_CRMD)
+#define write_gcsr_crmd(val) gcsr_write(val, LOONGARCH_CSR_CRMD)
+#define read_gcsr_prmd() gcsr_read(LOONGARCH_CSR_PRMD)
+#define write_gcsr_prmd(val) gcsr_write(val, LOONGARCH_CSR_PRMD)
+#define read_gcsr_euen() gcsr_read(LOONGARCH_CSR_EUEN)
+#define write_gcsr_euen(val) gcsr_write(val, LOONGARCH_CSR_EUEN)
+#define read_gcsr_misc() gcsr_read(LOONGARCH_CSR_MISC)
+#define write_gcsr_misc(val) gcsr_write(val, LOONGARCH_CSR_MISC)
+#define read_gcsr_ecfg() gcsr_read(LOONGARCH_CSR_ECFG)
+#define write_gcsr_ecfg(val) gcsr_write(val, LOONGARCH_CSR_ECFG)
+#define read_gcsr_estat() gcsr_read(LOONGARCH_CSR_ESTAT)
+#define write_gcsr_estat(val) gcsr_write(val, LOONGARCH_CSR_ESTAT)
+#define read_gcsr_era() gcsr_read(LOONGARCH_CSR_ERA)
+#define write_gcsr_era(val) gcsr_write(val, LOONGARCH_CSR_ERA)
+#define read_gcsr_badv() gcsr_read(LOONGARCH_CSR_BADV)
+#define write_gcsr_badv(val) gcsr_write(val, LOONGARCH_CSR_BADV)
+#define read_gcsr_badi() gcsr_read(LOONGARCH_CSR_BADI)
+#define write_gcsr_badi(val) gcsr_write(val, LOONGARCH_CSR_BADI)
+#define read_gcsr_eentry() gcsr_read(LOONGARCH_CSR_EENTRY)
+#define write_gcsr_eentry(val) gcsr_write(val, LOONGARCH_CSR_EENTRY)
+
+#define read_gcsr_asid() gcsr_read(LOONGARCH_CSR_ASID)
+#define write_gcsr_asid(val) gcsr_write(val, LOONGARCH_CSR_ASID)
+#define read_gcsr_pgdl() gcsr_read(LOONGARCH_CSR_PGDL)
+#define write_gcsr_pgdl(val) gcsr_write(val, LOONGARCH_CSR_PGDL)
+#define read_gcsr_pgdh() gcsr_read(LOONGARCH_CSR_PGDH)
+#define write_gcsr_pgdh(val) gcsr_write(val, LOONGARCH_CSR_PGDH)
+#define write_gcsr_pgd(val) gcsr_write(val, LOONGARCH_CSR_PGD)
+#define read_gcsr_pgd() gcsr_read(LOONGARCH_CSR_PGD)
+#define read_gcsr_pwctl0() gcsr_read(LOONGARCH_CSR_PWCTL0)
+#define write_gcsr_pwctl0(val) gcsr_write(val, LOONGARCH_CSR_PWCTL0)
+#define read_gcsr_pwctl1() gcsr_read(LOONGARCH_CSR_PWCTL1)
+#define write_gcsr_pwctl1(val) gcsr_write(val, LOONGARCH_CSR_PWCTL1)
+#define read_gcsr_stlbpgsize() gcsr_read(LOONGARCH_CSR_STLBPGSIZE)
+#define write_gcsr_stlbpgsize(val) gcsr_write(val, LOONGARCH_CSR_STLBPGSIZE)
+#define read_gcsr_rvacfg() gcsr_read(LOONGARCH_CSR_RVACFG)
+#define write_gcsr_rvacfg(val) gcsr_write(val, LOONGARCH_CSR_RVACFG)
+
+#define read_gcsr_cpuid() gcsr_read(LOONGARCH_CSR_CPUID)
+#define write_gcsr_cpuid(val) gcsr_write(val, LOONGARCH_CSR_CPUID)
+#define read_gcsr_prcfg1() gcsr_read(LOONGARCH_CSR_PRCFG1)
+#define write_gcsr_prcfg1(val) gcsr_write(val, LOONGARCH_CSR_PRCFG1)
+#define read_gcsr_prcfg2() gcsr_read(LOONGARCH_CSR_PRCFG2)
+#define write_gcsr_prcfg2(val) gcsr_write(val, LOONGARCH_CSR_PRCFG2)
+#define read_gcsr_prcfg3() gcsr_read(LOONGARCH_CSR_PRCFG3)
+#define write_gcsr_prcfg3(val) gcsr_write(val, LOONGARCH_CSR_PRCFG3)
+
+#define read_gcsr_kscratch0() gcsr_read(LOONGARCH_CSR_KS0)
+#define write_gcsr_kscratch0(val) gcsr_write(val, LOONGARCH_CSR_KS0)
+#define read_gcsr_kscratch1() gcsr_read(LOONGARCH_CSR_KS1)
+#define write_gcsr_kscratch1(val) gcsr_write(val, LOONGARCH_CSR_KS1)
+#define read_gcsr_kscratch2() gcsr_read(LOONGARCH_CSR_KS2)
+#define write_gcsr_kscratch2(val) gcsr_write(val, LOONGARCH_CSR_KS2)
+#define read_gcsr_kscratch3() gcsr_read(LOONGARCH_CSR_KS3)
+#define write_gcsr_kscratch3(val) gcsr_write(val, LOONGARCH_CSR_KS3)
+#define read_gcsr_kscratch4() gcsr_read(LOONGARCH_CSR_KS4)
+#define write_gcsr_kscratch4(val) gcsr_write(val, LOONGARCH_CSR_KS4)
+#define read_gcsr_kscratch5() gcsr_read(LOONGARCH_CSR_KS5)
+#define write_gcsr_kscratch5(val) gcsr_write(val, LOONGARCH_CSR_KS5)
+#define read_gcsr_kscratch6() gcsr_read(LOONGARCH_CSR_KS6)
+#define write_gcsr_kscratch6(val) gcsr_write(val, LOONGARCH_CSR_KS6)
+#define read_gcsr_kscratch7() gcsr_read(LOONGARCH_CSR_KS7)
+#define write_gcsr_kscratch7(val) gcsr_write(val, LOONGARCH_CSR_KS7)
+
+#define read_gcsr_timerid() gcsr_read(LOONGARCH_CSR_TMID)
+#define write_gcsr_timerid(val) gcsr_write(val, LOONGARCH_CSR_TMID)
+#define read_gcsr_timercfg() gcsr_read(LOONGARCH_CSR_TCFG)
+#define write_gcsr_timercfg(val) gcsr_write(val, LOONGARCH_CSR_TCFG)
+#define read_gcsr_timertick() gcsr_read(LOONGARCH_CSR_TVAL)
+#define write_gcsr_timertick(val) gcsr_write(val, LOONGARCH_CSR_TVAL)
+#define read_gcsr_timeroffset() gcsr_read(LOONGARCH_CSR_CNTC)
+#define write_gcsr_timeroffset(val) gcsr_write(val, LOONGARCH_CSR_CNTC)
+
+#define read_gcsr_llbctl() gcsr_read(LOONGARCH_CSR_LLBCTL)
+#define write_gcsr_llbctl(val) gcsr_write(val, LOONGARCH_CSR_LLBCTL)
+
+#define read_gcsr_tlbidx() gcsr_read(LOONGARCH_CSR_TLBIDX)
+#define write_gcsr_tlbidx(val) gcsr_write(val, LOONGARCH_CSR_TLBIDX)
+#define read_gcsr_tlbrentry() gcsr_read(LOONGARCH_CSR_TLBRENTRY)
+#define write_gcsr_tlbrentry(val) gcsr_write(val, LOONGARCH_CSR_TLBRENTRY)
+#define read_gcsr_tlbrbadv() gcsr_read(LOONGARCH_CSR_TLBRBADV)
+#define write_gcsr_tlbrbadv(val) gcsr_write(val, LOONGARCH_CSR_TLBRBADV)
+#define read_gcsr_tlbrera() gcsr_read(LOONGARCH_CSR_TLBRERA)
+#define write_gcsr_tlbrera(val) gcsr_write(val, LOONGARCH_CSR_TLBRERA)
+#define read_gcsr_tlbrsave() gcsr_read(LOONGARCH_CSR_TLBRSAVE)
+#define write_gcsr_tlbrsave(val) gcsr_write(val, LOONGARCH_CSR_TLBRSAVE)
+#define read_gcsr_tlbrelo0() gcsr_read(LOONGARCH_CSR_TLBRELO0)
+#define write_gcsr_tlbrelo0(val) gcsr_write(val, LOONGARCH_CSR_TLBRELO0)
+#define read_gcsr_tlbrelo1() gcsr_read(LOONGARCH_CSR_TLBRELO1)
+#define write_gcsr_tlbrelo1(val) gcsr_write(val, LOONGARCH_CSR_TLBRELO1)
+#define read_gcsr_tlbrehi() gcsr_read(LOONGARCH_CSR_TLBREHI)
+#define write_gcsr_tlbrehi(val) gcsr_write(val, LOONGARCH_CSR_TLBREHI)
+#define read_gcsr_tlbrprmd() gcsr_read(LOONGARCH_CSR_TLBRPRMD)
+#define write_gcsr_tlbrprmd(val) gcsr_write(val, LOONGARCH_CSR_TLBRPRMD)
+
+#define read_gcsr_directwin0() gcsr_read(LOONGARCH_CSR_DMWIN0)
+#define write_gcsr_directwin0(val) gcsr_write(val, LOONGARCH_CSR_DMWIN0)
+#define read_gcsr_directwin1() gcsr_read(LOONGARCH_CSR_DMWIN1)
+#define write_gcsr_directwin1(val) gcsr_write(val, LOONGARCH_CSR_DMWIN1)
+#define read_gcsr_directwin2() gcsr_read(LOONGARCH_CSR_DMWIN2)
+#define write_gcsr_directwin2(val) gcsr_write(val, LOONGARCH_CSR_DMWIN2)
+#define read_gcsr_directwin3() gcsr_read(LOONGARCH_CSR_DMWIN3)
+#define write_gcsr_directwin3(val) gcsr_write(val, LOONGARCH_CSR_DMWIN3)
+
+/* Guest related CSRs */
+#define read_csr_gtlbc() csr_read64(LOONGARCH_CSR_GTLBC)
+#define write_csr_gtlbc(val) csr_write64(val, LOONGARCH_CSR_GTLBC)
+#define read_csr_trgp() csr_read64(LOONGARCH_CSR_TRGP)
+#define read_csr_gcfg() csr_read64(LOONGARCH_CSR_GCFG)
+#define write_csr_gcfg(val) csr_write64(val, LOONGARCH_CSR_GCFG)
+#define read_csr_gstat() csr_read64(LOONGARCH_CSR_GSTAT)
+#define write_csr_gstat(val) csr_write64(val, LOONGARCH_CSR_GSTAT)
+#define read_csr_gintc() csr_read64(LOONGARCH_CSR_GINTC)
+#define write_csr_gintc(val) csr_write64(val, LOONGARCH_CSR_GINTC)
+#define read_csr_gcntc() csr_read64(LOONGARCH_CSR_GCNTC)
+#define write_csr_gcntc(val) csr_write64(val, LOONGARCH_CSR_GCNTC)
+
+#define __BUILD_GCSR_OP(name) __BUILD_CSR_COMMON(gcsr_##name)
+
+__BUILD_CSR_OP(gcfg)
+__BUILD_CSR_OP(gstat)
+__BUILD_CSR_OP(gtlbc)
+__BUILD_CSR_OP(gintc)
+__BUILD_GCSR_OP(llbctl)
+__BUILD_GCSR_OP(tlbidx)
+
+#define set_gcsr_estat(val) \
+ gcsr_xchg(val, val, LOONGARCH_CSR_ESTAT)
+#define clear_gcsr_estat(val) \
+ gcsr_xchg(~(val), val, LOONGARCH_CSR_ESTAT)
+
+#define kvm_read_hw_gcsr(id) gcsr_read(id)
+#define kvm_write_hw_gcsr(id, val) gcsr_write(val, id)
+
+#define kvm_save_hw_gcsr(csr, gid) (csr->csrs[gid] = gcsr_read(gid))
+#define kvm_restore_hw_gcsr(csr, gid) (gcsr_write(csr->csrs[gid], gid))
+
+int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu);
+
+static __always_inline unsigned long kvm_read_sw_gcsr(struct loongarch_csrs *csr, int gid)
+{
+ return csr->csrs[gid];
+}
+
+static __always_inline void kvm_write_sw_gcsr(struct loongarch_csrs *csr, int gid, unsigned long val)
+{
+ csr->csrs[gid] = val;
+}
+
+static __always_inline void kvm_set_sw_gcsr(struct loongarch_csrs *csr,
+ int gid, unsigned long val)
+{
+ csr->csrs[gid] |= val;
+}
+
+static __always_inline void kvm_change_sw_gcsr(struct loongarch_csrs *csr,
+ int gid, unsigned long mask, unsigned long val)
+{
+ unsigned long _mask = mask;
+
+ csr->csrs[gid] &= ~_mask;
+ csr->csrs[gid] |= val & _mask;
+}
+
+#endif /* __ASM_LOONGARCH_KVM_CSR_H__ */
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
new file mode 100644
index 0000000000..11328700d4
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef __ASM_LOONGARCH_KVM_HOST_H__
+#define __ASM_LOONGARCH_KVM_HOST_H__
+
+#include <linux/cpumask.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/kvm.h>
+#include <linux/kvm_types.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/types.h>
+
+#include <asm/inst.h>
+#include <asm/kvm_mmu.h>
+#include <asm/loongarch.h>
+
+/* Loongarch KVM register ids */
+#define KVM_GET_IOC_CSR_IDX(id) ((id & KVM_CSR_IDX_MASK) >> LOONGARCH_REG_SHIFT)
+#define KVM_GET_IOC_CPUCFG_IDX(id) ((id & KVM_CPUCFG_IDX_MASK) >> LOONGARCH_REG_SHIFT)
+
+#define KVM_MAX_VCPUS 256
+#define KVM_MAX_CPUCFG_REGS 21
+/* memory slots that does not exposed to userspace */
+#define KVM_PRIVATE_MEM_SLOTS 0
+
+#define KVM_HALT_POLL_NS_DEFAULT 500000
+
+struct kvm_vm_stat {
+ struct kvm_vm_stat_generic generic;
+ u64 pages;
+ u64 hugepages;
+};
+
+struct kvm_vcpu_stat {
+ struct kvm_vcpu_stat_generic generic;
+ u64 int_exits;
+ u64 idle_exits;
+ u64 cpucfg_exits;
+ u64 signal_exits;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+struct kvm_context {
+ unsigned long vpid_cache;
+ struct kvm_vcpu *last_vcpu;
+};
+
+struct kvm_world_switch {
+ int (*exc_entry)(void);
+ int (*enter_guest)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+ unsigned long page_order;
+};
+
+#define MAX_PGTABLE_LEVELS 4
+
+struct kvm_arch {
+ /* Guest physical mm */
+ kvm_pte_t *pgd;
+ unsigned long gpa_size;
+ unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
+ unsigned int pte_shifts[MAX_PGTABLE_LEVELS];
+ unsigned int root_level;
+
+ s64 time_offset;
+ struct kvm_context __percpu *vmcs;
+};
+
+#define CSR_MAX_NUMS 0x800
+
+struct loongarch_csrs {
+ unsigned long csrs[CSR_MAX_NUMS];
+};
+
+/* Resume Flags */
+#define RESUME_HOST 0
+#define RESUME_GUEST 1
+
+enum emulation_result {
+ EMULATE_DONE, /* no further processing */
+ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
+ EMULATE_DO_IOCSR, /* handle IOCSR request */
+ EMULATE_FAIL, /* can't emulate this instruction */
+ EMULATE_EXCEPT, /* A guest exception has been generated */
+};
+
+#define KVM_LARCH_FPU (0x1 << 0)
+#define KVM_LARCH_SWCSR_LATEST (0x1 << 1)
+#define KVM_LARCH_HWCSR_USABLE (0x1 << 2)
+
+struct kvm_vcpu_arch {
+ /*
+ * Switch pointer-to-function type to unsigned long
+ * for loading the value into register directly.
+ */
+ unsigned long host_eentry;
+ unsigned long guest_eentry;
+
+ /* Pointers stored here for easy accessing from assembly code */
+ int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+
+ /* Host registers preserved across guest mode execution */
+ unsigned long host_sp;
+ unsigned long host_tp;
+ unsigned long host_pgd;
+
+ /* Host CSRs are used when handling exits from guest */
+ unsigned long badi;
+ unsigned long badv;
+ unsigned long host_ecfg;
+ unsigned long host_estat;
+ unsigned long host_percpu;
+
+ /* GPRs */
+ unsigned long gprs[32];
+ unsigned long pc;
+
+ /* Which auxiliary state is loaded (KVM_LARCH_*) */
+ unsigned int aux_inuse;
+
+ /* FPU state */
+ struct loongarch_fpu fpu FPU_ALIGN;
+
+ /* CSR state */
+ struct loongarch_csrs *csr;
+
+ /* GPR used as IO source/target */
+ u32 io_gpr;
+
+ /* KVM register to control count timer */
+ u32 count_ctl;
+ struct hrtimer swtimer;
+
+ /* Bitmask of intr that are pending */
+ unsigned long irq_pending;
+ /* Bitmask of pending intr to be cleared */
+ unsigned long irq_clear;
+
+ /* Bitmask of exceptions that are pending */
+ unsigned long exception_pending;
+ unsigned int esubcode;
+
+ /* Cache for pages needed inside spinlock regions */
+ struct kvm_mmu_memory_cache mmu_page_cache;
+
+ /* vcpu's vpid */
+ u64 vpid;
+
+ /* Frequency of stable timer in Hz */
+ u64 timer_mhz;
+ ktime_t expire;
+
+ /* Last CPU the vCPU state was loaded on */
+ int last_sched_cpu;
+ /* mp state */
+ struct kvm_mp_state mp_state;
+ /* cpucfg */
+ u32 cpucfg[KVM_MAX_CPUCFG_REGS];
+};
+
+static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
+{
+ return csr->csrs[reg];
+}
+
+static inline void writel_sw_gcsr(struct loongarch_csrs *csr, int reg, unsigned long val)
+{
+ csr->csrs[reg] = val;
+}
+
+/* Debug: dump vcpu state */
+int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
+
+/* MMU handling */
+void kvm_flush_tlb_all(void);
+void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa);
+int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write);
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+
+static inline void update_pc(struct kvm_vcpu_arch *arch)
+{
+ arch->pc += 4;
+}
+
+/*
+ * kvm_is_ifetch_fault() - Find whether a TLBL exception is due to ifetch fault.
+ * @vcpu: Virtual CPU.
+ *
+ * Returns: Whether the TLBL exception was likely due to an instruction
+ * fetch fault rather than a data load fault.
+ */
+static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *arch)
+{
+ return arch->pc == arch->badv;
+}
+
+/* Misc */
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {}
+void kvm_check_vpid(struct kvm_vcpu *vcpu);
+enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer);
+void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, const struct kvm_memory_slot *memslot);
+void kvm_init_vmcs(struct kvm *kvm);
+void kvm_exc_entry(void);
+int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu);
+
+extern unsigned long vpid_mask;
+extern const unsigned long kvm_exception_size;
+extern const unsigned long kvm_enter_guest_size;
+extern struct kvm_world_switch *kvm_loongarch_ops;
+
+#define SW_GCSR (1 << 0)
+#define HW_GCSR (1 << 1)
+#define INVALID_GCSR (1 << 2)
+
+int get_gcsr_flag(int csr);
+void set_hw_gcsr(int csr_id, unsigned long val);
+
+#endif /* __ASM_LOONGARCH_KVM_HOST_H__ */
diff --git a/arch/loongarch/include/asm/kvm_mmu.h b/arch/loongarch/include/asm/kvm_mmu.h
new file mode 100644
index 0000000000..099bafc6f7
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_mmu.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef __ASM_LOONGARCH_KVM_MMU_H__
+#define __ASM_LOONGARCH_KVM_MMU_H__
+
+#include <linux/kvm_host.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
+ * for which pages need to be cached.
+ */
+#define KVM_MMU_CACHE_MIN_PAGES (CONFIG_PGTABLE_LEVELS - 1)
+
+#define _KVM_FLUSH_PGTABLE 0x1
+#define _KVM_HAS_PGMASK 0x2
+#define kvm_pfn_pte(pfn, prot) (((pfn) << PFN_PTE_SHIFT) | pgprot_val(prot))
+#define kvm_pte_pfn(x) ((phys_addr_t)((x & _PFN_MASK) >> PFN_PTE_SHIFT))
+
+typedef unsigned long kvm_pte_t;
+typedef struct kvm_ptw_ctx kvm_ptw_ctx;
+typedef int (*kvm_pte_ops)(kvm_pte_t *pte, phys_addr_t addr, kvm_ptw_ctx *ctx);
+
+struct kvm_ptw_ctx {
+ kvm_pte_ops ops;
+ unsigned long flag;
+
+ /* for kvm_arch_mmu_enable_log_dirty_pt_masked use */
+ unsigned long mask;
+ unsigned long gfn;
+
+ /* page walk mmu info */
+ unsigned int level;
+ unsigned long pgtable_shift;
+ unsigned long invalid_entry;
+ unsigned long *invalid_ptes;
+ unsigned int *pte_shifts;
+ void *opaque;
+
+ /* free pte table page list */
+ struct list_head list;
+};
+
+kvm_pte_t *kvm_pgd_alloc(void);
+
+static inline void kvm_set_pte(kvm_pte_t *ptep, kvm_pte_t val)
+{
+ WRITE_ONCE(*ptep, val);
+}
+
+static inline int kvm_pte_write(kvm_pte_t pte) { return pte & _PAGE_WRITE; }
+static inline int kvm_pte_dirty(kvm_pte_t pte) { return pte & _PAGE_DIRTY; }
+static inline int kvm_pte_young(kvm_pte_t pte) { return pte & _PAGE_ACCESSED; }
+static inline int kvm_pte_huge(kvm_pte_t pte) { return pte & _PAGE_HUGE; }
+
+static inline kvm_pte_t kvm_pte_mkyoung(kvm_pte_t pte)
+{
+ return pte | _PAGE_ACCESSED;
+}
+
+static inline kvm_pte_t kvm_pte_mkold(kvm_pte_t pte)
+{
+ return pte & ~_PAGE_ACCESSED;
+}
+
+static inline kvm_pte_t kvm_pte_mkdirty(kvm_pte_t pte)
+{
+ return pte | _PAGE_DIRTY;
+}
+
+static inline kvm_pte_t kvm_pte_mkclean(kvm_pte_t pte)
+{
+ return pte & ~_PAGE_DIRTY;
+}
+
+static inline kvm_pte_t kvm_pte_mkhuge(kvm_pte_t pte)
+{
+ return pte | _PAGE_HUGE;
+}
+
+static inline kvm_pte_t kvm_pte_mksmall(kvm_pte_t pte)
+{
+ return pte & ~_PAGE_HUGE;
+}
+
+static inline int kvm_need_flush(kvm_ptw_ctx *ctx)
+{
+ return ctx->flag & _KVM_FLUSH_PGTABLE;
+}
+
+static inline kvm_pte_t *kvm_pgtable_offset(kvm_ptw_ctx *ctx, kvm_pte_t *table,
+ phys_addr_t addr)
+{
+
+ return table + ((addr >> ctx->pgtable_shift) & (PTRS_PER_PTE - 1));
+}
+
+static inline phys_addr_t kvm_pgtable_addr_end(kvm_ptw_ctx *ctx,
+ phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t boundary, size;
+
+ size = 0x1UL << ctx->pgtable_shift;
+ boundary = (addr + size) & ~(size - 1);
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+static inline int kvm_pte_present(kvm_ptw_ctx *ctx, kvm_pte_t *entry)
+{
+ if (!ctx || ctx->level == 0)
+ return !!(*entry & _PAGE_PRESENT);
+
+ return *entry != ctx->invalid_entry;
+}
+
+static inline int kvm_pte_none(kvm_ptw_ctx *ctx, kvm_pte_t *entry)
+{
+ return *entry == ctx->invalid_entry;
+}
+
+static inline void kvm_ptw_enter(kvm_ptw_ctx *ctx)
+{
+ ctx->level--;
+ ctx->pgtable_shift = ctx->pte_shifts[ctx->level];
+ ctx->invalid_entry = ctx->invalid_ptes[ctx->level];
+}
+
+static inline void kvm_ptw_exit(kvm_ptw_ctx *ctx)
+{
+ ctx->level++;
+ ctx->pgtable_shift = ctx->pte_shifts[ctx->level];
+ ctx->invalid_entry = ctx->invalid_ptes[ctx->level];
+}
+
+#endif /* __ASM_LOONGARCH_KVM_MMU_H__ */
diff --git a/arch/loongarch/include/asm/kvm_types.h b/arch/loongarch/include/asm/kvm_types.h
new file mode 100644
index 0000000000..2fe1d4bdff
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_types.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_LOONGARCH_KVM_TYPES_H
+#define _ASM_LOONGARCH_KVM_TYPES_H
+
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
+
+#endif /* _ASM_LOONGARCH_KVM_TYPES_H */
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
new file mode 100644
index 0000000000..553cfa2b2b
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef __ASM_LOONGARCH_KVM_VCPU_H__
+#define __ASM_LOONGARCH_KVM_VCPU_H__
+
+#include <linux/kvm_host.h>
+#include <asm/loongarch.h>
+
+/* Controlled by 0x5 guest estat */
+#define CPU_SIP0 (_ULCAST_(1))
+#define CPU_SIP1 (_ULCAST_(1) << 1)
+#define CPU_PMU (_ULCAST_(1) << 10)
+#define CPU_TIMER (_ULCAST_(1) << 11)
+#define CPU_IPI (_ULCAST_(1) << 12)
+
+/* Controlled by 0x52 guest exception VIP aligned to estat bit 5~12 */
+#define CPU_IP0 (_ULCAST_(1))
+#define CPU_IP1 (_ULCAST_(1) << 1)
+#define CPU_IP2 (_ULCAST_(1) << 2)
+#define CPU_IP3 (_ULCAST_(1) << 3)
+#define CPU_IP4 (_ULCAST_(1) << 4)
+#define CPU_IP5 (_ULCAST_(1) << 5)
+#define CPU_IP6 (_ULCAST_(1) << 6)
+#define CPU_IP7 (_ULCAST_(1) << 7)
+
+#define MNSEC_PER_SEC (NSEC_PER_SEC >> 20)
+
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_LOONGSON_IRQ_TYPE_SHIFT 24
+#define KVM_LOONGSON_IRQ_TYPE_MASK 0xff
+#define KVM_LOONGSON_IRQ_VCPU_SHIFT 16
+#define KVM_LOONGSON_IRQ_VCPU_MASK 0xff
+#define KVM_LOONGSON_IRQ_NUM_SHIFT 0
+#define KVM_LOONGSON_IRQ_NUM_MASK 0xffff
+
+typedef union loongarch_instruction larch_inst;
+typedef int (*exit_handle_fn)(struct kvm_vcpu *);
+
+int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst);
+int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst);
+int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_complete_iocsr_read(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_emu_idle(struct kvm_vcpu *vcpu);
+int kvm_pending_timer(struct kvm_vcpu *vcpu);
+int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault);
+void kvm_deliver_intr(struct kvm_vcpu *vcpu);
+void kvm_deliver_exception(struct kvm_vcpu *vcpu);
+
+void kvm_own_fpu(struct kvm_vcpu *vcpu);
+void kvm_lose_fpu(struct kvm_vcpu *vcpu);
+void kvm_save_fpu(struct loongarch_fpu *fpu);
+void kvm_restore_fpu(struct loongarch_fpu *fpu);
+void kvm_restore_fcsr(struct loongarch_fpu *fpu);
+
+void kvm_acquire_timer(struct kvm_vcpu *vcpu);
+void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz);
+void kvm_reset_timer(struct kvm_vcpu *vcpu);
+void kvm_save_timer(struct kvm_vcpu *vcpu);
+void kvm_restore_timer(struct kvm_vcpu *vcpu);
+
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+
+/*
+ * Loongarch KVM guest interrupt handling
+ */
+static inline void kvm_queue_irq(struct kvm_vcpu *vcpu, unsigned int irq)
+{
+ set_bit(irq, &vcpu->arch.irq_pending);
+ clear_bit(irq, &vcpu->arch.irq_clear);
+}
+
+static inline void kvm_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int irq)
+{
+ clear_bit(irq, &vcpu->arch.irq_pending);
+ set_bit(irq, &vcpu->arch.irq_clear);
+}
+
+static inline int kvm_queue_exception(struct kvm_vcpu *vcpu,
+ unsigned int code, unsigned int subcode)
+{
+ /* only one exception can be injected */
+ if (!vcpu->arch.exception_pending) {
+ set_bit(code, &vcpu->arch.exception_pending);
+ vcpu->arch.esubcode = subcode;
+ return 0;
+ } else
+ return -1;
+}
+
+#endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */
diff --git a/arch/loongarch/include/asm/local.h b/arch/loongarch/include/asm/local.h
index c49675852b..f53ea653af 100644
--- a/arch/loongarch/include/asm/local.h
+++ b/arch/loongarch/include/asm/local.h
@@ -70,22 +70,27 @@ static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
#define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))
/**
- * local_add_unless - add unless the number is a given value
+ * local_add_unless - add unless the number is already a given value
* @l: pointer of type local_t
* @a: the amount to add to l...
* @u: ...unless l is equal to u.
*
- * Atomically adds @a to @l, so long as it was not @u.
- * Returns non-zero if @l was not @u, and zero otherwise.
+ * Atomically adds @a to @l, if @v was not already @u.
+ * Returns true if the addition was done.
*/
-#define local_add_unless(l, a, u) \
-({ \
- long c, old; \
- c = local_read(l); \
- while (c != (u) && (old = local_cmpxchg((l), c, c + (a))) != c) \
- c = old; \
- c != (u); \
-})
+static inline bool
+local_add_unless(local_t *l, long a, long u)
+{
+ long c = local_read(l);
+
+ do {
+ if (unlikely(c == u))
+ return false;
+ } while (!local_try_cmpxchg(l, &c, c + a));
+
+ return true;
+}
+
#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
#define local_dec_return(l) local_sub_return(1, (l))
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 33531d432b..46366e783c 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -226,6 +226,7 @@
#define LOONGARCH_CSR_ECFG 0x4 /* Exception config */
#define CSR_ECFG_VS_SHIFT 16
#define CSR_ECFG_VS_WIDTH 3
+#define CSR_ECFG_VS_SHIFT_END (CSR_ECFG_VS_SHIFT + CSR_ECFG_VS_WIDTH - 1)
#define CSR_ECFG_VS (_ULCAST_(0x7) << CSR_ECFG_VS_SHIFT)
#define CSR_ECFG_IM_SHIFT 0
#define CSR_ECFG_IM_WIDTH 14
@@ -314,13 +315,14 @@
#define CSR_TLBLO1_V (_ULCAST_(0x1) << CSR_TLBLO1_V_SHIFT)
#define LOONGARCH_CSR_GTLBC 0x15 /* Guest TLB control */
-#define CSR_GTLBC_RID_SHIFT 16
-#define CSR_GTLBC_RID_WIDTH 8
-#define CSR_GTLBC_RID (_ULCAST_(0xff) << CSR_GTLBC_RID_SHIFT)
+#define CSR_GTLBC_TGID_SHIFT 16
+#define CSR_GTLBC_TGID_WIDTH 8
+#define CSR_GTLBC_TGID_SHIFT_END (CSR_GTLBC_TGID_SHIFT + CSR_GTLBC_TGID_WIDTH - 1)
+#define CSR_GTLBC_TGID (_ULCAST_(0xff) << CSR_GTLBC_TGID_SHIFT)
#define CSR_GTLBC_TOTI_SHIFT 13
#define CSR_GTLBC_TOTI (_ULCAST_(0x1) << CSR_GTLBC_TOTI_SHIFT)
-#define CSR_GTLBC_USERID_SHIFT 12
-#define CSR_GTLBC_USERID (_ULCAST_(0x1) << CSR_GTLBC_USERID_SHIFT)
+#define CSR_GTLBC_USETGID_SHIFT 12
+#define CSR_GTLBC_USETGID (_ULCAST_(0x1) << CSR_GTLBC_USETGID_SHIFT)
#define CSR_GTLBC_GMTLBSZ_SHIFT 0
#define CSR_GTLBC_GMTLBSZ_WIDTH 6
#define CSR_GTLBC_GMTLBSZ (_ULCAST_(0x3f) << CSR_GTLBC_GMTLBSZ_SHIFT)
@@ -475,6 +477,7 @@
#define LOONGARCH_CSR_GSTAT 0x50 /* Guest status */
#define CSR_GSTAT_GID_SHIFT 16
#define CSR_GSTAT_GID_WIDTH 8
+#define CSR_GSTAT_GID_SHIFT_END (CSR_GSTAT_GID_SHIFT + CSR_GSTAT_GID_WIDTH - 1)
#define CSR_GSTAT_GID (_ULCAST_(0xff) << CSR_GSTAT_GID_SHIFT)
#define CSR_GSTAT_GIDBIT_SHIFT 4
#define CSR_GSTAT_GIDBIT_WIDTH 6
@@ -525,6 +528,12 @@
#define CSR_GCFG_MATC_GUEST (_ULCAST_(0x0) << CSR_GCFG_MATC_SHITF)
#define CSR_GCFG_MATC_ROOT (_ULCAST_(0x1) << CSR_GCFG_MATC_SHITF)
#define CSR_GCFG_MATC_NEST (_ULCAST_(0x2) << CSR_GCFG_MATC_SHITF)
+#define CSR_GCFG_MATP_NEST_SHIFT 2
+#define CSR_GCFG_MATP_NEST (_ULCAST_(0x1) << CSR_GCFG_MATP_NEST_SHIFT)
+#define CSR_GCFG_MATP_ROOT_SHIFT 1
+#define CSR_GCFG_MATP_ROOT (_ULCAST_(0x1) << CSR_GCFG_MATP_ROOT_SHIFT)
+#define CSR_GCFG_MATP_GUEST_SHIFT 0
+#define CSR_GCFG_MATP_GUEST (_ULCAST_(0x1) << CSR_GCFG_MATP_GUEST_SHIFT)
#define LOONGARCH_CSR_GINTC 0x52 /* Guest interrupt control */
#define CSR_GINTC_HC_SHIFT 16
@@ -1089,12 +1098,11 @@
static __always_inline u64 drdtime(void)
{
- int rID = 0;
u64 val = 0;
__asm__ __volatile__(
- "rdtime.d %0, %1 \n\t"
- : "=r"(val), "=r"(rID)
+ "rdtime.d %0, $zero\n\t"
+ : "=r"(val)
:
);
return val;
diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index ed5da02b1c..9b36ac003f 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -40,13 +40,13 @@ static __always_inline unsigned long __percpu_##op(void *ptr, \
switch (size) { \
case 4: \
__asm__ __volatile__( \
- "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \
+ "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \
: [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr) \
: [val] "r" (val)); \
break; \
case 8: \
__asm__ __volatile__( \
- "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \
+ "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \
: [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr) \
: [val] "r" (val)); \
break; \
@@ -63,7 +63,7 @@ PERCPU_OP(and, and, &)
PERCPU_OP(or, or, |)
#undef PERCPU_OP
-static __always_inline unsigned long __percpu_read(void *ptr, int size)
+static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size)
{
unsigned long ret;
@@ -100,7 +100,7 @@ static __always_inline unsigned long __percpu_read(void *ptr, int size)
return ret;
}
-static __always_inline void __percpu_write(void *ptr, unsigned long val, int size)
+static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size)
{
switch (size) {
case 1:
@@ -132,8 +132,7 @@ static __always_inline void __percpu_write(void *ptr, unsigned long val, int siz
}
}
-static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
- int size)
+static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size)
{
switch (size) {
case 1:
diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index 79470f0b4f..4e2d6b7ca2 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -84,6 +84,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
if (!ptdesc)
return NULL;
+ pagetable_pud_ctor(ptdesc);
pud = ptdesc_address(ptdesc);
pud_init(pud);
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
new file mode 100644
index 0000000000..c6ad2ee610
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef __UAPI_ASM_LOONGARCH_KVM_H
+#define __UAPI_ASM_LOONGARCH_KVM_H
+
+#include <linux/types.h>
+
+/*
+ * KVM LoongArch specific structures and definitions.
+ *
+ * Some parts derived from the x86 version of this file.
+ */
+
+#define __KVM_HAVE_READONLY_MEM
+
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
+
+/*
+ * for KVM_GET_REGS and KVM_SET_REGS
+ */
+struct kvm_regs {
+ /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+ __u64 gpr[32];
+ __u64 pc;
+};
+
+/*
+ * for KVM_GET_FPU and KVM_SET_FPU
+ */
+struct kvm_fpu {
+ __u32 fcsr;
+ __u64 fcc; /* 8x8 */
+ struct kvm_fpureg {
+ __u64 val64[4];
+ } fpr[32];
+};
+
+/*
+ * For LoongArch, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various
+ * registers. The id field is broken down as follows:
+ *
+ * bits[63..52] - As per linux/kvm.h
+ * bits[51..32] - Must be zero.
+ * bits[31..16] - Register set.
+ *
+ * Register set = 0: GP registers from kvm_regs (see definitions below).
+ *
+ * Register set = 1: CSR registers.
+ *
+ * Register set = 2: KVM specific registers (see definitions below).
+ *
+ * Register set = 3: FPU / SIMD registers (see definitions below).
+ *
+ * Other sets registers may be added in the future. Each set would
+ * have its own identifier in bits[31..16].
+ */
+
+#define KVM_REG_LOONGARCH_GPR (KVM_REG_LOONGARCH | 0x00000ULL)
+#define KVM_REG_LOONGARCH_CSR (KVM_REG_LOONGARCH | 0x10000ULL)
+#define KVM_REG_LOONGARCH_KVM (KVM_REG_LOONGARCH | 0x20000ULL)
+#define KVM_REG_LOONGARCH_FPSIMD (KVM_REG_LOONGARCH | 0x30000ULL)
+#define KVM_REG_LOONGARCH_CPUCFG (KVM_REG_LOONGARCH | 0x40000ULL)
+#define KVM_REG_LOONGARCH_MASK (KVM_REG_LOONGARCH | 0x70000ULL)
+#define KVM_CSR_IDX_MASK 0x7fff
+#define KVM_CPUCFG_IDX_MASK 0x7fff
+
+/*
+ * KVM_REG_LOONGARCH_KVM - KVM specific control registers.
+ */
+
+#define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1)
+#define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2)
+
+#define LOONGARCH_REG_SHIFT 3
+#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
+#define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
+#define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
+
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
+/* dummy definition */
+struct kvm_sregs {
+};
+
+struct kvm_iocsr_entry {
+ __u32 addr;
+ __u32 pad;
+ __u64 data;
+};
+
+#define KVM_NR_IRQCHIPS 1
+#define KVM_IRQCHIP_NUM_PINS 64
+#define KVM_MAX_CORES 256
+
+#endif /* __UAPI_ASM_LOONGARCH_KVM_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 4fcc168f07..3c808c6803 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o
obj-$(CONFIG_RELOCATABLE) += relocate.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index 8e00a754e5..55d6a48c76 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -29,11 +29,9 @@ int disabled_cpus;
u64 acpi_saved_sp;
-#define MAX_CORE_PIC 256
-
#define PREFIX "ACPI: "
-struct acpi_madt_core_pic acpi_core_pic[NR_CPUS];
+struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC];
void __init __iomem * __acpi_map_table(unsigned long phys, unsigned long size)
{
diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
index 8da0726777..bee9f7a310 100644
--- a/arch/loongarch/kernel/asm-offsets.c
+++ b/arch/loongarch/kernel/asm-offsets.c
@@ -9,12 +9,13 @@
#include <linux/mm.h>
#include <linux/kbuild.h>
#include <linux/suspend.h>
+#include <linux/kvm_host.h>
#include <asm/cpu-info.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/ftrace.h>
-void output_ptreg_defines(void)
+static void __used output_ptreg_defines(void)
{
COMMENT("LoongArch pt_regs offsets.");
OFFSET(PT_R0, pt_regs, regs[0]);
@@ -61,7 +62,7 @@ void output_ptreg_defines(void)
BLANK();
}
-void output_task_defines(void)
+static void __used output_task_defines(void)
{
COMMENT("LoongArch task_struct offsets.");
OFFSET(TASK_STATE, task_struct, __state);
@@ -76,7 +77,7 @@ void output_task_defines(void)
BLANK();
}
-void output_thread_info_defines(void)
+static void __used output_thread_info_defines(void)
{
COMMENT("LoongArch thread_info offsets.");
OFFSET(TI_TASK, thread_info, task);
@@ -92,7 +93,7 @@ void output_thread_info_defines(void)
BLANK();
}
-void output_thread_defines(void)
+static void __used output_thread_defines(void)
{
COMMENT("LoongArch specific thread_struct offsets.");
OFFSET(THREAD_REG01, task_struct, thread.reg01);
@@ -128,7 +129,7 @@ void output_thread_defines(void)
BLANK();
}
-void output_thread_fpu_defines(void)
+static void __used output_thread_fpu_defines(void)
{
OFFSET(THREAD_FPR0, loongarch_fpu, fpr[0]);
OFFSET(THREAD_FPR1, loongarch_fpu, fpr[1]);
@@ -169,7 +170,7 @@ void output_thread_fpu_defines(void)
BLANK();
}
-void output_thread_lbt_defines(void)
+static void __used output_thread_lbt_defines(void)
{
OFFSET(THREAD_SCR0, loongarch_lbt, scr0);
OFFSET(THREAD_SCR1, loongarch_lbt, scr1);
@@ -179,7 +180,7 @@ void output_thread_lbt_defines(void)
BLANK();
}
-void output_mm_defines(void)
+static void __used output_mm_defines(void)
{
COMMENT("Size of struct page");
DEFINE(STRUCT_PAGE_SIZE, sizeof(struct page));
@@ -211,7 +212,7 @@ void output_mm_defines(void)
BLANK();
}
-void output_sc_defines(void)
+static void __used output_sc_defines(void)
{
COMMENT("Linux sigcontext offsets.");
OFFSET(SC_REGS, sigcontext, sc_regs);
@@ -219,7 +220,7 @@ void output_sc_defines(void)
BLANK();
}
-void output_signal_defines(void)
+static void __used output_signal_defines(void)
{
COMMENT("Linux signal numbers.");
DEFINE(_SIGHUP, SIGHUP);
@@ -257,7 +258,7 @@ void output_signal_defines(void)
}
#ifdef CONFIG_SMP
-void output_smpboot_defines(void)
+static void __used output_smpboot_defines(void)
{
COMMENT("Linux smp cpu boot offsets.");
OFFSET(CPU_BOOT_STACK, secondary_data, stack);
@@ -267,7 +268,7 @@ void output_smpboot_defines(void)
#endif
#ifdef CONFIG_HIBERNATION
-void output_pbe_defines(void)
+static void __used output_pbe_defines(void)
{
COMMENT("Linux struct pbe offsets.");
OFFSET(PBE_ADDRESS, pbe, address);
@@ -279,7 +280,7 @@ void output_pbe_defines(void)
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-void output_fgraph_ret_regs_defines(void)
+static void __used output_fgraph_ret_regs_defines(void)
{
COMMENT("LoongArch fgraph_ret_regs offsets.");
OFFSET(FGRET_REGS_A0, fgraph_ret_regs, regs[0]);
@@ -289,3 +290,34 @@ void output_fgraph_ret_regs_defines(void)
BLANK();
}
#endif
+
+static void __used output_kvm_defines(void)
+{
+ COMMENT("KVM/LoongArch Specific offsets.");
+
+ OFFSET(VCPU_FCC, kvm_vcpu_arch, fpu.fcc);
+ OFFSET(VCPU_FCSR0, kvm_vcpu_arch, fpu.fcsr);
+ BLANK();
+
+ OFFSET(KVM_VCPU_ARCH, kvm_vcpu, arch);
+ OFFSET(KVM_VCPU_KVM, kvm_vcpu, kvm);
+ OFFSET(KVM_VCPU_RUN, kvm_vcpu, run);
+ BLANK();
+
+ OFFSET(KVM_ARCH_HSP, kvm_vcpu_arch, host_sp);
+ OFFSET(KVM_ARCH_HTP, kvm_vcpu_arch, host_tp);
+ OFFSET(KVM_ARCH_HPGD, kvm_vcpu_arch, host_pgd);
+ OFFSET(KVM_ARCH_HANDLE_EXIT, kvm_vcpu_arch, handle_exit);
+ OFFSET(KVM_ARCH_HEENTRY, kvm_vcpu_arch, host_eentry);
+ OFFSET(KVM_ARCH_GEENTRY, kvm_vcpu_arch, guest_eentry);
+ OFFSET(KVM_ARCH_GPC, kvm_vcpu_arch, pc);
+ OFFSET(KVM_ARCH_GGPR, kvm_vcpu_arch, gprs);
+ OFFSET(KVM_ARCH_HBADI, kvm_vcpu_arch, badi);
+ OFFSET(KVM_ARCH_HBADV, kvm_vcpu_arch, badv);
+ OFFSET(KVM_ARCH_HECFG, kvm_vcpu_arch, host_ecfg);
+ OFFSET(KVM_ARCH_HESTAT, kvm_vcpu_arch, host_estat);
+ OFFSET(KVM_ARCH_HPERCPU, kvm_vcpu_arch, host_percpu);
+
+ OFFSET(KVM_GPGD, kvm, arch.pgd);
+ BLANK();
+}
diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c
index 9fc10cea21..acb5d33856 100644
--- a/arch/loongarch/kernel/efi.c
+++ b/arch/loongarch/kernel/efi.c
@@ -68,6 +68,11 @@ void __init efi_runtime_init(void)
unsigned long __initdata screen_info_table = EFI_INVALID_TABLE_ADDR;
+#if defined(CONFIG_SYSFB) || defined(CONFIG_EFI_EARLYCON)
+struct screen_info screen_info __section(".data");
+EXPORT_SYMBOL_GPL(screen_info);
+#endif
+
static void __init init_screen_info(void)
{
struct screen_info *si;
@@ -115,7 +120,8 @@ void __init efi_init(void)
set_bit(EFI_CONFIG_TABLES, &efi.flags);
- init_screen_info();
+ if (IS_ENABLED(CONFIG_EFI_EARLYCON) || IS_ENABLED(CONFIG_SYSFB))
+ init_screen_info();
if (boot_memmap == EFI_INVALID_TABLE_ADDR)
return;
diff --git a/arch/loongarch/kernel/image-vars.h b/arch/loongarch/kernel/image-vars.h
index e561989d02..5087416b96 100644
--- a/arch/loongarch/kernel/image-vars.h
+++ b/arch/loongarch/kernel/image-vars.h
@@ -12,7 +12,9 @@ __efistub_kernel_entry = kernel_entry;
__efistub_kernel_asize = kernel_asize;
__efistub_kernel_fsize = kernel_fsize;
__efistub_kernel_offset = kernel_offset;
+#if defined(CONFIG_EFI_EARLYCON) || defined(CONFIG_SYSFB)
__efistub_screen_info = screen_info;
+#endif
#endif
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index aed65915e9..b7c14a1242 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -16,7 +16,6 @@
#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/export.h>
-#include <linux/screen_info.h>
#include <linux/memblock.h>
#include <linux/initrd.h>
#include <linux/ioport.h>
@@ -57,8 +56,6 @@
#define SMBIOS_CORE_PACKAGE_OFFSET 0x23
#define LOONGSON_EFI_ENABLE (1 << 3)
-struct screen_info screen_info __section(".data");
-
unsigned long fw_arg0, fw_arg1, fw_arg2;
DEFINE_PER_CPU(unsigned long, kernelsp);
struct cpuinfo_loongarch cpu_data[NR_CPUS] __read_mostly;
@@ -267,7 +264,9 @@ static void __init arch_parse_crashkernel(void)
unsigned long long crash_base, crash_size;
total_mem = memblock_phys_mem_size();
- ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base);
+ ret = parse_crashkernel(boot_command_line, total_mem,
+ &crash_size, &crash_base,
+ NULL, NULL);
if (ret < 0 || crash_size <= 0)
return;
@@ -367,6 +366,8 @@ void __init platform_init(void)
acpi_gbl_use_default_register_widths = false;
acpi_boot_table_init();
#endif
+
+ early_init_fdt_scan_reserved_mem();
unflatten_and_copy_device_tree();
#ifdef CONFIG_NUMA
@@ -400,8 +401,6 @@ static void __init arch_mem_init(char **cmdline_p)
check_kernel_sections_mem();
- early_init_fdt_scan_reserved_mem();
-
/*
* In order to reduce the possibility of kernel panic when failed to
* get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 42e3a0e189..378ffa78ff 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -88,6 +88,73 @@ void show_ipi_list(struct seq_file *p, int prec)
}
}
+static inline void set_cpu_core_map(int cpu)
+{
+ int i;
+
+ cpumask_set_cpu(cpu, &cpu_core_setup_map);
+
+ for_each_cpu(i, &cpu_core_setup_map) {
+ if (cpu_data[cpu].package == cpu_data[i].package) {
+ cpumask_set_cpu(i, &cpu_core_map[cpu]);
+ cpumask_set_cpu(cpu, &cpu_core_map[i]);
+ }
+ }
+}
+
+static inline void set_cpu_sibling_map(int cpu)
+{
+ int i;
+
+ cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
+
+ for_each_cpu(i, &cpu_sibling_setup_map) {
+ if (cpus_are_siblings(cpu, i)) {
+ cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
+ cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
+ }
+ }
+}
+
+static inline void clear_cpu_sibling_map(int cpu)
+{
+ int i;
+
+ for_each_cpu(i, &cpu_sibling_setup_map) {
+ if (cpus_are_siblings(cpu, i)) {
+ cpumask_clear_cpu(i, &cpu_sibling_map[cpu]);
+ cpumask_clear_cpu(cpu, &cpu_sibling_map[i]);
+ }
+ }
+
+ cpumask_clear_cpu(cpu, &cpu_sibling_setup_map);
+}
+
+/*
+ * Calculate a new cpu_foreign_map mask whenever a
+ * new cpu appears or disappears.
+ */
+void calculate_cpu_foreign_map(void)
+{
+ int i, k, core_present;
+ cpumask_t temp_foreign_map;
+
+ /* Re-calculate the mask */
+ cpumask_clear(&temp_foreign_map);
+ for_each_online_cpu(i) {
+ core_present = 0;
+ for_each_cpu(k, &temp_foreign_map)
+ if (cpus_are_siblings(i, k))
+ core_present = 1;
+ if (!core_present)
+ cpumask_set_cpu(i, &temp_foreign_map);
+ }
+
+ for_each_online_cpu(i)
+ cpumask_andnot(&cpu_foreign_map[i],
+ &temp_foreign_map, &cpu_sibling_map[i]);
+}
+
/* Send mailbox buffer via Mail_Send */
static void csr_mail_send(uint64_t data, int cpu, int mailbox)
{
@@ -300,6 +367,7 @@ int loongson_cpu_disable(void)
numa_remove_cpu(cpu);
#endif
set_cpu_online(cpu, false);
+ clear_cpu_sibling_map(cpu);
calculate_cpu_foreign_map();
local_irq_save(flags);
irq_migrate_all_off_this_cpu();
@@ -334,6 +402,7 @@ void __noreturn arch_cpu_idle_dead(void)
addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
} while (addr == 0);
+ local_irq_disable();
init_fn = (void *)TO_CACHE(addr);
iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
@@ -376,59 +445,6 @@ static int __init ipi_pm_init(void)
core_initcall(ipi_pm_init);
#endif
-static inline void set_cpu_sibling_map(int cpu)
-{
- int i;
-
- cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
-
- for_each_cpu(i, &cpu_sibling_setup_map) {
- if (cpus_are_siblings(cpu, i)) {
- cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
- cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
- }
- }
-}
-
-static inline void set_cpu_core_map(int cpu)
-{
- int i;
-
- cpumask_set_cpu(cpu, &cpu_core_setup_map);
-
- for_each_cpu(i, &cpu_core_setup_map) {
- if (cpu_data[cpu].package == cpu_data[i].package) {
- cpumask_set_cpu(i, &cpu_core_map[cpu]);
- cpumask_set_cpu(cpu, &cpu_core_map[i]);
- }
- }
-}
-
-/*
- * Calculate a new cpu_foreign_map mask whenever a
- * new cpu appears or disappears.
- */
-void calculate_cpu_foreign_map(void)
-{
- int i, k, core_present;
- cpumask_t temp_foreign_map;
-
- /* Re-calculate the mask */
- cpumask_clear(&temp_foreign_map);
- for_each_online_cpu(i) {
- core_present = 0;
- for_each_cpu(k, &temp_foreign_map)
- if (cpus_are_siblings(i, k))
- core_present = 1;
- if (!core_present)
- cpumask_set_cpu(i, &temp_foreign_map);
- }
-
- for_each_online_cpu(i)
- cpumask_andnot(&cpu_foreign_map[i],
- &temp_foreign_map, &cpu_sibling_map[i]);
-}
-
/* Preload SMP state for boot cpu */
void smp_prepare_boot_cpu(void)
{
@@ -506,7 +522,6 @@ asmlinkage void start_secondary(void)
sync_counter();
cpu = raw_smp_processor_id();
set_my_cpu_offset(per_cpu_offset(cpu));
- rcu_cpu_starting(cpu);
cpu_probe();
constant_clockevent_init();
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
new file mode 100644
index 0000000000..fda425babf
--- /dev/null
+++ b/arch/loongarch/kvm/Kconfig
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ help
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ tristate "Kernel-based Virtual Machine (KVM) support"
+ depends on AS_HAS_LVZ_EXTENSION
+ depends on HAVE_KVM
+ select HAVE_KVM_DIRTY_RING_ACQ_REL
+ select HAVE_KVM_EVENTFD
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
+ select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ select KVM_GENERIC_HARDWARE_ENABLING
+ select KVM_MMIO
+ select KVM_XFER_TO_GUEST_WORK
+ select MMU_NOTIFIER
+ select PREEMPT_NOTIFIERS
+ help
+ Support hosting virtualized guest machines using
+ hardware virtualization extensions. You will need
+ a processor equipped with virtualization extensions.
+
+ If unsure, say N.
+
+endif # VIRTUALIZATION
diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile
new file mode 100644
index 0000000000..244467d779
--- /dev/null
+++ b/arch/loongarch/kvm/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for LoongArch KVM support
+#
+
+ccflags-y += -I $(srctree)/$(src)
+
+include $(srctree)/virt/kvm/Makefile.kvm
+
+obj-$(CONFIG_KVM) += kvm.o
+
+kvm-y += exit.o
+kvm-y += interrupt.o
+kvm-y += main.o
+kvm-y += mmu.o
+kvm-y += switch.o
+kvm-y += timer.o
+kvm-y += tlb.o
+kvm-y += vcpu.o
+kvm-y += vm.o
+
+CFLAGS_exit.o += $(call cc-option,-Wno-override-init,)
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
new file mode 100644
index 0000000000..ce8de3fa47
--- /dev/null
+++ b/arch/loongarch/kvm/exit.c
@@ -0,0 +1,696 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/vmalloc.h>
+#include <asm/fpu.h>
+#include <asm/inst.h>
+#include <asm/loongarch.h>
+#include <asm/mmzone.h>
+#include <asm/numa.h>
+#include <asm/time.h>
+#include <asm/tlb.h>
+#include <asm/kvm_csr.h>
+#include <asm/kvm_vcpu.h>
+#include "trace.h"
+
+static unsigned long kvm_emu_read_csr(struct kvm_vcpu *vcpu, int csrid)
+{
+ unsigned long val = 0;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ /*
+ * From LoongArch Reference Manual Volume 1 Chapter 4.2.1
+ * For undefined CSR id, return value is 0
+ */
+ if (get_gcsr_flag(csrid) & SW_GCSR)
+ val = kvm_read_sw_gcsr(csr, csrid);
+ else
+ pr_warn_once("Unsupported csrrd 0x%x with pc %lx\n", csrid, vcpu->arch.pc);
+
+ return val;
+}
+
+static unsigned long kvm_emu_write_csr(struct kvm_vcpu *vcpu, int csrid, unsigned long val)
+{
+ unsigned long old = 0;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (get_gcsr_flag(csrid) & SW_GCSR) {
+ old = kvm_read_sw_gcsr(csr, csrid);
+ kvm_write_sw_gcsr(csr, csrid, val);
+ } else
+ pr_warn_once("Unsupported csrwr 0x%x with pc %lx\n", csrid, vcpu->arch.pc);
+
+ return old;
+}
+
+static unsigned long kvm_emu_xchg_csr(struct kvm_vcpu *vcpu, int csrid,
+ unsigned long csr_mask, unsigned long val)
+{
+ unsigned long old = 0;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (get_gcsr_flag(csrid) & SW_GCSR) {
+ old = kvm_read_sw_gcsr(csr, csrid);
+ val = (old & ~csr_mask) | (val & csr_mask);
+ kvm_write_sw_gcsr(csr, csrid, val);
+ old = old & csr_mask;
+ } else
+ pr_warn_once("Unsupported csrxchg 0x%x with pc %lx\n", csrid, vcpu->arch.pc);
+
+ return old;
+}
+
+static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst)
+{
+ unsigned int rd, rj, csrid;
+ unsigned long csr_mask, val = 0;
+
+ /*
+ * CSR value mask imm
+ * rj = 0 means csrrd
+ * rj = 1 means csrwr
+ * rj != 0,1 means csrxchg
+ */
+ rd = inst.reg2csr_format.rd;
+ rj = inst.reg2csr_format.rj;
+ csrid = inst.reg2csr_format.csr;
+
+ /* Process CSR ops */
+ switch (rj) {
+ case 0: /* process csrrd */
+ val = kvm_emu_read_csr(vcpu, csrid);
+ vcpu->arch.gprs[rd] = val;
+ break;
+ case 1: /* process csrwr */
+ val = vcpu->arch.gprs[rd];
+ val = kvm_emu_write_csr(vcpu, csrid, val);
+ vcpu->arch.gprs[rd] = val;
+ break;
+ default: /* process csrxchg */
+ val = vcpu->arch.gprs[rd];
+ csr_mask = vcpu->arch.gprs[rj];
+ val = kvm_emu_xchg_csr(vcpu, csrid, csr_mask, val);
+ vcpu->arch.gprs[rd] = val;
+ }
+
+ return EMULATE_DONE;
+}
+
+int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ int ret;
+ unsigned long val;
+ u32 addr, rd, rj, opcode;
+
+ /*
+ * Each IOCSR with different opcode
+ */
+ rd = inst.reg2_format.rd;
+ rj = inst.reg2_format.rj;
+ opcode = inst.reg2_format.opcode;
+ addr = vcpu->arch.gprs[rj];
+ ret = EMULATE_DO_IOCSR;
+ run->iocsr_io.phys_addr = addr;
+ run->iocsr_io.is_write = 0;
+
+ /* LoongArch is Little endian */
+ switch (opcode) {
+ case iocsrrdb_op:
+ run->iocsr_io.len = 1;
+ break;
+ case iocsrrdh_op:
+ run->iocsr_io.len = 2;
+ break;
+ case iocsrrdw_op:
+ run->iocsr_io.len = 4;
+ break;
+ case iocsrrdd_op:
+ run->iocsr_io.len = 8;
+ break;
+ case iocsrwrb_op:
+ run->iocsr_io.len = 1;
+ run->iocsr_io.is_write = 1;
+ break;
+ case iocsrwrh_op:
+ run->iocsr_io.len = 2;
+ run->iocsr_io.is_write = 1;
+ break;
+ case iocsrwrw_op:
+ run->iocsr_io.len = 4;
+ run->iocsr_io.is_write = 1;
+ break;
+ case iocsrwrd_op:
+ run->iocsr_io.len = 8;
+ run->iocsr_io.is_write = 1;
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ break;
+ }
+
+ if (ret == EMULATE_DO_IOCSR) {
+ if (run->iocsr_io.is_write) {
+ val = vcpu->arch.gprs[rd];
+ memcpy(run->iocsr_io.data, &val, run->iocsr_io.len);
+ }
+ vcpu->arch.io_gpr = rd;
+ }
+
+ return ret;
+}
+
+int kvm_complete_iocsr_read(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ enum emulation_result er = EMULATE_DONE;
+ unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr];
+
+ switch (run->iocsr_io.len) {
+ case 1:
+ *gpr = *(s8 *)run->iocsr_io.data;
+ break;
+ case 2:
+ *gpr = *(s16 *)run->iocsr_io.data;
+ break;
+ case 4:
+ *gpr = *(s32 *)run->iocsr_io.data;
+ break;
+ case 8:
+ *gpr = *(s64 *)run->iocsr_io.data;
+ break;
+ default:
+ kvm_err("Bad IOCSR length: %d, addr is 0x%lx\n",
+ run->iocsr_io.len, vcpu->arch.badv);
+ er = EMULATE_FAIL;
+ break;
+ }
+
+ return er;
+}
+
+int kvm_emu_idle(struct kvm_vcpu *vcpu)
+{
+ ++vcpu->stat.idle_exits;
+ trace_kvm_exit_idle(vcpu, KVM_TRACE_EXIT_IDLE);
+
+ if (!kvm_arch_vcpu_runnable(vcpu)) {
+ /*
+ * Switch to the software timer before halt-polling/blocking as
+ * the guest's timer may be a break event for the vCPU, and the
+ * hypervisor timer runs only when the CPU is in guest mode.
+ * Switch before halt-polling so that KVM recognizes an expired
+ * timer before blocking.
+ */
+ kvm_save_timer(vcpu);
+ kvm_vcpu_block(vcpu);
+ }
+
+ return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
+ int rd, rj;
+ unsigned int index;
+ unsigned long curr_pc;
+ larch_inst inst;
+ enum emulation_result er = EMULATE_DONE;
+ struct kvm_run *run = vcpu->run;
+
+ /* Fetch the instruction */
+ inst.word = vcpu->arch.badi;
+ curr_pc = vcpu->arch.pc;
+ update_pc(&vcpu->arch);
+
+ trace_kvm_exit_gspr(vcpu, inst.word);
+ er = EMULATE_FAIL;
+ switch (((inst.word >> 24) & 0xff)) {
+ case 0x0: /* CPUCFG GSPR */
+ if (inst.reg2_format.opcode == 0x1B) {
+ rd = inst.reg2_format.rd;
+ rj = inst.reg2_format.rj;
+ ++vcpu->stat.cpucfg_exits;
+ index = vcpu->arch.gprs[rj];
+ er = EMULATE_DONE;
+ /*
+ * By LoongArch Reference Manual 2.2.10.5
+ * return value is 0 for undefined cpucfg index
+ */
+ if (index < KVM_MAX_CPUCFG_REGS)
+ vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+ else
+ vcpu->arch.gprs[rd] = 0;
+ }
+ break;
+ case 0x4: /* CSR{RD,WR,XCHG} GSPR */
+ er = kvm_handle_csr(vcpu, inst);
+ break;
+ case 0x6: /* Cache, Idle and IOCSR GSPR */
+ switch (((inst.word >> 22) & 0x3ff)) {
+ case 0x18: /* Cache GSPR */
+ er = EMULATE_DONE;
+ trace_kvm_exit_cache(vcpu, KVM_TRACE_EXIT_CACHE);
+ break;
+ case 0x19: /* Idle/IOCSR GSPR */
+ switch (((inst.word >> 15) & 0x1ffff)) {
+ case 0xc90: /* IOCSR GSPR */
+ er = kvm_emu_iocsr(inst, run, vcpu);
+ break;
+ case 0xc91: /* Idle GSPR */
+ er = kvm_emu_idle(vcpu);
+ break;
+ default:
+ er = EMULATE_FAIL;
+ break;
+ }
+ break;
+ default:
+ er = EMULATE_FAIL;
+ break;
+ }
+ break;
+ default:
+ er = EMULATE_FAIL;
+ break;
+ }
+
+ /* Rollback PC only if emulation was unsuccessful */
+ if (er == EMULATE_FAIL) {
+ kvm_err("[%#lx]%s: unsupported gspr instruction 0x%08x\n",
+ curr_pc, __func__, inst.word);
+
+ kvm_arch_vcpu_dump_regs(vcpu);
+ vcpu->arch.pc = curr_pc;
+ }
+
+ return er;
+}
+
+/*
+ * Trigger GSPR:
+ * 1) Execute CPUCFG instruction;
+ * 2) Execute CACOP/IDLE instructions;
+ * 3) Access to unimplemented CSRs/IOCSRs.
+ */
+static int kvm_handle_gspr(struct kvm_vcpu *vcpu)
+{
+ int ret = RESUME_GUEST;
+ enum emulation_result er = EMULATE_DONE;
+
+ er = kvm_trap_handle_gspr(vcpu);
+
+ if (er == EMULATE_DONE) {
+ ret = RESUME_GUEST;
+ } else if (er == EMULATE_DO_MMIO) {
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ ret = RESUME_HOST;
+ } else if (er == EMULATE_DO_IOCSR) {
+ vcpu->run->exit_reason = KVM_EXIT_LOONGARCH_IOCSR;
+ ret = RESUME_HOST;
+ } else {
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+ ret = RESUME_GUEST;
+ }
+
+ return ret;
+}
+
+int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst)
+{
+ int ret;
+ unsigned int op8, opcode, rd;
+ struct kvm_run *run = vcpu->run;
+
+ run->mmio.phys_addr = vcpu->arch.badv;
+ vcpu->mmio_needed = 2; /* signed */
+ op8 = (inst.word >> 24) & 0xff;
+ ret = EMULATE_DO_MMIO;
+
+ switch (op8) {
+ case 0x24 ... 0x27: /* ldptr.w/d process */
+ rd = inst.reg2i14_format.rd;
+ opcode = inst.reg2i14_format.opcode;
+
+ switch (opcode) {
+ case ldptrw_op:
+ run->mmio.len = 4;
+ break;
+ case ldptrd_op:
+ run->mmio.len = 8;
+ break;
+ default:
+ break;
+ }
+ break;
+ case 0x28 ... 0x2e: /* ld.b/h/w/d, ld.bu/hu/wu process */
+ rd = inst.reg2i12_format.rd;
+ opcode = inst.reg2i12_format.opcode;
+
+ switch (opcode) {
+ case ldb_op:
+ run->mmio.len = 1;
+ break;
+ case ldbu_op:
+ vcpu->mmio_needed = 1; /* unsigned */
+ run->mmio.len = 1;
+ break;
+ case ldh_op:
+ run->mmio.len = 2;
+ break;
+ case ldhu_op:
+ vcpu->mmio_needed = 1; /* unsigned */
+ run->mmio.len = 2;
+ break;
+ case ldw_op:
+ run->mmio.len = 4;
+ break;
+ case ldwu_op:
+ vcpu->mmio_needed = 1; /* unsigned */
+ run->mmio.len = 4;
+ break;
+ case ldd_op:
+ run->mmio.len = 8;
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ break;
+ }
+ break;
+ case 0x38: /* ldx.b/h/w/d, ldx.bu/hu/wu process */
+ rd = inst.reg3_format.rd;
+ opcode = inst.reg3_format.opcode;
+
+ switch (opcode) {
+ case ldxb_op:
+ run->mmio.len = 1;
+ break;
+ case ldxbu_op:
+ run->mmio.len = 1;
+ vcpu->mmio_needed = 1; /* unsigned */
+ break;
+ case ldxh_op:
+ run->mmio.len = 2;
+ break;
+ case ldxhu_op:
+ run->mmio.len = 2;
+ vcpu->mmio_needed = 1; /* unsigned */
+ break;
+ case ldxw_op:
+ run->mmio.len = 4;
+ break;
+ case ldxwu_op:
+ run->mmio.len = 4;
+ vcpu->mmio_needed = 1; /* unsigned */
+ break;
+ case ldxd_op:
+ run->mmio.len = 8;
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ break;
+ }
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ }
+
+ if (ret == EMULATE_DO_MMIO) {
+ /* Set for kvm_complete_mmio_read() use */
+ vcpu->arch.io_gpr = rd;
+ run->mmio.is_write = 0;
+ vcpu->mmio_is_write = 0;
+ } else {
+ kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n",
+ inst.word, vcpu->arch.pc, vcpu->arch.badv);
+ kvm_arch_vcpu_dump_regs(vcpu);
+ vcpu->mmio_needed = 0;
+ }
+
+ return ret;
+}
+
+int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ enum emulation_result er = EMULATE_DONE;
+ unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr];
+
+ /* Update with new PC */
+ update_pc(&vcpu->arch);
+ switch (run->mmio.len) {
+ case 1:
+ if (vcpu->mmio_needed == 2)
+ *gpr = *(s8 *)run->mmio.data;
+ else
+ *gpr = *(u8 *)run->mmio.data;
+ break;
+ case 2:
+ if (vcpu->mmio_needed == 2)
+ *gpr = *(s16 *)run->mmio.data;
+ else
+ *gpr = *(u16 *)run->mmio.data;
+ break;
+ case 4:
+ if (vcpu->mmio_needed == 2)
+ *gpr = *(s32 *)run->mmio.data;
+ else
+ *gpr = *(u32 *)run->mmio.data;
+ break;
+ case 8:
+ *gpr = *(s64 *)run->mmio.data;
+ break;
+ default:
+ kvm_err("Bad MMIO length: %d, addr is 0x%lx\n",
+ run->mmio.len, vcpu->arch.badv);
+ er = EMULATE_FAIL;
+ break;
+ }
+
+ return er;
+}
+
+int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst)
+{
+ int ret;
+ unsigned int rd, op8, opcode;
+ unsigned long curr_pc, rd_val = 0;
+ struct kvm_run *run = vcpu->run;
+ void *data = run->mmio.data;
+
+ /*
+ * Update PC and hold onto current PC in case there is
+ * an error and we want to rollback the PC
+ */
+ curr_pc = vcpu->arch.pc;
+ update_pc(&vcpu->arch);
+
+ op8 = (inst.word >> 24) & 0xff;
+ run->mmio.phys_addr = vcpu->arch.badv;
+ ret = EMULATE_DO_MMIO;
+ switch (op8) {
+ case 0x24 ... 0x27: /* stptr.w/d process */
+ rd = inst.reg2i14_format.rd;
+ opcode = inst.reg2i14_format.opcode;
+
+ switch (opcode) {
+ case stptrw_op:
+ run->mmio.len = 4;
+ *(unsigned int *)data = vcpu->arch.gprs[rd];
+ break;
+ case stptrd_op:
+ run->mmio.len = 8;
+ *(unsigned long *)data = vcpu->arch.gprs[rd];
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ break;
+ }
+ break;
+ case 0x28 ... 0x2e: /* st.b/h/w/d process */
+ rd = inst.reg2i12_format.rd;
+ opcode = inst.reg2i12_format.opcode;
+ rd_val = vcpu->arch.gprs[rd];
+
+ switch (opcode) {
+ case stb_op:
+ run->mmio.len = 1;
+ *(unsigned char *)data = rd_val;
+ break;
+ case sth_op:
+ run->mmio.len = 2;
+ *(unsigned short *)data = rd_val;
+ break;
+ case stw_op:
+ run->mmio.len = 4;
+ *(unsigned int *)data = rd_val;
+ break;
+ case std_op:
+ run->mmio.len = 8;
+ *(unsigned long *)data = rd_val;
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ break;
+ }
+ break;
+ case 0x38: /* stx.b/h/w/d process */
+ rd = inst.reg3_format.rd;
+ opcode = inst.reg3_format.opcode;
+
+ switch (opcode) {
+ case stxb_op:
+ run->mmio.len = 1;
+ *(unsigned char *)data = vcpu->arch.gprs[rd];
+ break;
+ case stxh_op:
+ run->mmio.len = 2;
+ *(unsigned short *)data = vcpu->arch.gprs[rd];
+ break;
+ case stxw_op:
+ run->mmio.len = 4;
+ *(unsigned int *)data = vcpu->arch.gprs[rd];
+ break;
+ case stxd_op:
+ run->mmio.len = 8;
+ *(unsigned long *)data = vcpu->arch.gprs[rd];
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ break;
+ }
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ }
+
+ if (ret == EMULATE_DO_MMIO) {
+ run->mmio.is_write = 1;
+ vcpu->mmio_needed = 1;
+ vcpu->mmio_is_write = 1;
+ } else {
+ vcpu->arch.pc = curr_pc;
+ kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n",
+ inst.word, vcpu->arch.pc, vcpu->arch.badv);
+ kvm_arch_vcpu_dump_regs(vcpu);
+ /* Rollback PC if emulation was unsuccessful */
+ }
+
+ return ret;
+}
+
+static int kvm_handle_rdwr_fault(struct kvm_vcpu *vcpu, bool write)
+{
+ int ret;
+ larch_inst inst;
+ enum emulation_result er = EMULATE_DONE;
+ struct kvm_run *run = vcpu->run;
+ unsigned long badv = vcpu->arch.badv;
+
+ ret = kvm_handle_mm_fault(vcpu, badv, write);
+ if (ret) {
+ /* Treat as MMIO */
+ inst.word = vcpu->arch.badi;
+ if (write) {
+ er = kvm_emu_mmio_write(vcpu, inst);
+ } else {
+ /* A code fetch fault doesn't count as an MMIO */
+ if (kvm_is_ifetch_fault(&vcpu->arch)) {
+ kvm_queue_exception(vcpu, EXCCODE_ADE, EXSUBCODE_ADEF);
+ return RESUME_GUEST;
+ }
+
+ er = kvm_emu_mmio_read(vcpu, inst);
+ }
+ }
+
+ if (er == EMULATE_DONE) {
+ ret = RESUME_GUEST;
+ } else if (er == EMULATE_DO_MMIO) {
+ run->exit_reason = KVM_EXIT_MMIO;
+ ret = RESUME_HOST;
+ } else {
+ kvm_queue_exception(vcpu, EXCCODE_ADE, EXSUBCODE_ADEM);
+ ret = RESUME_GUEST;
+ }
+
+ return ret;
+}
+
+static int kvm_handle_read_fault(struct kvm_vcpu *vcpu)
+{
+ return kvm_handle_rdwr_fault(vcpu, false);
+}
+
+static int kvm_handle_write_fault(struct kvm_vcpu *vcpu)
+{
+ return kvm_handle_rdwr_fault(vcpu, true);
+}
+
+/**
+ * kvm_handle_fpu_disabled() - Guest used fpu however it is disabled at host
+ * @vcpu: Virtual CPU context.
+ *
+ * Handle when the guest attempts to use fpu which hasn't been allowed
+ * by the root context.
+ */
+static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ /*
+ * If guest FPU not present, the FPU operation should have been
+ * treated as a reserved instruction!
+ * If FPU already in use, we shouldn't get this at all.
+ */
+ if (WARN_ON(vcpu->arch.aux_inuse & KVM_LARCH_FPU)) {
+ kvm_err("%s internal error\n", __func__);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return RESUME_HOST;
+ }
+
+ kvm_own_fpu(vcpu);
+
+ return RESUME_GUEST;
+}
+
+/*
+ * LoongArch KVM callback handling for unimplemented guest exiting
+ */
+static int kvm_fault_ni(struct kvm_vcpu *vcpu)
+{
+ unsigned int ecode, inst;
+ unsigned long estat, badv;
+
+ /* Fetch the instruction */
+ inst = vcpu->arch.badi;
+ badv = vcpu->arch.badv;
+ estat = vcpu->arch.host_estat;
+ ecode = (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+ kvm_err("ECode: %d PC=%#lx Inst=0x%08x BadVaddr=%#lx ESTAT=%#lx\n",
+ ecode, vcpu->arch.pc, inst, badv, read_gcsr_estat());
+ kvm_arch_vcpu_dump_regs(vcpu);
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+
+ return RESUME_GUEST;
+}
+
+static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
+ [0 ... EXCCODE_INT_START - 1] = kvm_fault_ni,
+ [EXCCODE_TLBI] = kvm_handle_read_fault,
+ [EXCCODE_TLBL] = kvm_handle_read_fault,
+ [EXCCODE_TLBS] = kvm_handle_write_fault,
+ [EXCCODE_TLBM] = kvm_handle_write_fault,
+ [EXCCODE_FPDIS] = kvm_handle_fpu_disabled,
+ [EXCCODE_GSPR] = kvm_handle_gspr,
+};
+
+int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
+{
+ return kvm_fault_tables[fault](vcpu);
+}
diff --git a/arch/loongarch/kvm/interrupt.c b/arch/loongarch/kvm/interrupt.c
new file mode 100644
index 0000000000..4c3f22de4b
--- /dev/null
+++ b/arch/loongarch/kvm/interrupt.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <asm/kvm_csr.h>
+#include <asm/kvm_vcpu.h>
+
+static unsigned int priority_to_irq[EXCCODE_INT_NUM] = {
+ [INT_TI] = CPU_TIMER,
+ [INT_IPI] = CPU_IPI,
+ [INT_SWI0] = CPU_SIP0,
+ [INT_SWI1] = CPU_SIP1,
+ [INT_HWI0] = CPU_IP0,
+ [INT_HWI1] = CPU_IP1,
+ [INT_HWI2] = CPU_IP2,
+ [INT_HWI3] = CPU_IP3,
+ [INT_HWI4] = CPU_IP4,
+ [INT_HWI5] = CPU_IP5,
+ [INT_HWI6] = CPU_IP6,
+ [INT_HWI7] = CPU_IP7,
+};
+
+static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+ unsigned int irq = 0;
+
+ clear_bit(priority, &vcpu->arch.irq_pending);
+ if (priority < EXCCODE_INT_NUM)
+ irq = priority_to_irq[priority];
+
+ switch (priority) {
+ case INT_TI:
+ case INT_IPI:
+ case INT_SWI0:
+ case INT_SWI1:
+ set_gcsr_estat(irq);
+ break;
+
+ case INT_HWI0 ... INT_HWI7:
+ set_csr_gintc(irq);
+ break;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int kvm_irq_clear(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+ unsigned int irq = 0;
+
+ clear_bit(priority, &vcpu->arch.irq_clear);
+ if (priority < EXCCODE_INT_NUM)
+ irq = priority_to_irq[priority];
+
+ switch (priority) {
+ case INT_TI:
+ case INT_IPI:
+ case INT_SWI0:
+ case INT_SWI1:
+ clear_gcsr_estat(irq);
+ break;
+
+ case INT_HWI0 ... INT_HWI7:
+ clear_csr_gintc(irq);
+ break;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+void kvm_deliver_intr(struct kvm_vcpu *vcpu)
+{
+ unsigned int priority;
+ unsigned long *pending = &vcpu->arch.irq_pending;
+ unsigned long *pending_clr = &vcpu->arch.irq_clear;
+
+ if (!(*pending) && !(*pending_clr))
+ return;
+
+ if (*pending_clr) {
+ priority = __ffs(*pending_clr);
+ while (priority <= INT_IPI) {
+ kvm_irq_clear(vcpu, priority);
+ priority = find_next_bit(pending_clr,
+ BITS_PER_BYTE * sizeof(*pending_clr),
+ priority + 1);
+ }
+ }
+
+ if (*pending) {
+ priority = __ffs(*pending);
+ while (priority <= INT_IPI) {
+ kvm_irq_deliver(vcpu, priority);
+ priority = find_next_bit(pending,
+ BITS_PER_BYTE * sizeof(*pending),
+ priority + 1);
+ }
+ }
+}
+
+int kvm_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return test_bit(INT_TI, &vcpu->arch.irq_pending);
+}
+
+/*
+ * Only support illegal instruction or illegal Address Error exception,
+ * Other exceptions are injected by hardware in kvm mode
+ */
+static void _kvm_deliver_exception(struct kvm_vcpu *vcpu,
+ unsigned int code, unsigned int subcode)
+{
+ unsigned long val, vec_size;
+
+ /*
+ * BADV is added for EXCCODE_ADE exception
+ * Use PC register (GVA address) if it is instruction exeception
+ * Else use BADV from host side (GPA address) for data exeception
+ */
+ if (code == EXCCODE_ADE) {
+ if (subcode == EXSUBCODE_ADEF)
+ val = vcpu->arch.pc;
+ else
+ val = vcpu->arch.badv;
+ kvm_write_hw_gcsr(LOONGARCH_CSR_BADV, val);
+ }
+
+ /* Set exception instruction */
+ kvm_write_hw_gcsr(LOONGARCH_CSR_BADI, vcpu->arch.badi);
+
+ /*
+ * Save CRMD in PRMD
+ * Set IRQ disabled and PLV0 with CRMD
+ */
+ val = kvm_read_hw_gcsr(LOONGARCH_CSR_CRMD);
+ kvm_write_hw_gcsr(LOONGARCH_CSR_PRMD, val);
+ val = val & ~(CSR_CRMD_PLV | CSR_CRMD_IE);
+ kvm_write_hw_gcsr(LOONGARCH_CSR_CRMD, val);
+
+ /* Set exception PC address */
+ kvm_write_hw_gcsr(LOONGARCH_CSR_ERA, vcpu->arch.pc);
+
+ /*
+ * Set exception code
+ * Exception and interrupt can be inject at the same time
+ * Hardware will handle exception first and then extern interrupt
+ * Exception code is Ecode in ESTAT[16:21]
+ * Interrupt code in ESTAT[0:12]
+ */
+ val = kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT);
+ val = (val & ~CSR_ESTAT_EXC) | code;
+ kvm_write_hw_gcsr(LOONGARCH_CSR_ESTAT, val);
+
+ /* Calculate expcetion entry address */
+ val = kvm_read_hw_gcsr(LOONGARCH_CSR_ECFG);
+ vec_size = (val & CSR_ECFG_VS) >> CSR_ECFG_VS_SHIFT;
+ if (vec_size)
+ vec_size = (1 << vec_size) * 4;
+ val = kvm_read_hw_gcsr(LOONGARCH_CSR_EENTRY);
+ vcpu->arch.pc = val + code * vec_size;
+}
+
+void kvm_deliver_exception(struct kvm_vcpu *vcpu)
+{
+ unsigned int code;
+ unsigned long *pending = &vcpu->arch.exception_pending;
+
+ if (*pending) {
+ code = __ffs(*pending);
+ _kvm_deliver_exception(vcpu, code, vcpu->arch.esubcode);
+ *pending = 0;
+ vcpu->arch.esubcode = 0;
+ }
+}
diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c
new file mode 100644
index 0000000000..1c1d519950
--- /dev/null
+++ b/arch/loongarch/kvm/main.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/kvm_host.h>
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/kvm_csr.h>
+#include "trace.h"
+
+unsigned long vpid_mask;
+struct kvm_world_switch *kvm_loongarch_ops;
+static int gcsr_flag[CSR_MAX_NUMS];
+static struct kvm_context __percpu *vmcs;
+
+int get_gcsr_flag(int csr)
+{
+ if (csr < CSR_MAX_NUMS)
+ return gcsr_flag[csr];
+
+ return INVALID_GCSR;
+}
+
+static inline void set_gcsr_sw_flag(int csr)
+{
+ if (csr < CSR_MAX_NUMS)
+ gcsr_flag[csr] |= SW_GCSR;
+}
+
+static inline void set_gcsr_hw_flag(int csr)
+{
+ if (csr < CSR_MAX_NUMS)
+ gcsr_flag[csr] |= HW_GCSR;
+}
+
+/*
+ * The default value of gcsr_flag[CSR] is 0, and we use this
+ * function to set the flag to 1 (SW_GCSR) or 2 (HW_GCSR) if the
+ * gcsr is software or hardware. It will be used by get/set_gcsr,
+ * if gcsr_flag is HW we should use gcsrrd/gcsrwr to access it,
+ * else use software csr to emulate it.
+ */
+static void kvm_init_gcsr_flag(void)
+{
+ set_gcsr_hw_flag(LOONGARCH_CSR_CRMD);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PRMD);
+ set_gcsr_hw_flag(LOONGARCH_CSR_EUEN);
+ set_gcsr_hw_flag(LOONGARCH_CSR_MISC);
+ set_gcsr_hw_flag(LOONGARCH_CSR_ECFG);
+ set_gcsr_hw_flag(LOONGARCH_CSR_ESTAT);
+ set_gcsr_hw_flag(LOONGARCH_CSR_ERA);
+ set_gcsr_hw_flag(LOONGARCH_CSR_BADV);
+ set_gcsr_hw_flag(LOONGARCH_CSR_BADI);
+ set_gcsr_hw_flag(LOONGARCH_CSR_EENTRY);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBIDX);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBEHI);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBELO0);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBELO1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_ASID);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PGDL);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PGDH);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PGD);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PWCTL0);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PWCTL1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_STLBPGSIZE);
+ set_gcsr_hw_flag(LOONGARCH_CSR_RVACFG);
+ set_gcsr_hw_flag(LOONGARCH_CSR_CPUID);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG2);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG3);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS0);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS2);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS3);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS4);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS5);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS6);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS7);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TMID);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TCFG);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TVAL);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TINTCLR);
+ set_gcsr_hw_flag(LOONGARCH_CSR_CNTC);
+ set_gcsr_hw_flag(LOONGARCH_CSR_LLBCTL);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRENTRY);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRBADV);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRERA);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRSAVE);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRELO0);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRELO1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBREHI);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRPRMD);
+ set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN0);
+ set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN2);
+ set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN3);
+
+ set_gcsr_sw_flag(LOONGARCH_CSR_IMPCTL1);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IMPCTL2);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRCTL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRINFO1);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRINFO2);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRENTRY);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRERA);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRSAVE);
+ set_gcsr_sw_flag(LOONGARCH_CSR_CTAG);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DEBUG);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DERA);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DESAVE);
+
+ set_gcsr_sw_flag(LOONGARCH_CSR_FWPC);
+ set_gcsr_sw_flag(LOONGARCH_CSR_FWPS);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MWPC);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MWPS);
+
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB0ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB0MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB0CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB0ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB1ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB1MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB1CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB1ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB2ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB2MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB2CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB2ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB3ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB3MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB3CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB3ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB4ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB4MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB4CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB4ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB5ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB5MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB5CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB5ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB6ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB6MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB6CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB6ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB7ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB7MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB7CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB7ASID);
+
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB0ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB0MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB0CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB0ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB1ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB1MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB1CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB1ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB2ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB2MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB2CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB2ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB3ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB3MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB3CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB3ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB4ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB4MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB4CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB4ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB5ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB5MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB5CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB5ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB6ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB6MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB6CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB6ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB7ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB7MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB7CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB7ASID);
+
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL0);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR0);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL1);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR1);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL2);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR2);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL3);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR3);
+}
+
+static void kvm_update_vpid(struct kvm_vcpu *vcpu, int cpu)
+{
+ unsigned long vpid;
+ struct kvm_context *context;
+
+ context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu);
+ vpid = context->vpid_cache + 1;
+ if (!(vpid & vpid_mask)) {
+ /* finish round of vpid loop */
+ if (unlikely(!vpid))
+ vpid = vpid_mask + 1;
+
+ ++vpid; /* vpid 0 reserved for root */
+
+ /* start new vpid cycle */
+ kvm_flush_tlb_all();
+ }
+
+ context->vpid_cache = vpid;
+ vcpu->arch.vpid = vpid;
+}
+
+void kvm_check_vpid(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+ bool migrated;
+ unsigned long ver, old, vpid;
+ struct kvm_context *context;
+
+ cpu = smp_processor_id();
+ /*
+ * Are we entering guest context on a different CPU to last time?
+ * If so, the vCPU's guest TLB state on this CPU may be stale.
+ */
+ context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu);
+ migrated = (vcpu->cpu != cpu);
+
+ /*
+ * Check if our vpid is of an older version
+ *
+ * We also discard the stored vpid if we've executed on
+ * another CPU, as the guest mappings may have changed without
+ * hypervisor knowledge.
+ */
+ ver = vcpu->arch.vpid & ~vpid_mask;
+ old = context->vpid_cache & ~vpid_mask;
+ if (migrated || (ver != old)) {
+ kvm_update_vpid(vcpu, cpu);
+ trace_kvm_vpid_change(vcpu, vcpu->arch.vpid);
+ vcpu->cpu = cpu;
+ }
+
+ /* Restore GSTAT(0x50).vpid */
+ vpid = (vcpu->arch.vpid & vpid_mask) << CSR_GSTAT_GID_SHIFT;
+ change_csr_gstat(vpid_mask << CSR_GSTAT_GID_SHIFT, vpid);
+}
+
+void kvm_init_vmcs(struct kvm *kvm)
+{
+ kvm->arch.vmcs = vmcs;
+}
+
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -ENOIOCTLCMD;
+}
+
+int kvm_arch_hardware_enable(void)
+{
+ unsigned long env, gcfg = 0;
+
+ env = read_csr_gcfg();
+
+ /* First init gcfg, gstat, gintc, gtlbc. All guest use the same config */
+ write_csr_gcfg(0);
+ write_csr_gstat(0);
+ write_csr_gintc(0);
+ clear_csr_gtlbc(CSR_GTLBC_USETGID | CSR_GTLBC_TOTI);
+
+ /*
+ * Enable virtualization features granting guest direct control of
+ * certain features:
+ * GCI=2: Trap on init or unimplement cache instruction.
+ * TORU=0: Trap on Root Unimplement.
+ * CACTRL=1: Root control cache.
+ * TOP=0: Trap on Previlege.
+ * TOE=0: Trap on Exception.
+ * TIT=0: Trap on Timer.
+ */
+ if (env & CSR_GCFG_GCIP_ALL)
+ gcfg |= CSR_GCFG_GCI_SECURE;
+ if (env & CSR_GCFG_MATC_ROOT)
+ gcfg |= CSR_GCFG_MATC_ROOT;
+
+ gcfg |= CSR_GCFG_TIT;
+ write_csr_gcfg(gcfg);
+
+ kvm_flush_tlb_all();
+
+ /* Enable using TGID */
+ set_csr_gtlbc(CSR_GTLBC_USETGID);
+ kvm_debug("GCFG:%lx GSTAT:%lx GINTC:%lx GTLBC:%lx",
+ read_csr_gcfg(), read_csr_gstat(), read_csr_gintc(), read_csr_gtlbc());
+
+ return 0;
+}
+
+void kvm_arch_hardware_disable(void)
+{
+ write_csr_gcfg(0);
+ write_csr_gstat(0);
+ write_csr_gintc(0);
+ clear_csr_gtlbc(CSR_GTLBC_USETGID | CSR_GTLBC_TOTI);
+
+ /* Flush any remaining guest TLB entries */
+ kvm_flush_tlb_all();
+}
+
+static int kvm_loongarch_env_init(void)
+{
+ int cpu, order;
+ void *addr;
+ struct kvm_context *context;
+
+ vmcs = alloc_percpu(struct kvm_context);
+ if (!vmcs) {
+ pr_err("kvm: failed to allocate percpu kvm_context\n");
+ return -ENOMEM;
+ }
+
+ kvm_loongarch_ops = kzalloc(sizeof(*kvm_loongarch_ops), GFP_KERNEL);
+ if (!kvm_loongarch_ops) {
+ free_percpu(vmcs);
+ vmcs = NULL;
+ return -ENOMEM;
+ }
+
+ /*
+ * PGD register is shared between root kernel and kvm hypervisor.
+ * So world switch entry should be in DMW area rather than TLB area
+ * to avoid page fault reenter.
+ *
+ * In future if hardware pagetable walking is supported, we won't
+ * need to copy world switch code to DMW area.
+ */
+ order = get_order(kvm_exception_size + kvm_enter_guest_size);
+ addr = (void *)__get_free_pages(GFP_KERNEL, order);
+ if (!addr) {
+ free_percpu(vmcs);
+ vmcs = NULL;
+ kfree(kvm_loongarch_ops);
+ kvm_loongarch_ops = NULL;
+ return -ENOMEM;
+ }
+
+ memcpy(addr, kvm_exc_entry, kvm_exception_size);
+ memcpy(addr + kvm_exception_size, kvm_enter_guest, kvm_enter_guest_size);
+ flush_icache_range((unsigned long)addr, (unsigned long)addr + kvm_exception_size + kvm_enter_guest_size);
+ kvm_loongarch_ops->exc_entry = addr;
+ kvm_loongarch_ops->enter_guest = addr + kvm_exception_size;
+ kvm_loongarch_ops->page_order = order;
+
+ vpid_mask = read_csr_gstat();
+ vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT;
+ if (vpid_mask)
+ vpid_mask = GENMASK(vpid_mask - 1, 0);
+
+ for_each_possible_cpu(cpu) {
+ context = per_cpu_ptr(vmcs, cpu);
+ context->vpid_cache = vpid_mask + 1;
+ context->last_vcpu = NULL;
+ }
+
+ kvm_init_gcsr_flag();
+
+ return 0;
+}
+
+static void kvm_loongarch_env_exit(void)
+{
+ unsigned long addr;
+
+ if (vmcs)
+ free_percpu(vmcs);
+
+ if (kvm_loongarch_ops) {
+ if (kvm_loongarch_ops->exc_entry) {
+ addr = (unsigned long)kvm_loongarch_ops->exc_entry;
+ free_pages(addr, kvm_loongarch_ops->page_order);
+ }
+ kfree(kvm_loongarch_ops);
+ }
+}
+
+static int kvm_loongarch_init(void)
+{
+ int r;
+
+ if (!cpu_has_lvz) {
+ kvm_info("Hardware virtualization not available\n");
+ return -ENODEV;
+ }
+ r = kvm_loongarch_env_init();
+ if (r)
+ return r;
+
+ return kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+}
+
+static void kvm_loongarch_exit(void)
+{
+ kvm_exit();
+ kvm_loongarch_env_exit();
+}
+
+module_init(kvm_loongarch_init);
+module_exit(kvm_loongarch_exit);
+
+#ifdef MODULE
+static const struct cpu_feature kvm_feature[] = {
+ { .feature = cpu_feature(LOONGARCH_LVZ) },
+ {},
+};
+MODULE_DEVICE_TABLE(cpu, kvm_feature);
+#endif
diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
new file mode 100644
index 0000000000..80480df5f5
--- /dev/null
+++ b/arch/loongarch/kvm/mmu.c
@@ -0,0 +1,914 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <linux/kvm_host.h>
+#include <linux/page-flags.h>
+#include <linux/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/kvm_mmu.h>
+
+static inline void kvm_ptw_prepare(struct kvm *kvm, kvm_ptw_ctx *ctx)
+{
+ ctx->level = kvm->arch.root_level;
+ /* pte table */
+ ctx->invalid_ptes = kvm->arch.invalid_ptes;
+ ctx->pte_shifts = kvm->arch.pte_shifts;
+ ctx->pgtable_shift = ctx->pte_shifts[ctx->level];
+ ctx->invalid_entry = ctx->invalid_ptes[ctx->level];
+ ctx->opaque = kvm;
+}
+
+/*
+ * Mark a range of guest physical address space old (all accesses fault) in the
+ * VM's GPA page table to allow detection of commonly used pages.
+ */
+static int kvm_mkold_pte(kvm_pte_t *pte, phys_addr_t addr, kvm_ptw_ctx *ctx)
+{
+ if (kvm_pte_young(*pte)) {
+ *pte = kvm_pte_mkold(*pte);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Mark a range of guest physical address space clean (writes fault) in the VM's
+ * GPA page table to allow dirty page tracking.
+ */
+static int kvm_mkclean_pte(kvm_pte_t *pte, phys_addr_t addr, kvm_ptw_ctx *ctx)
+{
+ gfn_t offset;
+ kvm_pte_t val;
+
+ val = *pte;
+ /*
+ * For kvm_arch_mmu_enable_log_dirty_pt_masked with mask, start and end
+ * may cross hugepage, for first huge page parameter addr is equal to
+ * start, however for the second huge page addr is base address of
+ * this huge page, rather than start or end address
+ */
+ if ((ctx->flag & _KVM_HAS_PGMASK) && !kvm_pte_huge(val)) {
+ offset = (addr >> PAGE_SHIFT) - ctx->gfn;
+ if (!(BIT(offset) & ctx->mask))
+ return 0;
+ }
+
+ /*
+ * Need not split huge page now, just set write-proect pte bit
+ * Split huge page until next write fault
+ */
+ if (kvm_pte_dirty(val)) {
+ *pte = kvm_pte_mkclean(val);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Clear pte entry
+ */
+static int kvm_flush_pte(kvm_pte_t *pte, phys_addr_t addr, kvm_ptw_ctx *ctx)
+{
+ struct kvm *kvm;
+
+ kvm = ctx->opaque;
+ if (ctx->level)
+ kvm->stat.hugepages--;
+ else
+ kvm->stat.pages--;
+
+ *pte = ctx->invalid_entry;
+
+ return 1;
+}
+
+/*
+ * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
+ *
+ * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
+ * to host physical page mappings.
+ *
+ * Returns: Pointer to new KVM GPA page directory.
+ * NULL on allocation failure.
+ */
+kvm_pte_t *kvm_pgd_alloc(void)
+{
+ kvm_pte_t *pgd;
+
+ pgd = (kvm_pte_t *)__get_free_pages(GFP_KERNEL, 0);
+ if (pgd)
+ pgd_init((void *)pgd);
+
+ return pgd;
+}
+
+static void _kvm_pte_init(void *addr, unsigned long val)
+{
+ unsigned long *p, *end;
+
+ p = (unsigned long *)addr;
+ end = p + PTRS_PER_PTE;
+ do {
+ p[0] = val;
+ p[1] = val;
+ p[2] = val;
+ p[3] = val;
+ p[4] = val;
+ p += 8;
+ p[-3] = val;
+ p[-2] = val;
+ p[-1] = val;
+ } while (p != end);
+}
+
+/*
+ * Caller must hold kvm->mm_lock
+ *
+ * Walk the page tables of kvm to find the PTE corresponding to the
+ * address @addr. If page tables don't exist for @addr, they will be created
+ * from the MMU cache if @cache is not NULL.
+ */
+static kvm_pte_t *kvm_populate_gpa(struct kvm *kvm,
+ struct kvm_mmu_memory_cache *cache,
+ unsigned long addr, int level)
+{
+ kvm_ptw_ctx ctx;
+ kvm_pte_t *entry, *child;
+
+ kvm_ptw_prepare(kvm, &ctx);
+ child = kvm->arch.pgd;
+ while (ctx.level > level) {
+ entry = kvm_pgtable_offset(&ctx, child, addr);
+ if (kvm_pte_none(&ctx, entry)) {
+ if (!cache)
+ return NULL;
+
+ child = kvm_mmu_memory_cache_alloc(cache);
+ _kvm_pte_init(child, ctx.invalid_ptes[ctx.level - 1]);
+ kvm_set_pte(entry, __pa(child));
+ } else if (kvm_pte_huge(*entry)) {
+ return entry;
+ } else
+ child = (kvm_pte_t *)__va(PHYSADDR(*entry));
+ kvm_ptw_enter(&ctx);
+ }
+
+ entry = kvm_pgtable_offset(&ctx, child, addr);
+
+ return entry;
+}
+
+/*
+ * Page walker for VM shadow mmu at last level
+ * The last level is small pte page or huge pmd page
+ */
+static int kvm_ptw_leaf(kvm_pte_t *dir, phys_addr_t addr, phys_addr_t end, kvm_ptw_ctx *ctx)
+{
+ int ret;
+ phys_addr_t next, start, size;
+ struct list_head *list;
+ kvm_pte_t *entry, *child;
+
+ ret = 0;
+ start = addr;
+ child = (kvm_pte_t *)__va(PHYSADDR(*dir));
+ entry = kvm_pgtable_offset(ctx, child, addr);
+ do {
+ next = addr + (0x1UL << ctx->pgtable_shift);
+ if (!kvm_pte_present(ctx, entry))
+ continue;
+
+ ret |= ctx->ops(entry, addr, ctx);
+ } while (entry++, addr = next, addr < end);
+
+ if (kvm_need_flush(ctx)) {
+ size = 0x1UL << (ctx->pgtable_shift + PAGE_SHIFT - 3);
+ if (start + size == end) {
+ list = (struct list_head *)child;
+ list_add_tail(list, &ctx->list);
+ *dir = ctx->invalid_ptes[ctx->level + 1];
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Page walker for VM shadow mmu at page table dir level
+ */
+static int kvm_ptw_dir(kvm_pte_t *dir, phys_addr_t addr, phys_addr_t end, kvm_ptw_ctx *ctx)
+{
+ int ret;
+ phys_addr_t next, start, size;
+ struct list_head *list;
+ kvm_pte_t *entry, *child;
+
+ ret = 0;
+ start = addr;
+ child = (kvm_pte_t *)__va(PHYSADDR(*dir));
+ entry = kvm_pgtable_offset(ctx, child, addr);
+ do {
+ next = kvm_pgtable_addr_end(ctx, addr, end);
+ if (!kvm_pte_present(ctx, entry))
+ continue;
+
+ if (kvm_pte_huge(*entry)) {
+ ret |= ctx->ops(entry, addr, ctx);
+ continue;
+ }
+
+ kvm_ptw_enter(ctx);
+ if (ctx->level == 0)
+ ret |= kvm_ptw_leaf(entry, addr, next, ctx);
+ else
+ ret |= kvm_ptw_dir(entry, addr, next, ctx);
+ kvm_ptw_exit(ctx);
+ } while (entry++, addr = next, addr < end);
+
+ if (kvm_need_flush(ctx)) {
+ size = 0x1UL << (ctx->pgtable_shift + PAGE_SHIFT - 3);
+ if (start + size == end) {
+ list = (struct list_head *)child;
+ list_add_tail(list, &ctx->list);
+ *dir = ctx->invalid_ptes[ctx->level + 1];
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Page walker for VM shadow mmu at page root table
+ */
+static int kvm_ptw_top(kvm_pte_t *dir, phys_addr_t addr, phys_addr_t end, kvm_ptw_ctx *ctx)
+{
+ int ret;
+ phys_addr_t next;
+ kvm_pte_t *entry;
+
+ ret = 0;
+ entry = kvm_pgtable_offset(ctx, dir, addr);
+ do {
+ next = kvm_pgtable_addr_end(ctx, addr, end);
+ if (!kvm_pte_present(ctx, entry))
+ continue;
+
+ kvm_ptw_enter(ctx);
+ ret |= kvm_ptw_dir(entry, addr, next, ctx);
+ kvm_ptw_exit(ctx);
+ } while (entry++, addr = next, addr < end);
+
+ return ret;
+}
+
+/*
+ * kvm_flush_range() - Flush a range of guest physical addresses.
+ * @kvm: KVM pointer.
+ * @start_gfn: Guest frame number of first page in GPA range to flush.
+ * @end_gfn: Guest frame number of last page in GPA range to flush.
+ * @lock: Whether to hold mmu_lock or not
+ *
+ * Flushes a range of GPA mappings from the GPA page tables.
+ */
+static void kvm_flush_range(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn, int lock)
+{
+ int ret;
+ kvm_ptw_ctx ctx;
+ struct list_head *pos, *temp;
+
+ ctx.ops = kvm_flush_pte;
+ ctx.flag = _KVM_FLUSH_PGTABLE;
+ kvm_ptw_prepare(kvm, &ctx);
+ INIT_LIST_HEAD(&ctx.list);
+
+ if (lock) {
+ spin_lock(&kvm->mmu_lock);
+ ret = kvm_ptw_top(kvm->arch.pgd, start_gfn << PAGE_SHIFT,
+ end_gfn << PAGE_SHIFT, &ctx);
+ spin_unlock(&kvm->mmu_lock);
+ } else
+ ret = kvm_ptw_top(kvm->arch.pgd, start_gfn << PAGE_SHIFT,
+ end_gfn << PAGE_SHIFT, &ctx);
+
+ /* Flush vpid for each vCPU individually */
+ if (ret)
+ kvm_flush_remote_tlbs(kvm);
+
+ /*
+ * free pte table page after mmu_lock
+ * the pte table page is linked together with ctx.list
+ */
+ list_for_each_safe(pos, temp, &ctx.list) {
+ list_del(pos);
+ free_page((unsigned long)pos);
+ }
+}
+
+/*
+ * kvm_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
+ * @kvm: KVM pointer.
+ * @start_gfn: Guest frame number of first page in GPA range to flush.
+ * @end_gfn: Guest frame number of last page in GPA range to flush.
+ *
+ * Make a range of GPA mappings clean so that guest writes will fault and
+ * trigger dirty page logging.
+ *
+ * The caller must hold the @kvm->mmu_lock spinlock.
+ *
+ * Returns: Whether any GPA mappings were modified, which would require
+ * derived mappings (GVA page tables & TLB enties) to be
+ * invalidated.
+ */
+static int kvm_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
+{
+ kvm_ptw_ctx ctx;
+
+ ctx.ops = kvm_mkclean_pte;
+ ctx.flag = 0;
+ kvm_ptw_prepare(kvm, &ctx);
+ return kvm_ptw_top(kvm->arch.pgd, start_gfn << PAGE_SHIFT, end_gfn << PAGE_SHIFT, &ctx);
+}
+
+/*
+ * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
+ * @kvm: The KVM pointer
+ * @slot: The memory slot associated with mask
+ * @gfn_offset: The gfn offset in memory slot
+ * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
+ * slot to be write protected
+ *
+ * Walks bits set in mask write protects the associated pte's. Caller must
+ * acquire @kvm->mmu_lock.
+ */
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask)
+{
+ kvm_ptw_ctx ctx;
+ gfn_t base_gfn = slot->base_gfn + gfn_offset;
+ gfn_t start = base_gfn + __ffs(mask);
+ gfn_t end = base_gfn + __fls(mask) + 1;
+
+ ctx.ops = kvm_mkclean_pte;
+ ctx.flag = _KVM_HAS_PGMASK;
+ ctx.mask = mask;
+ ctx.gfn = base_gfn;
+ kvm_ptw_prepare(kvm, &ctx);
+
+ kvm_ptw_top(kvm->arch.pgd, start << PAGE_SHIFT, end << PAGE_SHIFT, &ctx);
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
+{
+ int needs_flush;
+
+ /*
+ * If dirty page logging is enabled, write protect all pages in the slot
+ * ready for dirty logging.
+ *
+ * There is no need to do this in any of the following cases:
+ * CREATE: No dirty mappings will already exist.
+ * MOVE/DELETE: The old mappings will already have been cleaned up by
+ * kvm_arch_flush_shadow_memslot()
+ */
+ if (change == KVM_MR_FLAGS_ONLY &&
+ (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+ new->flags & KVM_MEM_LOG_DIRTY_PAGES)) {
+ spin_lock(&kvm->mmu_lock);
+ /* Write protect GPA page table entries */
+ needs_flush = kvm_mkclean_gpa_pt(kvm, new->base_gfn,
+ new->base_gfn + new->npages);
+ spin_unlock(&kvm->mmu_lock);
+ if (needs_flush)
+ kvm_flush_remote_tlbs(kvm);
+ }
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+ kvm_flush_range(kvm, 0, kvm->arch.gpa_size >> PAGE_SHIFT, 0);
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+ /*
+ * The slot has been made invalid (ready for moving or deletion), so we
+ * need to ensure that it can no longer be accessed by any guest vCPUs.
+ */
+ kvm_flush_range(kvm, slot->base_gfn, slot->base_gfn + slot->npages, 1);
+}
+
+bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ kvm_ptw_ctx ctx;
+
+ ctx.flag = 0;
+ ctx.ops = kvm_flush_pte;
+ kvm_ptw_prepare(kvm, &ctx);
+ INIT_LIST_HEAD(&ctx.list);
+
+ return kvm_ptw_top(kvm->arch.pgd, range->start << PAGE_SHIFT,
+ range->end << PAGE_SHIFT, &ctx);
+}
+
+bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ unsigned long prot_bits;
+ kvm_pte_t *ptep;
+ kvm_pfn_t pfn = pte_pfn(range->arg.pte);
+ gpa_t gpa = range->start << PAGE_SHIFT;
+
+ ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
+ if (!ptep)
+ return false;
+
+ /* Replacing an absent or old page doesn't need flushes */
+ if (!kvm_pte_present(NULL, ptep) || !kvm_pte_young(*ptep)) {
+ kvm_set_pte(ptep, 0);
+ return false;
+ }
+
+ /* Fill new pte if write protected or page migrated */
+ prot_bits = _PAGE_PRESENT | __READABLE;
+ prot_bits |= _CACHE_MASK & pte_val(range->arg.pte);
+
+ /*
+ * Set _PAGE_WRITE or _PAGE_DIRTY iff old and new pte both support
+ * _PAGE_WRITE for map_page_fast if next page write fault
+ * _PAGE_DIRTY since gpa has already recorded as dirty page
+ */
+ prot_bits |= __WRITEABLE & *ptep & pte_val(range->arg.pte);
+ kvm_set_pte(ptep, kvm_pfn_pte(pfn, __pgprot(prot_bits)));
+
+ return true;
+}
+
+bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ kvm_ptw_ctx ctx;
+
+ ctx.flag = 0;
+ ctx.ops = kvm_mkold_pte;
+ kvm_ptw_prepare(kvm, &ctx);
+
+ return kvm_ptw_top(kvm->arch.pgd, range->start << PAGE_SHIFT,
+ range->end << PAGE_SHIFT, &ctx);
+}
+
+bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ gpa_t gpa = range->start << PAGE_SHIFT;
+ kvm_pte_t *ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
+
+ if (ptep && kvm_pte_present(NULL, ptep) && kvm_pte_young(*ptep))
+ return true;
+
+ return false;
+}
+
+/*
+ * kvm_map_page_fast() - Fast path GPA fault handler.
+ * @vcpu: vCPU pointer.
+ * @gpa: Guest physical address of fault.
+ * @write: Whether the fault was due to a write.
+ *
+ * Perform fast path GPA fault handling, doing all that can be done without
+ * calling into KVM. This handles marking old pages young (for idle page
+ * tracking), and dirtying of clean pages (for dirty page logging).
+ *
+ * Returns: 0 on success, in which case we can update derived mappings and
+ * resume guest execution.
+ * -EFAULT on failure due to absent GPA mapping or write to
+ * read-only page, in which case KVM must be consulted.
+ */
+static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
+{
+ int ret = 0;
+ kvm_pfn_t pfn = 0;
+ kvm_pte_t *ptep, changed, new;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_memory_slot *slot;
+
+ spin_lock(&kvm->mmu_lock);
+
+ /* Fast path - just check GPA page table for an existing entry */
+ ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
+ if (!ptep || !kvm_pte_present(NULL, ptep)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /* Track access to pages marked old */
+ new = *ptep;
+ if (!kvm_pte_young(new))
+ new = kvm_pte_mkyoung(new);
+ /* call kvm_set_pfn_accessed() after unlock */
+
+ if (write && !kvm_pte_dirty(new)) {
+ if (!kvm_pte_write(new)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (kvm_pte_huge(new)) {
+ /*
+ * Do not set write permission when dirty logging is
+ * enabled for HugePages
+ */
+ slot = gfn_to_memslot(kvm, gfn);
+ if (kvm_slot_dirty_track_enabled(slot)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+
+ /* Track dirtying of writeable pages */
+ new = kvm_pte_mkdirty(new);
+ }
+
+ changed = new ^ (*ptep);
+ if (changed) {
+ kvm_set_pte(ptep, new);
+ pfn = kvm_pte_pfn(new);
+ }
+ spin_unlock(&kvm->mmu_lock);
+
+ /*
+ * Fixme: pfn may be freed after mmu_lock
+ * kvm_try_get_pfn(pfn)/kvm_release_pfn pair to prevent this?
+ */
+ if (kvm_pte_young(changed))
+ kvm_set_pfn_accessed(pfn);
+
+ if (kvm_pte_dirty(changed)) {
+ mark_page_dirty(kvm, gfn);
+ kvm_set_pfn_dirty(pfn);
+ }
+ return ret;
+out:
+ spin_unlock(&kvm->mmu_lock);
+ return ret;
+}
+
+static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
+ unsigned long hva, unsigned long map_size, bool write)
+{
+ size_t size;
+ gpa_t gpa_start;
+ hva_t uaddr_start, uaddr_end;
+
+ /* Disable dirty logging on HugePages */
+ if (kvm_slot_dirty_track_enabled(memslot) && write)
+ return false;
+
+ size = memslot->npages * PAGE_SIZE;
+ gpa_start = memslot->base_gfn << PAGE_SHIFT;
+ uaddr_start = memslot->userspace_addr;
+ uaddr_end = uaddr_start + size;
+
+ /*
+ * Pages belonging to memslots that don't have the same alignment
+ * within a PMD for userspace and GPA cannot be mapped with stage-2
+ * PMD entries, because we'll end up mapping the wrong pages.
+ *
+ * Consider a layout like the following:
+ *
+ * memslot->userspace_addr:
+ * +-----+--------------------+--------------------+---+
+ * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
+ * +-----+--------------------+--------------------+---+
+ *
+ * memslot->base_gfn << PAGE_SIZE:
+ * +---+--------------------+--------------------+-----+
+ * |abc|def Stage-2 block | Stage-2 block |tvxyz|
+ * +---+--------------------+--------------------+-----+
+ *
+ * If we create those stage-2 blocks, we'll end up with this incorrect
+ * mapping:
+ * d -> f
+ * e -> g
+ * f -> h
+ */
+ if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
+ return false;
+
+ /*
+ * Next, let's make sure we're not trying to map anything not covered
+ * by the memslot. This means we have to prohibit block size mappings
+ * for the beginning and end of a non-block aligned and non-block sized
+ * memory slot (illustrated by the head and tail parts of the
+ * userspace view above containing pages 'abcde' and 'xyz',
+ * respectively).
+ *
+ * Note that it doesn't matter if we do the check using the
+ * userspace_addr or the base_gfn, as both are equally aligned (per
+ * the check above) and equally sized.
+ */
+ return (hva & ~(map_size - 1)) >= uaddr_start &&
+ (hva & ~(map_size - 1)) + map_size <= uaddr_end;
+}
+
+/*
+ * Lookup the mapping level for @gfn in the current mm.
+ *
+ * WARNING! Use of host_pfn_mapping_level() requires the caller and the end
+ * consumer to be tied into KVM's handlers for MMU notifier events!
+ *
+ * There are several ways to safely use this helper:
+ *
+ * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before
+ * consuming it. In this case, mmu_lock doesn't need to be held during the
+ * lookup, but it does need to be held while checking the MMU notifier.
+ *
+ * - Hold mmu_lock AND ensure there is no in-progress MMU notifier invalidation
+ * event for the hva. This can be done by explicit checking the MMU notifier
+ * or by ensuring that KVM already has a valid mapping that covers the hva.
+ *
+ * - Do not use the result to install new mappings, e.g. use the host mapping
+ * level only to decide whether or not to zap an entry. In this case, it's
+ * not required to hold mmu_lock (though it's highly likely the caller will
+ * want to hold mmu_lock anyways, e.g. to modify SPTEs).
+ *
+ * Note! The lookup can still race with modifications to host page tables, but
+ * the above "rules" ensure KVM will not _consume_ the result of the walk if a
+ * race with the primary MMU occurs.
+ */
+static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
+ const struct kvm_memory_slot *slot)
+{
+ int level = 0;
+ unsigned long hva;
+ unsigned long flags;
+ pgd_t pgd;
+ p4d_t p4d;
+ pud_t pud;
+ pmd_t pmd;
+
+ /*
+ * Note, using the already-retrieved memslot and __gfn_to_hva_memslot()
+ * is not solely for performance, it's also necessary to avoid the
+ * "writable" check in __gfn_to_hva_many(), which will always fail on
+ * read-only memslots due to gfn_to_hva() assuming writes. Earlier
+ * page fault steps have already verified the guest isn't writing a
+ * read-only memslot.
+ */
+ hva = __gfn_to_hva_memslot(slot, gfn);
+
+ /*
+ * Disable IRQs to prevent concurrent tear down of host page tables,
+ * e.g. if the primary MMU promotes a P*D to a huge page and then frees
+ * the original page table.
+ */
+ local_irq_save(flags);
+
+ /*
+ * Read each entry once. As above, a non-leaf entry can be promoted to
+ * a huge page _during_ this walk. Re-reading the entry could send the
+ * walk into the weeks, e.g. p*d_large() returns false (sees the old
+ * value) and then p*d_offset() walks into the target huge page instead
+ * of the old page table (sees the new value).
+ */
+ pgd = READ_ONCE(*pgd_offset(kvm->mm, hva));
+ if (pgd_none(pgd))
+ goto out;
+
+ p4d = READ_ONCE(*p4d_offset(&pgd, hva));
+ if (p4d_none(p4d) || !p4d_present(p4d))
+ goto out;
+
+ pud = READ_ONCE(*pud_offset(&p4d, hva));
+ if (pud_none(pud) || !pud_present(pud))
+ goto out;
+
+ pmd = READ_ONCE(*pmd_offset(&pud, hva));
+ if (pmd_none(pmd) || !pmd_present(pmd))
+ goto out;
+
+ if (kvm_pte_huge(pmd_val(pmd)))
+ level = 1;
+
+out:
+ local_irq_restore(flags);
+ return level;
+}
+
+/*
+ * Split huge page
+ */
+static kvm_pte_t *kvm_split_huge(struct kvm_vcpu *vcpu, kvm_pte_t *ptep, gfn_t gfn)
+{
+ int i;
+ kvm_pte_t val, *child;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_mmu_memory_cache *memcache;
+
+ memcache = &vcpu->arch.mmu_page_cache;
+ child = kvm_mmu_memory_cache_alloc(memcache);
+ val = kvm_pte_mksmall(*ptep);
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ kvm_set_pte(child + i, val);
+ val += PAGE_SIZE;
+ }
+
+ /* The later kvm_flush_tlb_gpa() will flush hugepage tlb */
+ kvm_set_pte(ptep, __pa(child));
+
+ kvm->stat.hugepages--;
+ kvm->stat.pages += PTRS_PER_PTE;
+
+ return child + (gfn & (PTRS_PER_PTE - 1));
+}
+
+/*
+ * kvm_map_page() - Map a guest physical page.
+ * @vcpu: vCPU pointer.
+ * @gpa: Guest physical address of fault.
+ * @write: Whether the fault was due to a write.
+ *
+ * Handle GPA faults by creating a new GPA mapping (or updating an existing
+ * one).
+ *
+ * This takes care of marking pages young or dirty (idle/dirty page tracking),
+ * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
+ * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
+ * caller.
+ *
+ * Returns: 0 on success
+ * -EFAULT if there is no memory region at @gpa or a write was
+ * attempted to a read-only memory region. This is usually handled
+ * as an MMIO access.
+ */
+static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
+{
+ bool writeable;
+ int srcu_idx, err, retry_no = 0, level;
+ unsigned long hva, mmu_seq, prot_bits;
+ kvm_pfn_t pfn;
+ kvm_pte_t *ptep, new_pte;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_memory_slot *memslot;
+ struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+
+ /* Try the fast path to handle old / clean pages */
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ err = kvm_map_page_fast(vcpu, gpa, write);
+ if (!err)
+ goto out;
+
+ memslot = gfn_to_memslot(kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(memslot, gfn, &writeable);
+ if (kvm_is_error_hva(hva) || (write && !writeable)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ /* We need a minimum of cached pages ready for page table creation */
+ err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
+ if (err)
+ goto out;
+
+retry:
+ /*
+ * Used to check for invalidations in progress, of the pfn that is
+ * returned by pfn_to_pfn_prot below.
+ */
+ mmu_seq = kvm->mmu_invalidate_seq;
+ /*
+ * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads in
+ * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
+ * risk the page we get a reference to getting unmapped before we have a
+ * chance to grab the mmu_lock without mmu_invalidate_retry() noticing.
+ *
+ * This smp_rmb() pairs with the effective smp_wmb() of the combination
+ * of the pte_unmap_unlock() after the PTE is zapped, and the
+ * spin_lock() in kvm_mmu_invalidate_invalidate_<page|range_end>() before
+ * mmu_invalidate_seq is incremented.
+ */
+ smp_rmb();
+
+ /* Slow path - ask KVM core whether we can access this GPA */
+ pfn = gfn_to_pfn_prot(kvm, gfn, write, &writeable);
+ if (is_error_noslot_pfn(pfn)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ /* Check if an invalidation has taken place since we got pfn */
+ spin_lock(&kvm->mmu_lock);
+ if (mmu_invalidate_retry_hva(kvm, mmu_seq, hva)) {
+ /*
+ * This can happen when mappings are changed asynchronously, but
+ * also synchronously if a COW is triggered by
+ * gfn_to_pfn_prot().
+ */
+ spin_unlock(&kvm->mmu_lock);
+ kvm_release_pfn_clean(pfn);
+ if (retry_no > 100) {
+ retry_no = 0;
+ schedule();
+ }
+ retry_no++;
+ goto retry;
+ }
+
+ /*
+ * For emulated devices such virtio device, actual cache attribute is
+ * determined by physical machine.
+ * For pass through physical device, it should be uncachable
+ */
+ prot_bits = _PAGE_PRESENT | __READABLE;
+ if (pfn_valid(pfn))
+ prot_bits |= _CACHE_CC;
+ else
+ prot_bits |= _CACHE_SUC;
+
+ if (writeable) {
+ prot_bits |= _PAGE_WRITE;
+ if (write)
+ prot_bits |= __WRITEABLE;
+ }
+
+ /* Disable dirty logging on HugePages */
+ level = 0;
+ if (!fault_supports_huge_mapping(memslot, hva, PMD_SIZE, write)) {
+ level = 0;
+ } else {
+ level = host_pfn_mapping_level(kvm, gfn, memslot);
+ if (level == 1) {
+ gfn = gfn & ~(PTRS_PER_PTE - 1);
+ pfn = pfn & ~(PTRS_PER_PTE - 1);
+ }
+ }
+
+ /* Ensure page tables are allocated */
+ ptep = kvm_populate_gpa(kvm, memcache, gpa, level);
+ new_pte = kvm_pfn_pte(pfn, __pgprot(prot_bits));
+ if (level == 1) {
+ new_pte = kvm_pte_mkhuge(new_pte);
+ /*
+ * previous pmd entry is invalid_pte_table
+ * there is invalid tlb with small page
+ * need flush these invalid tlbs for current vcpu
+ */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ ++kvm->stat.hugepages;
+ } else if (kvm_pte_huge(*ptep) && write)
+ ptep = kvm_split_huge(vcpu, ptep, gfn);
+ else
+ ++kvm->stat.pages;
+ kvm_set_pte(ptep, new_pte);
+ spin_unlock(&kvm->mmu_lock);
+
+ if (prot_bits & _PAGE_DIRTY) {
+ mark_page_dirty_in_slot(kvm, memslot, gfn);
+ kvm_set_pfn_dirty(pfn);
+ }
+
+ kvm_set_pfn_accessed(pfn);
+ kvm_release_pfn_clean(pfn);
+out:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return err;
+}
+
+int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
+{
+ int ret;
+
+ ret = kvm_map_page(vcpu, gpa, write);
+ if (ret)
+ return ret;
+
+ /* Invalidate this entry in the TLB */
+ kvm_flush_tlb_gpa(vcpu, gpa);
+
+ return 0;
+}
+
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new, enum kvm_mr_change change)
+{
+ return 0;
+}
+
+void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ kvm_flush_remote_tlbs(kvm);
+}
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
new file mode 100644
index 0000000000..0ed9040307
--- /dev/null
+++ b/arch/loongarch/kvm/switch.S
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/loongarch.h>
+#include <asm/regdef.h>
+#include <asm/stackframe.h>
+
+#define HGPR_OFFSET(x) (PT_R0 + 8*x)
+#define GGPR_OFFSET(x) (KVM_ARCH_GGPR + 8*x)
+
+.macro kvm_save_host_gpr base
+ .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
+ st.d $r\n, \base, HGPR_OFFSET(\n)
+ .endr
+.endm
+
+.macro kvm_restore_host_gpr base
+ .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
+ ld.d $r\n, \base, HGPR_OFFSET(\n)
+ .endr
+.endm
+
+/*
+ * Save and restore all GPRs except base register,
+ * and default value of base register is a2.
+ */
+.macro kvm_save_guest_gprs base
+ .irp n,1,2,3,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ st.d $r\n, \base, GGPR_OFFSET(\n)
+ .endr
+.endm
+
+.macro kvm_restore_guest_gprs base
+ .irp n,1,2,3,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ ld.d $r\n, \base, GGPR_OFFSET(\n)
+ .endr
+.endm
+
+/*
+ * Prepare switch to guest, save host regs and restore guest regs.
+ * a2: kvm_vcpu_arch, don't touch it until 'ertn'
+ * t0, t1: temp register
+ */
+.macro kvm_switch_to_guest
+ /* Set host ECFG.VS=0, all exceptions share one exception entry */
+ csrrd t0, LOONGARCH_CSR_ECFG
+ bstrins.w t0, zero, CSR_ECFG_VS_SHIFT_END, CSR_ECFG_VS_SHIFT
+ csrwr t0, LOONGARCH_CSR_ECFG
+
+ /* Load up the new EENTRY */
+ ld.d t0, a2, KVM_ARCH_GEENTRY
+ csrwr t0, LOONGARCH_CSR_EENTRY
+
+ /* Set Guest ERA */
+ ld.d t0, a2, KVM_ARCH_GPC
+ csrwr t0, LOONGARCH_CSR_ERA
+
+ /* Save host PGDL */
+ csrrd t0, LOONGARCH_CSR_PGDL
+ st.d t0, a2, KVM_ARCH_HPGD
+
+ /* Switch to kvm */
+ ld.d t1, a2, KVM_VCPU_KVM - KVM_VCPU_ARCH
+
+ /* Load guest PGDL */
+ li.w t0, KVM_GPGD
+ ldx.d t0, t1, t0
+ csrwr t0, LOONGARCH_CSR_PGDL
+
+ /* Mix GID and RID */
+ csrrd t1, LOONGARCH_CSR_GSTAT
+ bstrpick.w t1, t1, CSR_GSTAT_GID_SHIFT_END, CSR_GSTAT_GID_SHIFT
+ csrrd t0, LOONGARCH_CSR_GTLBC
+ bstrins.w t0, t1, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
+ csrwr t0, LOONGARCH_CSR_GTLBC
+
+ /*
+ * Enable intr in root mode with future ertn so that host interrupt
+ * can be responsed during VM runs
+ * Guest CRMD comes from separate GCSR_CRMD register
+ */
+ ori t0, zero, CSR_PRMD_PIE
+ csrxchg t0, t0, LOONGARCH_CSR_PRMD
+
+ /* Set PVM bit to setup ertn to guest context */
+ ori t0, zero, CSR_GSTAT_PVM
+ csrxchg t0, t0, LOONGARCH_CSR_GSTAT
+
+ /* Load Guest GPRs */
+ kvm_restore_guest_gprs a2
+ /* Load KVM_ARCH register */
+ ld.d a2, a2, (KVM_ARCH_GGPR + 8 * REG_A2)
+
+ ertn /* Switch to guest: GSTAT.PGM = 1, ERRCTL.ISERR = 0, TLBRPRMD.ISTLBR = 0 */
+.endm
+
+ /*
+ * Exception entry for general exception from guest mode
+ * - IRQ is disabled
+ * - kernel privilege in root mode
+ * - page mode keep unchanged from previous PRMD in root mode
+ * - Fixme: tlb exception cannot happen since registers relative with TLB
+ * - is still in guest mode, such as pgd table/vmid registers etc,
+ * - will fix with hw page walk enabled in future
+ * load kvm_vcpu from reserved CSR KVM_VCPU_KS, and save a2 to KVM_TEMP_KS
+ */
+ .text
+ .cfi_sections .debug_frame
+SYM_CODE_START(kvm_exc_entry)
+ csrwr a2, KVM_TEMP_KS
+ csrrd a2, KVM_VCPU_KS
+ addi.d a2, a2, KVM_VCPU_ARCH
+
+ /* After save GPRs, free to use any GPR */
+ kvm_save_guest_gprs a2
+ /* Save guest A2 */
+ csrrd t0, KVM_TEMP_KS
+ st.d t0, a2, (KVM_ARCH_GGPR + 8 * REG_A2)
+
+ /* A2 is kvm_vcpu_arch, A1 is free to use */
+ csrrd s1, KVM_VCPU_KS
+ ld.d s0, s1, KVM_VCPU_RUN
+
+ csrrd t0, LOONGARCH_CSR_ESTAT
+ st.d t0, a2, KVM_ARCH_HESTAT
+ csrrd t0, LOONGARCH_CSR_ERA
+ st.d t0, a2, KVM_ARCH_GPC
+ csrrd t0, LOONGARCH_CSR_BADV
+ st.d t0, a2, KVM_ARCH_HBADV
+ csrrd t0, LOONGARCH_CSR_BADI
+ st.d t0, a2, KVM_ARCH_HBADI
+
+ /* Restore host ECFG.VS */
+ csrrd t0, LOONGARCH_CSR_ECFG
+ ld.d t1, a2, KVM_ARCH_HECFG
+ or t0, t0, t1
+ csrwr t0, LOONGARCH_CSR_ECFG
+
+ /* Restore host EENTRY */
+ ld.d t0, a2, KVM_ARCH_HEENTRY
+ csrwr t0, LOONGARCH_CSR_EENTRY
+
+ /* Restore host pgd table */
+ ld.d t0, a2, KVM_ARCH_HPGD
+ csrwr t0, LOONGARCH_CSR_PGDL
+
+ /*
+ * Disable PGM bit to enter root mode by default with next ertn
+ */
+ ori t0, zero, CSR_GSTAT_PVM
+ csrxchg zero, t0, LOONGARCH_CSR_GSTAT
+
+ /*
+ * Clear GTLBC.TGID field
+ * 0: for root tlb update in future tlb instr
+ * others: for guest tlb update like gpa to hpa in future tlb instr
+ */
+ csrrd t0, LOONGARCH_CSR_GTLBC
+ bstrins.w t0, zero, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
+ csrwr t0, LOONGARCH_CSR_GTLBC
+ ld.d tp, a2, KVM_ARCH_HTP
+ ld.d sp, a2, KVM_ARCH_HSP
+ /* restore per cpu register */
+ ld.d u0, a2, KVM_ARCH_HPERCPU
+ addi.d sp, sp, -PT_SIZE
+
+ /* Prepare handle exception */
+ or a0, s0, zero
+ or a1, s1, zero
+ ld.d t8, a2, KVM_ARCH_HANDLE_EXIT
+ jirl ra, t8, 0
+
+ or a2, s1, zero
+ addi.d a2, a2, KVM_VCPU_ARCH
+
+ /* Resume host when ret <= 0 */
+ blez a0, ret_to_host
+
+ /*
+ * Return to guest
+ * Save per cpu register again, maybe switched to another cpu
+ */
+ st.d u0, a2, KVM_ARCH_HPERCPU
+
+ /* Save kvm_vcpu to kscratch */
+ csrwr s1, KVM_VCPU_KS
+ kvm_switch_to_guest
+
+ret_to_host:
+ ld.d a2, a2, KVM_ARCH_HSP
+ addi.d a2, a2, -PT_SIZE
+ kvm_restore_host_gpr a2
+ jr ra
+
+SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL)
+SYM_CODE_END(kvm_exc_entry)
+
+/*
+ * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
+ *
+ * @register_param:
+ * a0: kvm_run* run
+ * a1: kvm_vcpu* vcpu
+ */
+SYM_FUNC_START(kvm_enter_guest)
+ /* Allocate space in stack bottom */
+ addi.d a2, sp, -PT_SIZE
+ /* Save host GPRs */
+ kvm_save_host_gpr a2
+
+ /* Save host CRMD, PRMD to stack */
+ csrrd a3, LOONGARCH_CSR_CRMD
+ st.d a3, a2, PT_CRMD
+ csrrd a3, LOONGARCH_CSR_PRMD
+ st.d a3, a2, PT_PRMD
+
+ addi.d a2, a1, KVM_VCPU_ARCH
+ st.d sp, a2, KVM_ARCH_HSP
+ st.d tp, a2, KVM_ARCH_HTP
+ /* Save per cpu register */
+ st.d u0, a2, KVM_ARCH_HPERCPU
+
+ /* Save kvm_vcpu to kscratch */
+ csrwr a1, KVM_VCPU_KS
+ kvm_switch_to_guest
+SYM_INNER_LABEL(kvm_enter_guest_end, SYM_L_LOCAL)
+SYM_FUNC_END(kvm_enter_guest)
+
+SYM_FUNC_START(kvm_save_fpu)
+ fpu_save_csr a0 t1
+ fpu_save_double a0 t1
+ fpu_save_cc a0 t1 t2
+ jr ra
+SYM_FUNC_END(kvm_save_fpu)
+
+SYM_FUNC_START(kvm_restore_fpu)
+ fpu_restore_double a0 t1
+ fpu_restore_csr a0 t1 t2
+ fpu_restore_cc a0 t1 t2
+ jr ra
+SYM_FUNC_END(kvm_restore_fpu)
+
+ .section ".rodata"
+SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry)
+SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)
diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c
new file mode 100644
index 0000000000..284bf553fe
--- /dev/null
+++ b/arch/loongarch/kvm/timer.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_csr.h>
+#include <asm/kvm_vcpu.h>
+
+/*
+ * ktime_to_tick() - Scale ktime_t to timer tick value.
+ */
+static inline u64 ktime_to_tick(struct kvm_vcpu *vcpu, ktime_t now)
+{
+ u64 delta;
+
+ delta = ktime_to_ns(now);
+ return div_u64(delta * vcpu->arch.timer_mhz, MNSEC_PER_SEC);
+}
+
+static inline u64 tick_to_ns(struct kvm_vcpu *vcpu, u64 tick)
+{
+ return div_u64(tick * MNSEC_PER_SEC, vcpu->arch.timer_mhz);
+}
+
+/*
+ * Push timer forward on timeout.
+ * Handle an hrtimer event by push the hrtimer forward a period.
+ */
+static enum hrtimer_restart kvm_count_timeout(struct kvm_vcpu *vcpu)
+{
+ unsigned long cfg, period;
+
+ /* Add periodic tick to current expire time */
+ cfg = kvm_read_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG);
+ if (cfg & CSR_TCFG_PERIOD) {
+ period = tick_to_ns(vcpu, cfg & CSR_TCFG_VAL);
+ hrtimer_add_expires_ns(&vcpu->arch.swtimer, period);
+ return HRTIMER_RESTART;
+ } else
+ return HRTIMER_NORESTART;
+}
+
+/* Low level hrtimer wake routine */
+enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer)
+{
+ struct kvm_vcpu *vcpu;
+
+ vcpu = container_of(timer, struct kvm_vcpu, arch.swtimer);
+ kvm_queue_irq(vcpu, INT_TI);
+ rcuwait_wake_up(&vcpu->wait);
+
+ return kvm_count_timeout(vcpu);
+}
+
+/*
+ * Initialise the timer to the specified frequency, zero it
+ */
+void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long timer_hz)
+{
+ vcpu->arch.timer_mhz = timer_hz >> 20;
+
+ /* Starting at 0 */
+ kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TVAL, 0);
+}
+
+/*
+ * Restore hard timer state and enable guest to access timer registers
+ * without trap, should be called with irq disabled
+ */
+void kvm_acquire_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned long cfg;
+
+ cfg = read_csr_gcfg();
+ if (!(cfg & CSR_GCFG_TIT))
+ return;
+
+ /* Enable guest access to hard timer */
+ write_csr_gcfg(cfg & ~CSR_GCFG_TIT);
+
+ /*
+ * Freeze the soft-timer and sync the guest stable timer with it. We do
+ * this with interrupts disabled to avoid latency.
+ */
+ hrtimer_cancel(&vcpu->arch.swtimer);
+}
+
+/*
+ * Restore soft timer state from saved context.
+ */
+void kvm_restore_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned long cfg, delta, period;
+ ktime_t expire, now;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ /*
+ * Set guest stable timer cfg csr
+ */
+ cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
+ if (!(cfg & CSR_TCFG_EN)) {
+ /* Guest timer is disabled, just restore timer registers */
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TVAL);
+ return;
+ }
+
+ /*
+ * Set remainder tick value if not expired
+ */
+ now = ktime_get();
+ expire = vcpu->arch.expire;
+ if (ktime_before(now, expire))
+ delta = ktime_to_tick(vcpu, ktime_sub(expire, now));
+ else {
+ if (cfg & CSR_TCFG_PERIOD) {
+ period = cfg & CSR_TCFG_VAL;
+ delta = ktime_to_tick(vcpu, ktime_sub(now, expire));
+ delta = period - (delta % period);
+ } else
+ delta = 0;
+ /*
+ * Inject timer here though sw timer should inject timer
+ * interrupt async already, since sw timer may be cancelled
+ * during injecting intr async in function kvm_acquire_timer
+ */
+ kvm_queue_irq(vcpu, INT_TI);
+ }
+
+ write_gcsr_timertick(delta);
+}
+
+/*
+ * Save guest timer state and switch to software emulation of guest
+ * timer. The hard timer must already be in use, so preemption should be
+ * disabled.
+ */
+static void _kvm_save_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned long ticks, delta;
+ ktime_t expire;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL);
+ delta = tick_to_ns(vcpu, ticks);
+ expire = ktime_add_ns(ktime_get(), delta);
+ vcpu->arch.expire = expire;
+ if (ticks) {
+ /*
+ * Update hrtimer to use new timeout
+ * HRTIMER_MODE_PINNED is suggested since vcpu may run in
+ * the same physical cpu in next time
+ */
+ hrtimer_cancel(&vcpu->arch.swtimer);
+ hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
+ } else
+ /*
+ * Inject timer interrupt so that hall polling can dectect and exit
+ */
+ kvm_queue_irq(vcpu, INT_TI);
+}
+
+/*
+ * Save guest timer state and switch to soft guest timer if hard timer was in
+ * use.
+ */
+void kvm_save_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned long cfg;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ preempt_disable();
+ cfg = read_csr_gcfg();
+ if (!(cfg & CSR_GCFG_TIT)) {
+ /* Disable guest use of hard timer */
+ write_csr_gcfg(cfg | CSR_GCFG_TIT);
+
+ /* Save hard timer state */
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL);
+ if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN)
+ _kvm_save_timer(vcpu);
+ }
+
+ /* Save timer-related state to vCPU context */
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+ preempt_enable();
+}
+
+void kvm_reset_timer(struct kvm_vcpu *vcpu)
+{
+ write_gcsr_timercfg(0);
+ kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG, 0);
+ hrtimer_cancel(&vcpu->arch.swtimer);
+}
diff --git a/arch/loongarch/kvm/tlb.c b/arch/loongarch/kvm/tlb.c
new file mode 100644
index 0000000000..02535df6b5
--- /dev/null
+++ b/arch/loongarch/kvm/tlb.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/tlb.h>
+#include <asm/kvm_csr.h>
+
+/*
+ * kvm_flush_tlb_all() - Flush all root TLB entries for guests.
+ *
+ * Invalidate all entries including GVA-->GPA and GPA-->HPA mappings.
+ */
+void kvm_flush_tlb_all(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ invtlb_all(INVTLB_ALLGID, 0, 0);
+ local_irq_restore(flags);
+}
+
+void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ gpa &= (PAGE_MASK << 1);
+ invtlb(INVTLB_GID_ADDR, read_csr_gstat() & CSR_GSTAT_GID, gpa);
+ local_irq_restore(flags);
+}
diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h
new file mode 100644
index 0000000000..a1e35d6554
--- /dev/null
+++ b/arch/loongarch/kvm/trace.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+#include <asm/kvm_csr.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+/*
+ * Tracepoints for VM enters
+ */
+DECLARE_EVENT_CLASS(kvm_transition,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+ TP_STRUCT__entry(
+ __field(unsigned long, pc)
+ ),
+
+ TP_fast_assign(
+ __entry->pc = vcpu->arch.pc;
+ ),
+
+ TP_printk("PC: 0x%08lx", __entry->pc)
+);
+
+DEFINE_EVENT(kvm_transition, kvm_enter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu));
+
+DEFINE_EVENT(kvm_transition, kvm_reenter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu));
+
+DEFINE_EVENT(kvm_transition, kvm_out,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu));
+
+/* Further exit reasons */
+#define KVM_TRACE_EXIT_IDLE 64
+#define KVM_TRACE_EXIT_CACHE 65
+
+/* Tracepoints for VM exits */
+#define kvm_trace_symbol_exit_types \
+ { KVM_TRACE_EXIT_IDLE, "IDLE" }, \
+ { KVM_TRACE_EXIT_CACHE, "CACHE" }
+
+DECLARE_EVENT_CLASS(kvm_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
+ TP_ARGS(vcpu, reason),
+ TP_STRUCT__entry(
+ __field(unsigned long, pc)
+ __field(unsigned int, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->pc = vcpu->arch.pc;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("[%s]PC: 0x%08lx",
+ __print_symbolic(__entry->reason,
+ kvm_trace_symbol_exit_types),
+ __entry->pc)
+);
+
+DEFINE_EVENT(kvm_exit, kvm_exit_idle,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
+ TP_ARGS(vcpu, reason));
+
+DEFINE_EVENT(kvm_exit, kvm_exit_cache,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
+ TP_ARGS(vcpu, reason));
+
+DEFINE_EVENT(kvm_exit, kvm_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
+ TP_ARGS(vcpu, reason));
+
+TRACE_EVENT(kvm_exit_gspr,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int inst_word),
+ TP_ARGS(vcpu, inst_word),
+ TP_STRUCT__entry(
+ __field(unsigned int, inst_word)
+ ),
+
+ TP_fast_assign(
+ __entry->inst_word = inst_word;
+ ),
+
+ TP_printk("Inst word: 0x%08x", __entry->inst_word)
+);
+
+#define KVM_TRACE_AUX_SAVE 0
+#define KVM_TRACE_AUX_RESTORE 1
+#define KVM_TRACE_AUX_ENABLE 2
+#define KVM_TRACE_AUX_DISABLE 3
+#define KVM_TRACE_AUX_DISCARD 4
+
+#define KVM_TRACE_AUX_FPU 1
+
+#define kvm_trace_symbol_aux_op \
+ { KVM_TRACE_AUX_SAVE, "save" }, \
+ { KVM_TRACE_AUX_RESTORE, "restore" }, \
+ { KVM_TRACE_AUX_ENABLE, "enable" }, \
+ { KVM_TRACE_AUX_DISABLE, "disable" }, \
+ { KVM_TRACE_AUX_DISCARD, "discard" }
+
+#define kvm_trace_symbol_aux_state \
+ { KVM_TRACE_AUX_FPU, "FPU" }
+
+TRACE_EVENT(kvm_aux,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op,
+ unsigned int state),
+ TP_ARGS(vcpu, op, state),
+ TP_STRUCT__entry(
+ __field(unsigned long, pc)
+ __field(u8, op)
+ __field(u8, state)
+ ),
+
+ TP_fast_assign(
+ __entry->pc = vcpu->arch.pc;
+ __entry->op = op;
+ __entry->state = state;
+ ),
+
+ TP_printk("%s %s PC: 0x%08lx",
+ __print_symbolic(__entry->op,
+ kvm_trace_symbol_aux_op),
+ __print_symbolic(__entry->state,
+ kvm_trace_symbol_aux_state),
+ __entry->pc)
+);
+
+TRACE_EVENT(kvm_vpid_change,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned long vpid),
+ TP_ARGS(vcpu, vpid),
+ TP_STRUCT__entry(
+ __field(unsigned long, vpid)
+ ),
+
+ TP_fast_assign(
+ __entry->vpid = vpid;
+ ),
+
+ TP_printk("VPID: 0x%08lx", __entry->vpid)
+);
+
+#endif /* _TRACE_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/loongarch/kvm
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
new file mode 100644
index 0000000000..73d0c2b9c1
--- /dev/null
+++ b/arch/loongarch/kvm/vcpu.c
@@ -0,0 +1,939 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/entry-kvm.h>
+#include <asm/fpu.h>
+#include <asm/loongarch.h>
+#include <asm/setup.h>
+#include <asm/time.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ KVM_GENERIC_VCPU_STATS(),
+ STATS_DESC_COUNTER(VCPU, int_exits),
+ STATS_DESC_COUNTER(VCPU, idle_exits),
+ STATS_DESC_COUNTER(VCPU, cpucfg_exits),
+ STATS_DESC_COUNTER(VCPU, signal_exits),
+};
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vcpu_stats_desc),
+};
+
+/*
+ * kvm_check_requests - check and handle pending vCPU requests
+ *
+ * Return: RESUME_GUEST if we should enter the guest
+ * RESUME_HOST if we should exit to userspace
+ */
+static int kvm_check_requests(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_request_pending(vcpu))
+ return RESUME_GUEST;
+
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+ vcpu->arch.vpid = 0; /* Drop vpid for this vCPU */
+
+ if (kvm_dirty_ring_check_request(vcpu))
+ return RESUME_HOST;
+
+ return RESUME_GUEST;
+}
+
+/*
+ * Check and handle pending signal and vCPU requests etc
+ * Run with irq enabled and preempt enabled
+ *
+ * Return: RESUME_GUEST if we should enter the guest
+ * RESUME_HOST if we should exit to userspace
+ * < 0 if we should exit to userspace, where the return value
+ * indicates an error
+ */
+static int kvm_enter_guest_check(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ /*
+ * Check conditions before entering the guest
+ */
+ ret = xfer_to_guest_mode_handle_work(vcpu);
+ if (ret < 0)
+ return ret;
+
+ ret = kvm_check_requests(vcpu);
+
+ return ret;
+}
+
+/*
+ * Called with irq enabled
+ *
+ * Return: RESUME_GUEST if we should enter the guest, and irq disabled
+ * Others if we should exit to userspace
+ */
+static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ do {
+ ret = kvm_enter_guest_check(vcpu);
+ if (ret != RESUME_GUEST)
+ break;
+
+ /*
+ * Handle vcpu timer, interrupts, check requests and
+ * check vmid before vcpu enter guest
+ */
+ local_irq_disable();
+ kvm_acquire_timer(vcpu);
+ kvm_deliver_intr(vcpu);
+ kvm_deliver_exception(vcpu);
+ /* Make sure the vcpu mode has been written */
+ smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+ kvm_check_vpid(vcpu);
+ vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY);
+ /* Clear KVM_LARCH_SWCSR_LATEST as CSR will change when enter guest */
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST;
+
+ if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) {
+ /* make sure the vcpu mode has been written */
+ smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE);
+ local_irq_enable();
+ ret = -EAGAIN;
+ }
+ } while (ret != RESUME_GUEST);
+
+ return ret;
+}
+
+/*
+ * Return 1 for resume guest and "<= 0" for resume host.
+ */
+static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ int ret = RESUME_GUEST;
+ unsigned long estat = vcpu->arch.host_estat;
+ u32 intr = estat & 0x1fff; /* Ignore NMI */
+ u32 ecode = (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+
+ /* Set a default exit reason */
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+
+ guest_timing_exit_irqoff();
+ guest_state_exit_irqoff();
+ local_irq_enable();
+
+ trace_kvm_exit(vcpu, ecode);
+ if (ecode) {
+ ret = kvm_handle_fault(vcpu, ecode);
+ } else {
+ WARN(!intr, "vm exiting with suspicious irq\n");
+ ++vcpu->stat.int_exits;
+ }
+
+ if (ret == RESUME_GUEST)
+ ret = kvm_pre_enter_guest(vcpu);
+
+ if (ret != RESUME_GUEST) {
+ local_irq_disable();
+ return ret;
+ }
+
+ guest_timing_enter_irqoff();
+ guest_state_enter_irqoff();
+ trace_kvm_reenter(vcpu);
+
+ return RESUME_GUEST;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu->arch.irq_pending) &&
+ vcpu->arch.mp_state.mp_state == KVM_MP_STATE_RUNNABLE;
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
+}
+
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL;
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return kvm_pending_timer(vcpu) ||
+ kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT) & (1 << INT_TI);
+}
+
+int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ kvm_debug("vCPU Register Dump:\n");
+ kvm_debug("\tPC = 0x%08lx\n", vcpu->arch.pc);
+ kvm_debug("\tExceptions: %08lx\n", vcpu->arch.irq_pending);
+
+ for (i = 0; i < 32; i += 4) {
+ kvm_debug("\tGPR%02d: %08lx %08lx %08lx %08lx\n", i,
+ vcpu->arch.gprs[i], vcpu->arch.gprs[i + 1],
+ vcpu->arch.gprs[i + 2], vcpu->arch.gprs[i + 3]);
+ }
+
+ kvm_debug("\tCRMD: 0x%08lx, ESTAT: 0x%08lx\n",
+ kvm_read_hw_gcsr(LOONGARCH_CSR_CRMD),
+ kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT));
+
+ kvm_debug("\tERA: 0x%08lx\n", kvm_read_hw_gcsr(LOONGARCH_CSR_ERA));
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ *mp_state = vcpu->arch.mp_state;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ int ret = 0;
+
+ switch (mp_state->mp_state) {
+ case KVM_MP_STATE_RUNNABLE:
+ vcpu->arch.mp_state = *mp_state;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ return -EINVAL;
+}
+
+/**
+ * kvm_migrate_count() - Migrate timer.
+ * @vcpu: Virtual CPU.
+ *
+ * Migrate hrtimer to the current CPU by cancelling and restarting it
+ * if the hrtimer is active.
+ *
+ * Must be called when the vCPU is migrated to a different CPU, so that
+ * the timer can interrupt the guest at the new CPU, and the timer irq can
+ * be delivered to the vCPU.
+ */
+static void kvm_migrate_count(struct kvm_vcpu *vcpu)
+{
+ if (hrtimer_cancel(&vcpu->arch.swtimer))
+ hrtimer_restart(&vcpu->arch.swtimer);
+}
+
+static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
+{
+ unsigned long gintc;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (get_gcsr_flag(id) & INVALID_GCSR)
+ return -EINVAL;
+
+ if (id == LOONGARCH_CSR_ESTAT) {
+ /* ESTAT IP0~IP7 get from GINTC */
+ gintc = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_GINTC) & 0xff;
+ *val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT) | (gintc << 2);
+ return 0;
+ }
+
+ /*
+ * Get software CSR state since software state is consistent
+ * with hardware for synchronous ioctl
+ */
+ *val = kvm_read_sw_gcsr(csr, id);
+
+ return 0;
+}
+
+static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
+{
+ int ret = 0, gintc;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (get_gcsr_flag(id) & INVALID_GCSR)
+ return -EINVAL;
+
+ if (id == LOONGARCH_CSR_ESTAT) {
+ /* ESTAT IP0~IP7 inject through GINTC */
+ gintc = (val >> 2) & 0xff;
+ kvm_set_sw_gcsr(csr, LOONGARCH_CSR_GINTC, gintc);
+
+ gintc = val & ~(0xffUL << 2);
+ kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
+
+ return ret;
+ }
+
+ kvm_write_sw_gcsr(csr, id, val);
+
+ return ret;
+}
+
+static int kvm_get_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg, u64 *v)
+{
+ int id, ret = 0;
+ u64 type = reg->id & KVM_REG_LOONGARCH_MASK;
+
+ switch (type) {
+ case KVM_REG_LOONGARCH_CSR:
+ id = KVM_GET_IOC_CSR_IDX(reg->id);
+ ret = _kvm_getcsr(vcpu, id, v);
+ break;
+ case KVM_REG_LOONGARCH_CPUCFG:
+ id = KVM_GET_IOC_CPUCFG_IDX(reg->id);
+ if (id >= 0 && id < KVM_MAX_CPUCFG_REGS)
+ *v = vcpu->arch.cpucfg[id];
+ else
+ ret = -EINVAL;
+ break;
+ case KVM_REG_LOONGARCH_KVM:
+ switch (reg->id) {
+ case KVM_REG_LOONGARCH_COUNTER:
+ *v = drdtime() + vcpu->kvm->arch.time_offset;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ int ret = 0;
+ u64 v, size = reg->id & KVM_REG_SIZE_MASK;
+
+ switch (size) {
+ case KVM_REG_SIZE_U64:
+ ret = kvm_get_one_reg(vcpu, reg, &v);
+ if (ret)
+ return ret;
+ ret = put_user(v, (u64 __user *)(long)reg->addr);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_set_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg, u64 v)
+{
+ int id, ret = 0;
+ u64 type = reg->id & KVM_REG_LOONGARCH_MASK;
+
+ switch (type) {
+ case KVM_REG_LOONGARCH_CSR:
+ id = KVM_GET_IOC_CSR_IDX(reg->id);
+ ret = _kvm_setcsr(vcpu, id, v);
+ break;
+ case KVM_REG_LOONGARCH_CPUCFG:
+ id = KVM_GET_IOC_CPUCFG_IDX(reg->id);
+ if (id >= 0 && id < KVM_MAX_CPUCFG_REGS)
+ vcpu->arch.cpucfg[id] = (u32)v;
+ else
+ ret = -EINVAL;
+ break;
+ case KVM_REG_LOONGARCH_KVM:
+ switch (reg->id) {
+ case KVM_REG_LOONGARCH_COUNTER:
+ /*
+ * gftoffset is relative with board, not vcpu
+ * only set for the first time for smp system
+ */
+ if (vcpu->vcpu_id == 0)
+ vcpu->kvm->arch.time_offset = (signed long)(v - drdtime());
+ break;
+ case KVM_REG_LOONGARCH_VCPU_RESET:
+ kvm_reset_timer(vcpu);
+ memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending));
+ memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear));
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ int ret = 0;
+ u64 v, size = reg->id & KVM_REG_SIZE_MASK;
+
+ switch (size) {
+ case KVM_REG_SIZE_U64:
+ ret = get_user(v, (u64 __user *)(long)reg->addr);
+ if (ret)
+ return ret;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return kvm_set_one_reg(vcpu, reg, v);
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ return -ENOIOCTLCMD;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ return -ENOIOCTLCMD;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
+ regs->gpr[i] = vcpu->arch.gprs[i];
+
+ regs->pc = vcpu->arch.pc;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+
+ for (i = 1; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
+ vcpu->arch.gprs[i] = regs->gpr[i];
+
+ vcpu->arch.gprs[0] = 0; /* zero is special, and cannot be set. */
+ vcpu->arch.pc = regs->pc;
+
+ return 0;
+}
+
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+ struct kvm_enable_cap *cap)
+{
+ /* FPU is enabled by default, will support LSX/LASX later. */
+ return -EINVAL;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ long r;
+ void __user *argp = (void __user *)arg;
+ struct kvm_vcpu *vcpu = filp->private_data;
+
+ /*
+ * Only software CSR should be modified
+ *
+ * If any hardware CSR register is modified, vcpu_load/vcpu_put pair
+ * should be used. Since CSR registers owns by this vcpu, if switch
+ * to other vcpus, other vcpus need reload CSR registers.
+ *
+ * If software CSR is modified, bit KVM_LARCH_HWCSR_USABLE should
+ * be clear in vcpu->arch.aux_inuse, and vcpu_load will check
+ * aux_inuse flag and reload CSR registers form software.
+ */
+
+ switch (ioctl) {
+ case KVM_SET_ONE_REG:
+ case KVM_GET_ONE_REG: {
+ struct kvm_one_reg reg;
+
+ r = -EFAULT;
+ if (copy_from_user(&reg, argp, sizeof(reg)))
+ break;
+ if (ioctl == KVM_SET_ONE_REG) {
+ r = kvm_set_reg(vcpu, &reg);
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_HWCSR_USABLE;
+ } else
+ r = kvm_get_reg(vcpu, &reg);
+ break;
+ }
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ break;
+ r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+ break;
+ }
+ default:
+ r = -ENOIOCTLCMD;
+ break;
+ }
+
+ return r;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ int i = 0;
+
+ fpu->fcc = vcpu->arch.fpu.fcc;
+ fpu->fcsr = vcpu->arch.fpu.fcsr;
+ for (i = 0; i < NUM_FPU_REGS; i++)
+ memcpy(&fpu->fpr[i], &vcpu->arch.fpu.fpr[i], FPU_REG_WIDTH / 64);
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ int i = 0;
+
+ vcpu->arch.fpu.fcc = fpu->fcc;
+ vcpu->arch.fpu.fcsr = fpu->fcsr;
+ for (i = 0; i < NUM_FPU_REGS; i++)
+ memcpy(&vcpu->arch.fpu.fpr[i], &fpu->fpr[i], FPU_REG_WIDTH / 64);
+
+ return 0;
+}
+
+/* Enable FPU and restore context */
+void kvm_own_fpu(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+
+ /* Enable FPU */
+ set_csr_euen(CSR_EUEN_FPEN);
+
+ kvm_restore_fpu(&vcpu->arch.fpu);
+ vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
+
+ preempt_enable();
+}
+
+/* Save context and disable FPU */
+void kvm_lose_fpu(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+
+ if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
+ kvm_save_fpu(&vcpu->arch.fpu);
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_FPU;
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU);
+
+ /* Disable FPU */
+ clear_csr_euen(CSR_EUEN_FPEN);
+ }
+
+ preempt_enable();
+}
+
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
+{
+ int intr = (int)irq->irq;
+
+ if (intr > 0)
+ kvm_queue_irq(vcpu, intr);
+ else if (intr < 0)
+ kvm_dequeue_irq(vcpu, -intr);
+ else {
+ kvm_err("%s: invalid interrupt ioctl %d\n", __func__, irq->irq);
+ return -EINVAL;
+ }
+
+ kvm_vcpu_kick(vcpu);
+
+ return 0;
+}
+
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct kvm_vcpu *vcpu = filp->private_data;
+
+ if (ioctl == KVM_INTERRUPT) {
+ struct kvm_interrupt irq;
+
+ if (copy_from_user(&irq, argp, sizeof(irq)))
+ return -EFAULT;
+
+ kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__, irq.irq);
+
+ return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+ }
+
+ return -ENOIOCTLCMD;
+}
+
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+ unsigned long timer_hz;
+ struct loongarch_csrs *csr;
+
+ vcpu->arch.vpid = 0;
+
+ hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ vcpu->arch.swtimer.function = kvm_swtimer_wakeup;
+
+ vcpu->arch.handle_exit = kvm_handle_exit;
+ vcpu->arch.guest_eentry = (unsigned long)kvm_loongarch_ops->exc_entry;
+ vcpu->arch.csr = kzalloc(sizeof(struct loongarch_csrs), GFP_KERNEL);
+ if (!vcpu->arch.csr)
+ return -ENOMEM;
+
+ /*
+ * All kvm exceptions share one exception entry, and host <-> guest
+ * switch also switch ECFG.VS field, keep host ECFG.VS info here.
+ */
+ vcpu->arch.host_ecfg = (read_csr_ecfg() & CSR_ECFG_VS);
+
+ /* Init */
+ vcpu->arch.last_sched_cpu = -1;
+
+ /*
+ * Initialize guest register state to valid architectural reset state.
+ */
+ timer_hz = calc_const_freq();
+ kvm_init_timer(vcpu, timer_hz);
+
+ /* Set Initialize mode for guest */
+ csr = vcpu->arch.csr;
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CRMD, CSR_CRMD_DA);
+
+ /* Set cpuid */
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_TMID, vcpu->vcpu_id);
+
+ /* Start with no pending virtual guest interrupts */
+ csr->csrs[LOONGARCH_CSR_GINTC] = 0;
+
+ return 0;
+}
+
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+ struct kvm_context *context;
+
+ hrtimer_cancel(&vcpu->arch.swtimer);
+ kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+ kfree(vcpu->arch.csr);
+
+ /*
+ * If the vCPU is freed and reused as another vCPU, we don't want the
+ * matching pointer wrongly hanging around in last_vcpu.
+ */
+ for_each_possible_cpu(cpu) {
+ context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu);
+ if (context->last_vcpu == vcpu)
+ context->last_vcpu = NULL;
+ }
+}
+
+static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ bool migrated;
+ struct kvm_context *context;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ /*
+ * Have we migrated to a different CPU?
+ * If so, any old guest TLB state may be stale.
+ */
+ migrated = (vcpu->arch.last_sched_cpu != cpu);
+
+ /*
+ * Was this the last vCPU to run on this CPU?
+ * If not, any old guest state from this vCPU will have been clobbered.
+ */
+ context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu);
+ if (migrated || (context->last_vcpu != vcpu))
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_HWCSR_USABLE;
+ context->last_vcpu = vcpu;
+
+ /* Restore timer state regardless */
+ kvm_restore_timer(vcpu);
+
+ /* Control guest page CCA attribute */
+ change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT);
+
+ /* Don't bother restoring registers multiple times unless necessary */
+ if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE)
+ return 0;
+
+ write_csr_gcntc((ulong)vcpu->kvm->arch.time_offset);
+
+ /* Restore guest CSR registers */
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_CRMD);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PRMD);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_EUEN);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_MISC);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ECFG);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ERA);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_BADV);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_BADI);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_EENTRY);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBIDX);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBEHI);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBELO0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBELO1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ASID);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PGDL);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PGDH);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PWCTL0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PWCTL1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_STLBPGSIZE);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_RVACFG);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_CPUID);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS2);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS3);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS4);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS5);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS6);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS7);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TMID);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_CNTC);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRENTRY);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRBADV);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRERA);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRSAVE);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRELO0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRELO1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBREHI);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRPRMD);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_DMWIN0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_DMWIN1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_DMWIN2);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_DMWIN3);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_LLBCTL);
+
+ /* Restore Root.GINTC from unused Guest.GINTC register */
+ write_csr_gintc(csr->csrs[LOONGARCH_CSR_GINTC]);
+
+ /*
+ * We should clear linked load bit to break interrupted atomics. This
+ * prevents a SC on the next vCPU from succeeding by matching a LL on
+ * the previous vCPU.
+ */
+ if (vcpu->kvm->created_vcpus > 1)
+ set_gcsr_llbctl(CSR_LLBCTL_WCLLB);
+
+ vcpu->arch.aux_inuse |= KVM_LARCH_HWCSR_USABLE;
+
+ return 0;
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ if (vcpu->arch.last_sched_cpu != cpu) {
+ kvm_debug("[%d->%d]KVM vCPU[%d] switch\n",
+ vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
+ /*
+ * Migrate the timer interrupt to the current CPU so that it
+ * always interrupts the guest and synchronously triggers a
+ * guest timer interrupt.
+ */
+ kvm_migrate_count(vcpu);
+ }
+
+ /* Restore guest state to registers */
+ _kvm_vcpu_load(vcpu, cpu);
+ local_irq_restore(flags);
+}
+
+static int _kvm_vcpu_put(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ kvm_lose_fpu(vcpu);
+
+ /*
+ * Update CSR state from hardware if software CSR state is stale,
+ * most CSR registers are kept unchanged during process context
+ * switch except CSR registers like remaining timer tick value and
+ * injected interrupt state.
+ */
+ if (vcpu->arch.aux_inuse & KVM_LARCH_SWCSR_LATEST)
+ goto out;
+
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_CRMD);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PRMD);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_EUEN);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_MISC);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ECFG);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ERA);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_BADV);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_BADI);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_EENTRY);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBIDX);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBEHI);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBELO0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBELO1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ASID);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PGDL);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PGDH);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PWCTL0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PWCTL1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_STLBPGSIZE);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_RVACFG);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_CPUID);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PRCFG1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PRCFG2);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PRCFG3);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS2);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS3);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS4);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS5);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS6);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS7);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TMID);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_CNTC);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_LLBCTL);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRENTRY);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRBADV);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRERA);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRSAVE);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRELO0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRELO1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBREHI);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRPRMD);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_DMWIN0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_DMWIN1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_DMWIN2);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_DMWIN3);
+
+ vcpu->arch.aux_inuse |= KVM_LARCH_SWCSR_LATEST;
+
+out:
+ kvm_save_timer(vcpu);
+ /* Save Root.GINTC into unused Guest.GINTC register */
+ csr->csrs[LOONGARCH_CSR_GINTC] = read_csr_gintc();
+
+ return 0;
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cpu = smp_processor_id();
+ vcpu->arch.last_sched_cpu = cpu;
+
+ /* Save guest state in registers */
+ _kvm_vcpu_put(vcpu, cpu);
+ local_irq_restore(flags);
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+{
+ int r = -EINTR;
+ struct kvm_run *run = vcpu->run;
+
+ if (vcpu->mmio_needed) {
+ if (!vcpu->mmio_is_write)
+ kvm_complete_mmio_read(vcpu, run);
+ vcpu->mmio_needed = 0;
+ }
+
+ if (run->exit_reason == KVM_EXIT_LOONGARCH_IOCSR) {
+ if (!run->iocsr_io.is_write)
+ kvm_complete_iocsr_read(vcpu, run);
+ }
+
+ if (run->immediate_exit)
+ return r;
+
+ /* Clear exit_reason */
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ lose_fpu(1);
+ vcpu_load(vcpu);
+ kvm_sigset_activate(vcpu);
+ r = kvm_pre_enter_guest(vcpu);
+ if (r != RESUME_GUEST)
+ goto out;
+
+ guest_timing_enter_irqoff();
+ guest_state_enter_irqoff();
+ trace_kvm_enter(vcpu);
+ r = kvm_loongarch_ops->enter_guest(run, vcpu);
+
+ trace_kvm_out(vcpu);
+ /*
+ * Guest exit is already recorded at kvm_handle_exit()
+ * return value must not be RESUME_GUEST
+ */
+ local_irq_enable();
+out:
+ kvm_sigset_deactivate(vcpu);
+ vcpu_put(vcpu);
+
+ return r;
+}
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
new file mode 100644
index 0000000000..0a37f6fa8f
--- /dev/null
+++ b/arch/loongarch/kvm/vm.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
+
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+ KVM_GENERIC_VM_STATS(),
+ STATS_DESC_ICOUNTER(VM, pages),
+ STATS_DESC_ICOUNTER(VM, hugepages),
+};
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vm_stats_desc),
+};
+
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+ int i;
+
+ /* Allocate page table to map GPA -> RPA */
+ kvm->arch.pgd = kvm_pgd_alloc();
+ if (!kvm->arch.pgd)
+ return -ENOMEM;
+
+ kvm_init_vmcs(kvm);
+ kvm->arch.gpa_size = BIT(cpu_vabits - 1);
+ kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
+ kvm->arch.invalid_ptes[0] = 0;
+ kvm->arch.invalid_ptes[1] = (unsigned long)invalid_pte_table;
+#if CONFIG_PGTABLE_LEVELS > 2
+ kvm->arch.invalid_ptes[2] = (unsigned long)invalid_pmd_table;
+#endif
+#if CONFIG_PGTABLE_LEVELS > 3
+ kvm->arch.invalid_ptes[3] = (unsigned long)invalid_pud_table;
+#endif
+ for (i = 0; i <= kvm->arch.root_level; i++)
+ kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
+
+ return 0;
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ kvm_destroy_vcpus(kvm);
+ free_page((unsigned long)kvm->arch.pgd);
+ kvm->arch.pgd = NULL;
+}
+
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_ONE_REG:
+ case KVM_CAP_ENABLE_CAP:
+ case KVM_CAP_READONLY_MEM:
+ case KVM_CAP_SYNC_MMU:
+ case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_IOEVENTFD:
+ case KVM_CAP_MP_STATE:
+ r = 1;
+ break;
+ case KVM_CAP_NR_VCPUS:
+ r = num_online_cpus();
+ break;
+ case KVM_CAP_MAX_VCPUS:
+ r = KVM_MAX_VCPUS;
+ break;
+ case KVM_CAP_MAX_VCPU_ID:
+ r = KVM_MAX_VCPU_IDS;
+ break;
+ case KVM_CAP_NR_MEMSLOTS:
+ r = KVM_USER_MEM_SLOTS;
+ break;
+ default:
+ r = 0;
+ break;
+ }
+
+ return r;
+}
+
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+{
+ return -ENOIOCTLCMD;
+}
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
index cc3e81fe01..c608adc998 100644
--- a/arch/loongarch/mm/kasan_init.c
+++ b/arch/loongarch/mm/kasan_init.c
@@ -44,6 +44,9 @@ void *kasan_mem_to_shadow(const void *addr)
unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
unsigned long offset = 0;
+ if (maddr >= FIXADDR_START)
+ return (void *)(kasan_early_shadow_page);
+
maddr &= XRANGE_SHADOW_MASK;
switch (xrange) {
case XKPRANGE_CC_SEG:
diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
index 2c0a411f23..0b95d32b30 100644
--- a/arch/loongarch/mm/tlb.c
+++ b/arch/loongarch/mm/tlb.c
@@ -284,12 +284,16 @@ static void setup_tlb_handler(int cpu)
set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE);
set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE);
set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE);
- }
+ } else {
+ int vec_sz __maybe_unused;
+ void *addr __maybe_unused;
+ struct page *page __maybe_unused;
+
+ /* Avoid lockdep warning */
+ rcutree_report_cpu_starting(cpu);
+
#ifdef CONFIG_NUMA
- else {
- void *addr;
- struct page *page;
- const int vec_sz = sizeof(exception_handlers);
+ vec_sz = sizeof(exception_handlers);
if (pcpu_handlers[cpu])
return;
@@ -305,8 +309,8 @@ static void setup_tlb_handler(int cpu)
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY);
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY);
csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY);
- }
#endif
+ }
}
void tlb_init(int cpu)
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 9eb7753d11..5c634de4ee 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -411,7 +411,11 @@ static int add_exception_handler(const struct bpf_insn *insn,
off_t offset;
struct exception_table_entry *ex;
- if (!ctx->image || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM)
+ if (!ctx->image || !ctx->prog->aux->extable)
+ return 0;
+
+ if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
+ BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
return 0;
if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
@@ -450,7 +454,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
{
u8 tm = -1;
u64 func_addr;
- bool func_addr_fixed;
+ bool func_addr_fixed, sign_extend;
int i = insn - ctx->prog->insnsi;
int ret, jmp_offset;
const u8 code = insn->code;
@@ -467,8 +471,25 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* dst = src */
case BPF_ALU | BPF_MOV | BPF_X:
case BPF_ALU64 | BPF_MOV | BPF_X:
- move_reg(ctx, dst, src);
- emit_zext_32(ctx, dst, is32);
+ switch (off) {
+ case 0:
+ move_reg(ctx, dst, src);
+ emit_zext_32(ctx, dst, is32);
+ break;
+ case 8:
+ move_reg(ctx, t1, src);
+ emit_insn(ctx, extwb, dst, t1);
+ emit_zext_32(ctx, dst, is32);
+ break;
+ case 16:
+ move_reg(ctx, t1, src);
+ emit_insn(ctx, extwh, dst, t1);
+ emit_zext_32(ctx, dst, is32);
+ break;
+ case 32:
+ emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
+ break;
+ }
break;
/* dst = imm */
@@ -533,39 +554,71 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* dst = dst / src */
case BPF_ALU | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_DIV | BPF_X:
- emit_zext_32(ctx, dst, is32);
- move_reg(ctx, t1, src);
- emit_zext_32(ctx, t1, is32);
- emit_insn(ctx, divdu, dst, dst, t1);
- emit_zext_32(ctx, dst, is32);
+ if (!off) {
+ emit_zext_32(ctx, dst, is32);
+ move_reg(ctx, t1, src);
+ emit_zext_32(ctx, t1, is32);
+ emit_insn(ctx, divdu, dst, dst, t1);
+ emit_zext_32(ctx, dst, is32);
+ } else {
+ emit_sext_32(ctx, dst, is32);
+ move_reg(ctx, t1, src);
+ emit_sext_32(ctx, t1, is32);
+ emit_insn(ctx, divd, dst, dst, t1);
+ emit_sext_32(ctx, dst, is32);
+ }
break;
/* dst = dst / imm */
case BPF_ALU | BPF_DIV | BPF_K:
case BPF_ALU64 | BPF_DIV | BPF_K:
- move_imm(ctx, t1, imm, is32);
- emit_zext_32(ctx, dst, is32);
- emit_insn(ctx, divdu, dst, dst, t1);
- emit_zext_32(ctx, dst, is32);
+ if (!off) {
+ move_imm(ctx, t1, imm, is32);
+ emit_zext_32(ctx, dst, is32);
+ emit_insn(ctx, divdu, dst, dst, t1);
+ emit_zext_32(ctx, dst, is32);
+ } else {
+ move_imm(ctx, t1, imm, false);
+ emit_sext_32(ctx, t1, is32);
+ emit_sext_32(ctx, dst, is32);
+ emit_insn(ctx, divd, dst, dst, t1);
+ emit_sext_32(ctx, dst, is32);
+ }
break;
/* dst = dst % src */
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_X:
- emit_zext_32(ctx, dst, is32);
- move_reg(ctx, t1, src);
- emit_zext_32(ctx, t1, is32);
- emit_insn(ctx, moddu, dst, dst, t1);
- emit_zext_32(ctx, dst, is32);
+ if (!off) {
+ emit_zext_32(ctx, dst, is32);
+ move_reg(ctx, t1, src);
+ emit_zext_32(ctx, t1, is32);
+ emit_insn(ctx, moddu, dst, dst, t1);
+ emit_zext_32(ctx, dst, is32);
+ } else {
+ emit_sext_32(ctx, dst, is32);
+ move_reg(ctx, t1, src);
+ emit_sext_32(ctx, t1, is32);
+ emit_insn(ctx, modd, dst, dst, t1);
+ emit_sext_32(ctx, dst, is32);
+ }
break;
/* dst = dst % imm */
case BPF_ALU | BPF_MOD | BPF_K:
case BPF_ALU64 | BPF_MOD | BPF_K:
- move_imm(ctx, t1, imm, is32);
- emit_zext_32(ctx, dst, is32);
- emit_insn(ctx, moddu, dst, dst, t1);
- emit_zext_32(ctx, dst, is32);
+ if (!off) {
+ move_imm(ctx, t1, imm, is32);
+ emit_zext_32(ctx, dst, is32);
+ emit_insn(ctx, moddu, dst, dst, t1);
+ emit_zext_32(ctx, dst, is32);
+ } else {
+ move_imm(ctx, t1, imm, false);
+ emit_sext_32(ctx, t1, is32);
+ emit_sext_32(ctx, dst, is32);
+ emit_insn(ctx, modd, dst, dst, t1);
+ emit_sext_32(ctx, dst, is32);
+ }
break;
/* dst = -dst */
@@ -711,6 +764,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
break;
case BPF_ALU | BPF_END | BPF_FROM_BE:
+ case BPF_ALU64 | BPF_END | BPF_FROM_LE:
switch (imm) {
case 16:
emit_insn(ctx, revb2h, dst, dst);
@@ -719,8 +773,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
break;
case 32:
emit_insn(ctx, revb2w, dst, dst);
- /* zero-extend 32 bits into 64 bits */
- emit_zext_32(ctx, dst, is32);
+ /* clear the upper 32 bits */
+ emit_zext_32(ctx, dst, true);
break;
case 64:
emit_insn(ctx, revbd, dst, dst);
@@ -827,7 +881,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* PC += off */
case BPF_JMP | BPF_JA:
- jmp_offset = bpf2la_offset(i, off, ctx);
+ case BPF_JMP32 | BPF_JA:
+ if (BPF_CLASS(code) == BPF_JMP)
+ jmp_offset = bpf2la_offset(i, off, ctx);
+ else
+ jmp_offset = bpf2la_offset(i, imm, ctx);
if (emit_uncond_jmp(ctx, jmp_offset) < 0)
goto toofar;
break;
@@ -880,31 +938,56 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
+ /* dst_reg = (s64)*(signed size *)(src_reg + off) */
+ case BPF_LDX | BPF_MEMSX | BPF_B:
+ case BPF_LDX | BPF_MEMSX | BPF_H:
+ case BPF_LDX | BPF_MEMSX | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
+ sign_extend = BPF_MODE(insn->code) == BPF_MEMSX ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
switch (BPF_SIZE(code)) {
case BPF_B:
if (is_signed_imm12(off)) {
- emit_insn(ctx, ldbu, dst, src, off);
+ if (sign_extend)
+ emit_insn(ctx, ldb, dst, src, off);
+ else
+ emit_insn(ctx, ldbu, dst, src, off);
} else {
move_imm(ctx, t1, off, is32);
- emit_insn(ctx, ldxbu, dst, src, t1);
+ if (sign_extend)
+ emit_insn(ctx, ldxb, dst, src, t1);
+ else
+ emit_insn(ctx, ldxbu, dst, src, t1);
}
break;
case BPF_H:
if (is_signed_imm12(off)) {
- emit_insn(ctx, ldhu, dst, src, off);
+ if (sign_extend)
+ emit_insn(ctx, ldh, dst, src, off);
+ else
+ emit_insn(ctx, ldhu, dst, src, off);
} else {
move_imm(ctx, t1, off, is32);
- emit_insn(ctx, ldxhu, dst, src, t1);
+ if (sign_extend)
+ emit_insn(ctx, ldxh, dst, src, t1);
+ else
+ emit_insn(ctx, ldxhu, dst, src, t1);
}
break;
case BPF_W:
if (is_signed_imm12(off)) {
- emit_insn(ctx, ldwu, dst, src, off);
- } else if (is_signed_imm14(off)) {
- emit_insn(ctx, ldptrw, dst, src, off);
+ if (sign_extend)
+ emit_insn(ctx, ldw, dst, src, off);
+ else
+ emit_insn(ctx, ldwu, dst, src, off);
} else {
move_imm(ctx, t1, off, is32);
- emit_insn(ctx, ldxwu, dst, src, t1);
+ if (sign_extend)
+ emit_insn(ctx, ldxw, dst, src, t1);
+ else
+ emit_insn(ctx, ldxwu, dst, src, t1);
}
break;
case BPF_DW:
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index 5c97d14633..f597cd08a9 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -2,6 +2,7 @@
# Objects to go into the VDSO.
KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
# Include the generic Makefile to check the built vdso.
@@ -83,13 +84,3 @@ $(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE
obj-y += vdso.o
$(obj)/vdso.o : $(obj)/vdso.so
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso.so: $(obj)/vdso.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso.so