From 50ba0232fd5312410f1b65247e774244f89a628e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 20:50:36 +0200 Subject: Merging upstream version 6.8.9. Signed-off-by: Daniel Baumann --- arch/arm64/include/asm/assembler.h | 27 +++--- arch/arm64/include/asm/cache.h | 6 -- arch/arm64/include/asm/cpu.h | 1 - arch/arm64/include/asm/cpufeature.h | 8 +- arch/arm64/include/asm/esr.h | 15 ++++ arch/arm64/include/asm/fpsimdmacros.h | 8 -- arch/arm64/include/asm/irq.h | 2 + arch/arm64/include/asm/kasan.h | 22 +---- arch/arm64/include/asm/kernel-pgtable.h | 27 ++---- arch/arm64/include/asm/kvm_arm.h | 63 +++++++------ arch/arm64/include/asm/kvm_emulate.h | 34 +++---- arch/arm64/include/asm/kvm_host.h | 140 +++++++++++++++++++---------- arch/arm64/include/asm/kvm_mmu.h | 7 -- arch/arm64/include/asm/kvm_nested.h | 56 ++++++++++-- arch/arm64/include/asm/kvm_pgtable.h | 80 +++++++++++------ arch/arm64/include/asm/kvm_pkvm.h | 5 +- arch/arm64/include/asm/memory.h | 45 +++++++--- arch/arm64/include/asm/pgtable-prot.h | 2 + arch/arm64/include/asm/processor.h | 3 + arch/arm64/include/asm/simd.h | 11 +-- arch/arm64/include/asm/sparsemem.h | 2 +- arch/arm64/include/asm/spectre.h | 4 +- arch/arm64/include/asm/stacktrace/common.h | 19 +--- arch/arm64/include/asm/stacktrace/nvhe.h | 2 +- arch/arm64/include/asm/sysreg.h | 25 ++++++ arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/include/asm/tlb.h | 15 ++-- arch/arm64/include/asm/tlbflush.h | 118 ++++++++++++++---------- arch/arm64/include/asm/topology.h | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 10 +++ arch/arm64/include/asm/vdso.h | 3 - arch/arm64/include/asm/vncr_mapping.h | 103 +++++++++++++++++++++ 33 files changed, 565 insertions(+), 302 deletions(-) create mode 100644 arch/arm64/include/asm/vncr_mapping.h (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 376a980f2b..513787e433 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -12,7 +12,7 @@ #ifndef __ASM_ASSEMBLER_H #define __ASM_ASSEMBLER_H -#include +#include #include #include @@ -760,32 +760,25 @@ alternative_endif .endm /* - * Check whether preempt/bh-disabled asm code should yield as soon as - * it is able. This is the case if we are currently running in task - * context, and either a softirq is pending, or the TIF_NEED_RESCHED - * flag is set and re-enabling preemption a single time would result in - * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is - * stored negated in the top word of the thread_info::preempt_count + * Check whether asm code should yield as soon as it is able. This is + * the case if we are currently running in task context, and the + * TIF_NEED_RESCHED flag is set. (Note that the TIF_NEED_RESCHED flag + * is stored negated in the top word of the thread_info::preempt_count * field) */ - .macro cond_yield, lbl:req, tmp:req, tmp2:req + .macro cond_yield, lbl:req, tmp:req, tmp2 +#ifdef CONFIG_PREEMPT_VOLUNTARY get_current_task \tmp ldr \tmp, [\tmp, #TSK_TI_PREEMPT] /* * If we are serving a softirq, there is no point in yielding: the * softirq will not be preempted no matter what we do, so we should - * run to completion as quickly as we can. + * run to completion as quickly as we can. The preempt_count field will + * have BIT(SOFTIRQ_SHIFT) set in this case, so the zero check will + * catch this case too. */ - tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@ -#ifdef CONFIG_PREEMPTION - sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET cbz \tmp, \lbl #endif - adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING - get_this_cpu_offset \tmp2 - ldr w\tmp, [\tmp, \tmp2] - cbnz w\tmp, \lbl // yield on pending softirq in task context -.Lnoyield_\@: .endm /* diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index ceb368d33b..06a4670bdb 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -58,7 +58,6 @@ static inline unsigned int arch_slab_minalign(void) #define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr) #define ICACHEF_ALIASING 0 -#define ICACHEF_VPIPT 1 extern unsigned long __icache_flags; /* @@ -70,11 +69,6 @@ static inline int icache_is_aliasing(void) return test_bit(ICACHEF_ALIASING, &__icache_flags); } -static __always_inline int icache_is_vpipt(void) -{ - return test_bit(ICACHEF_VPIPT, &__icache_flags); -} - static inline u32 cache_type_cwg(void) { return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype()); diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index f3034099fd..b1e43f56ee 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -38,7 +38,6 @@ struct cpuinfo_32bit { }; struct cpuinfo_arm64 { - struct cpu cpu; struct kobject kobj; u64 reg_ctr; u64 reg_cntfrq; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f6d416fe49..bd8d4ca81a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -83,7 +83,7 @@ struct arm64_ftr_bits { * to full-0 denotes that this field has no override * * A @mask field set to full-0 with the corresponding @val field set - * to full-1 denotes thath this field has an invalid override. + * to full-1 denotes that this field has an invalid override. */ struct arm64_ftr_override { u64 val; @@ -617,6 +617,7 @@ static inline bool id_aa64pfr1_mte(u64 pfr1) return val >= ID_AA64PFR1_EL1_MTE_MTE2; } +void __init setup_boot_cpu_features(void); void __init setup_system_features(void); void __init setup_user_features(void); @@ -819,6 +820,11 @@ static inline bool system_supports_tlb_range(void) return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE); } +static inline bool system_supports_lpa2(void) +{ + return cpus_have_final_cap(ARM64_HAS_LPA2); +} + int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); bool try_emulate_mrs(struct pt_regs *regs, u32 isn); diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index ae35939f39..353fe08546 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -392,6 +392,21 @@ static inline bool esr_is_data_abort(unsigned long esr) return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; } +static inline bool esr_fsc_is_translation_fault(unsigned long esr) +{ + return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT; +} + +static inline bool esr_fsc_is_permission_fault(unsigned long esr) +{ + return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM; +} + +static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) +{ + return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS; +} + const char *esr_get_class_string(unsigned long esr); #endif /* __ASSEMBLY */ diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index cdf6a35e39..cda81d009c 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -242,14 +242,6 @@ | (\nx << 5) .endm -/* - * Zero the entire ZA array - * ZERO ZA - */ -.macro zero_za - .inst 0xc00800ff -.endm - .macro __for from:req, to:req .if (\from) == (\to) _for__body %\from diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 50ce8b697f..e93548914c 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -4,6 +4,8 @@ #ifndef __ASSEMBLER__ +#include + #include void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu); diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index 12d5f47f7d..7eefc525a9 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -15,29 +15,9 @@ #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) +asmlinkage void kasan_early_init(void); void kasan_init(void); - -/* - * KASAN_SHADOW_START: beginning of the kernel virtual addresses. - * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses, - * where N = (1 << KASAN_SHADOW_SCALE_SHIFT). - * - * KASAN_SHADOW_OFFSET: - * This value is used to map an address to the corresponding shadow - * address by the following formula: - * shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET - * - * (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in range - * [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64-bits of virtual - * addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation: - * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - - * (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT)) - */ -#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (1UL << ((va) - KASAN_SHADOW_SCALE_SHIFT))) -#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual) - void kasan_copy_shadow(pgd_t *pgdir); -asmlinkage void kasan_early_init(void); #else static inline void kasan_init(void) { } diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 85d26143fa..83ddb14b95 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -37,27 +37,12 @@ /* - * If KASLR is enabled, then an offset K is added to the kernel address - * space. The bottom 21 bits of this offset are zero to guarantee 2MB - * alignment for PA and VA. - * - * For each pagetable level of the swapper, we know that the shift will - * be larger than 21 (for the 4KB granule case we use section maps thus - * the smallest shift is actually 30) thus there is the possibility that - * KASLR can increase the number of pagetable entries by 1, so we make - * room for this extra entry. - * - * Note KASLR cannot increase the number of required entries for a level - * by more than one because it increments both the virtual start and end - * addresses equally (the extra entry comes from the case where the end - * address is just pushed over a boundary and the start address isn't). + * A relocatable kernel may execute from an address that differs from the one at + * which it was linked. In the worst case, its runtime placement may intersect + * with two adjacent PGDIR entries, which means that an additional page table + * may be needed at each subordinate level. */ - -#ifdef CONFIG_RANDOMIZE_BASE -#define EARLY_KASLR (1) -#else -#define EARLY_KASLR (0) -#endif +#define EXTRA_PAGE __is_defined(CONFIG_RELOCATABLE) #define SPAN_NR_ENTRIES(vstart, vend, shift) \ ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1) @@ -83,7 +68,7 @@ + EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE)) /* the initial ID map may need two extra pages if it needs to be extended */ #if VA_BITS < 48 diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index b85f46a73e..3c6f8ba1e4 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -108,6 +108,7 @@ #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En) /* TCR_EL2 Registers bits */ +#define TCR_EL2_DS (1UL << 32) #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS_SHIFT 16 @@ -122,6 +123,7 @@ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) /* VTCR_EL2 Registers bits */ +#define VTCR_EL2_DS TCR_EL2_DS #define VTCR_EL2_RES1 (1U << 31) #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) @@ -344,36 +346,47 @@ * Once we get to a point where the two describe the same thing, we'll * merge the definitions. One day. */ -#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51)) +#define __HFGRTR_EL2_RES0 HFGxTR_EL2_RES0 #define __HFGRTR_EL2_MASK GENMASK(49, 0) -#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) +#define __HFGRTR_EL2_nMASK ~(__HFGRTR_EL2_RES0 | __HFGRTR_EL2_MASK) -#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \ - BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ - GENMASK(26, 25) | BIT(21) | BIT(18) | \ +/* + * The HFGWTR bits are a subset of HFGRTR bits. To ensure we don't miss any + * future additions, define __HFGWTR* macros relative to __HFGRTR* ones. + */ +#define __HFGRTR_ONLY_MASK (BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ + GENMASK(26, 25) | BIT(21) | BIT(18) | \ GENMASK(15, 14) | GENMASK(10, 9) | BIT(2)) -#define __HFGWTR_EL2_MASK GENMASK(49, 0) -#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) - -#define __HFGITR_EL2_RES0 GENMASK(63, 57) -#define __HFGITR_EL2_MASK GENMASK(54, 0) -#define __HFGITR_EL2_nMASK GENMASK(56, 55) - -#define __HDFGRTR_EL2_RES0 (BIT(49) | BIT(42) | GENMASK(39, 38) | \ - GENMASK(21, 20) | BIT(8)) -#define __HDFGRTR_EL2_MASK ~__HDFGRTR_EL2_nMASK -#define __HDFGRTR_EL2_nMASK GENMASK(62, 59) - -#define __HDFGWTR_EL2_RES0 (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \ - BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \ - BIT(22) | BIT(9) | BIT(6)) -#define __HDFGWTR_EL2_MASK ~__HDFGWTR_EL2_nMASK -#define __HDFGWTR_EL2_nMASK GENMASK(62, 60) +#define __HFGWTR_EL2_RES0 (__HFGRTR_EL2_RES0 | __HFGRTR_ONLY_MASK) +#define __HFGWTR_EL2_MASK (__HFGRTR_EL2_MASK & ~__HFGRTR_ONLY_MASK) +#define __HFGWTR_EL2_nMASK ~(__HFGWTR_EL2_RES0 | __HFGWTR_EL2_MASK) + +#define __HFGITR_EL2_RES0 HFGITR_EL2_RES0 +#define __HFGITR_EL2_MASK (BIT(62) | BIT(60) | GENMASK(54, 0)) +#define __HFGITR_EL2_nMASK ~(__HFGITR_EL2_RES0 | __HFGITR_EL2_MASK) + +#define __HDFGRTR_EL2_RES0 HDFGRTR_EL2_RES0 +#define __HDFGRTR_EL2_MASK (BIT(63) | GENMASK(58, 50) | GENMASK(48, 43) | \ + GENMASK(41, 40) | GENMASK(37, 22) | \ + GENMASK(19, 9) | GENMASK(7, 0)) +#define __HDFGRTR_EL2_nMASK ~(__HDFGRTR_EL2_RES0 | __HDFGRTR_EL2_MASK) + +#define __HDFGWTR_EL2_RES0 HDFGWTR_EL2_RES0 +#define __HDFGWTR_EL2_MASK (GENMASK(57, 52) | GENMASK(50, 48) | \ + GENMASK(46, 44) | GENMASK(42, 41) | \ + GENMASK(37, 35) | GENMASK(33, 31) | \ + GENMASK(29, 23) | GENMASK(21, 10) | \ + GENMASK(8, 7) | GENMASK(5, 0)) +#define __HDFGWTR_EL2_nMASK ~(__HDFGWTR_EL2_RES0 | __HDFGWTR_EL2_MASK) + +#define __HAFGRTR_EL2_RES0 HAFGRTR_EL2_RES0 +#define __HAFGRTR_EL2_MASK (GENMASK(49, 17) | GENMASK(4, 0)) +#define __HAFGRTR_EL2_nMASK ~(__HAFGRTR_EL2_RES0 | __HAFGRTR_EL2_MASK) /* Similar definitions for HCRX_EL2 */ -#define __HCRX_EL2_RES0 (GENMASK(63, 16) | GENMASK(13, 12)) -#define __HCRX_EL2_MASK (0) -#define __HCRX_EL2_nMASK (GENMASK(15, 14) | GENMASK(4, 0)) +#define __HCRX_EL2_RES0 HCRX_EL2_RES0 +#define __HCRX_EL2_MASK (BIT(6)) +#define __HCRX_EL2_nMASK ~(__HCRX_EL2_RES0 | __HCRX_EL2_MASK) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 78a550537b..b804fe8321 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -54,11 +55,6 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); -static inline bool vcpu_has_feature(const struct kvm_vcpu *vcpu, int feature) -{ - return test_bit(feature, vcpu->kvm->arch.vcpu_features); -} - #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { @@ -248,7 +244,7 @@ static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt) static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu) { - return __is_hyp_ctxt(&vcpu->arch.ctxt); + return vcpu_has_nv(vcpu) && __is_hyp_ctxt(&vcpu->arch.ctxt); } /* @@ -404,14 +400,25 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC; } -static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) +static inline +bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; + return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu)); } -static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu) +static inline +bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL; + return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu)); +} + +static inline +u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu) +{ + unsigned long esr = kvm_vcpu_get_esr(vcpu); + + BUG_ON(!esr_fsc_is_permission_fault(esr)); + return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL)); } static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) @@ -454,12 +461,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) * first), then a permission fault to allow the flags * to be set. */ - switch (kvm_vcpu_trap_get_fault_type(vcpu)) { - case ESR_ELx_FSC_PERM: - return true; - default: - return false; - } + return kvm_vcpu_trap_is_permission_fault(vcpu); } if (kvm_vcpu_trap_is_iabt(vcpu)) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 824f29f049..21c57b8125 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -27,6 +27,7 @@ #include #include #include +#include #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -306,6 +307,7 @@ struct kvm_arch { * Atomic access to multiple idregs are guarded by kvm_arch.config_lock. */ #define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id)) +#define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask) #define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)]) #define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1) u64 id_regs[KVM_ARM_ID_REG_NUM]; @@ -324,33 +326,33 @@ struct kvm_vcpu_fault_info { u64 disr_el1; /* Deferred [SError] Status Register */ }; +/* + * VNCR() just places the VNCR_capable registers in the enum after + * __VNCR_START__, and the value (after correction) to be an 8-byte offset + * from the VNCR base. As we don't require the enum to be otherwise ordered, + * we need the terrible hack below to ensure that we correctly size the + * sys_regs array, no matter what. + * + * The __MAX__ macro has been lifted from Sean Eron Anderson's wonderful + * treasure trove of bit hacks: + * https://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + */ +#define __MAX__(x,y) ((x) ^ (((x) ^ (y)) & -((x) < (y)))) +#define VNCR(r) \ + __before_##r, \ + r = __VNCR_START__ + ((VNCR_ ## r) / 8), \ + __after_##r = __MAX__(__before_##r - 1, r) + enum vcpu_sysreg { __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ CLIDR_EL1, /* Cache Level ID Register */ CSSELR_EL1, /* Cache Size Selection Register */ - SCTLR_EL1, /* System Control Register */ - ACTLR_EL1, /* Auxiliary Control Register */ - CPACR_EL1, /* Coprocessor Access Control */ - ZCR_EL1, /* SVE Control */ - TTBR0_EL1, /* Translation Table Base Register 0 */ - TTBR1_EL1, /* Translation Table Base Register 1 */ - TCR_EL1, /* Translation Control Register */ - TCR2_EL1, /* Extended Translation Control Register */ - ESR_EL1, /* Exception Syndrome Register */ - AFSR0_EL1, /* Auxiliary Fault Status Register 0 */ - AFSR1_EL1, /* Auxiliary Fault Status Register 1 */ - FAR_EL1, /* Fault Address Register */ - MAIR_EL1, /* Memory Attribute Indirection Register */ - VBAR_EL1, /* Vector Base Address Register */ - CONTEXTIDR_EL1, /* Context ID Register */ TPIDR_EL0, /* Thread ID, User R/W */ TPIDRRO_EL0, /* Thread ID, User R/O */ TPIDR_EL1, /* Thread ID, Privileged */ - AMAIR_EL1, /* Aux Memory Attribute Indirection Register */ CNTKCTL_EL1, /* Timer Control Register (EL1) */ PAR_EL1, /* Physical Address Register */ - MDSCR_EL1, /* Monitor Debug System Control Register */ MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ OSLSR_EL1, /* OS Lock Status Register */ DISR_EL1, /* Deferred Interrupt Status Register */ @@ -381,26 +383,11 @@ enum vcpu_sysreg { APGAKEYLO_EL1, APGAKEYHI_EL1, - ELR_EL1, - SP_EL1, - SPSR_EL1, - - CNTVOFF_EL2, - CNTV_CVAL_EL0, - CNTV_CTL_EL0, - CNTP_CVAL_EL0, - CNTP_CTL_EL0, - /* Memory Tagging Extension registers */ RGSR_EL1, /* Random Allocation Tag Seed Register */ GCR_EL1, /* Tag Control Register */ - TFSR_EL1, /* Tag Fault Status Register (EL1) */ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ - /* Permission Indirection Extension registers */ - PIR_EL1, /* Permission Indirection Register 1 (EL1) */ - PIRE0_EL1, /* Permission Indirection Register 0 (EL1) */ - /* 32bit specific registers. */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -408,21 +395,14 @@ enum vcpu_sysreg { DBGVCR32_EL2, /* Debug Vector Catch Register */ /* EL2 registers */ - VPIDR_EL2, /* Virtualization Processor ID Register */ - VMPIDR_EL2, /* Virtualization Multiprocessor ID Register */ SCTLR_EL2, /* System Control Register (EL2) */ ACTLR_EL2, /* Auxiliary Control Register (EL2) */ - HCR_EL2, /* Hypervisor Configuration Register */ MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ CPTR_EL2, /* Architectural Feature Trap Register (EL2) */ - HSTR_EL2, /* Hypervisor System Trap Register */ HACR_EL2, /* Hypervisor Auxiliary Control Register */ - HCRX_EL2, /* Extended Hypervisor Configuration Register */ TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */ TCR_EL2, /* Translation Control Register (EL2) */ - VTTBR_EL2, /* Virtualization Translation Table Base Register */ - VTCR_EL2, /* Virtualization Translation Control Register */ SPSR_EL2, /* EL2 saved program status register */ ELR_EL2, /* EL2 exception link register */ AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */ @@ -435,19 +415,62 @@ enum vcpu_sysreg { VBAR_EL2, /* Vector Base Address Register (EL2) */ RVBAR_EL2, /* Reset Vector Base Address Register */ CONTEXTIDR_EL2, /* Context ID Register (EL2) */ - TPIDR_EL2, /* EL2 Software Thread ID Register */ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ SP_EL2, /* EL2 Stack Pointer */ - HFGRTR_EL2, - HFGWTR_EL2, - HFGITR_EL2, - HDFGRTR_EL2, - HDFGWTR_EL2, CNTHP_CTL_EL2, CNTHP_CVAL_EL2, CNTHV_CTL_EL2, CNTHV_CVAL_EL2, + __VNCR_START__, /* Any VNCR-capable reg goes after this point */ + + VNCR(SCTLR_EL1),/* System Control Register */ + VNCR(ACTLR_EL1),/* Auxiliary Control Register */ + VNCR(CPACR_EL1),/* Coprocessor Access Control */ + VNCR(ZCR_EL1), /* SVE Control */ + VNCR(TTBR0_EL1),/* Translation Table Base Register 0 */ + VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */ + VNCR(TCR_EL1), /* Translation Control Register */ + VNCR(TCR2_EL1), /* Extended Translation Control Register */ + VNCR(ESR_EL1), /* Exception Syndrome Register */ + VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */ + VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */ + VNCR(FAR_EL1), /* Fault Address Register */ + VNCR(MAIR_EL1), /* Memory Attribute Indirection Register */ + VNCR(VBAR_EL1), /* Vector Base Address Register */ + VNCR(CONTEXTIDR_EL1), /* Context ID Register */ + VNCR(AMAIR_EL1),/* Aux Memory Attribute Indirection Register */ + VNCR(MDSCR_EL1),/* Monitor Debug System Control Register */ + VNCR(ELR_EL1), + VNCR(SP_EL1), + VNCR(SPSR_EL1), + VNCR(TFSR_EL1), /* Tag Fault Status Register (EL1) */ + VNCR(VPIDR_EL2),/* Virtualization Processor ID Register */ + VNCR(VMPIDR_EL2),/* Virtualization Multiprocessor ID Register */ + VNCR(HCR_EL2), /* Hypervisor Configuration Register */ + VNCR(HSTR_EL2), /* Hypervisor System Trap Register */ + VNCR(VTTBR_EL2),/* Virtualization Translation Table Base Register */ + VNCR(VTCR_EL2), /* Virtualization Translation Control Register */ + VNCR(TPIDR_EL2),/* EL2 Software Thread ID Register */ + VNCR(HCRX_EL2), /* Extended Hypervisor Configuration Register */ + + /* Permission Indirection Extension registers */ + VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */ + VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */ + + VNCR(HFGRTR_EL2), + VNCR(HFGWTR_EL2), + VNCR(HFGITR_EL2), + VNCR(HDFGRTR_EL2), + VNCR(HDFGWTR_EL2), + VNCR(HAFGRTR_EL2), + + VNCR(CNTVOFF_EL2), + VNCR(CNTV_CVAL_EL0), + VNCR(CNTV_CTL_EL0), + VNCR(CNTP_CVAL_EL0), + VNCR(CNTP_CTL_EL0), + NR_SYS_REGS /* Nothing after this line! */ }; @@ -464,6 +487,9 @@ struct kvm_cpu_context { u64 sys_regs[NR_SYS_REGS]; struct kvm_vcpu *__hyp_running_vcpu; + + /* This pointer has to be 4kB aligned. */ + u64 *vncr_array; }; struct kvm_host_data { @@ -826,8 +852,19 @@ struct kvm_vcpu_arch { * accessed by a running VCPU. For example, for userspace access or * for system registers that are never context switched, but only * emulated. + * + * Don't bother with VNCR-based accesses in the nVHE code, it has no + * business dealing with NV. */ -#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)]) +static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) +{ +#if !defined (__KVM_NVHE_HYPERVISOR__) + if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) && + r >= __VNCR_START__ && ctxt->vncr_array)) + return &ctxt->vncr_array[r - __VNCR_START__]; +#endif + return (u64 *)&ctxt->sys_regs[r]; +} #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) @@ -871,6 +908,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; + case SPSR_EL1: *val = read_sysreg_s(SYS_SPSR_EL12); break; case PAR_EL1: *val = read_sysreg_par(); break; case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; @@ -915,6 +953,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; + case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break; case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; @@ -954,8 +993,6 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); -#define KVM_ARCH_WANT_MMU_NOTIFIER - void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); @@ -1177,6 +1214,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_vm_has_ran_once(kvm) \ (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags)) +static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) +{ + return test_bit(feature, ka->vcpu_features); +} + +#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 49e0d4b36b..e3e793d0ec 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -243,13 +243,6 @@ static inline size_t __invalidate_icache_max_range(void) static inline void __invalidate_icache_guest_page(void *va, size_t size) { - /* - * VPIPT I-cache maintenance must be done from EL2. See comment in the - * nVHE flavor of __kvm_tlb_flush_vmid_ipa(). - */ - if (icache_is_vpipt() && read_sysreg(CurrentEL) != CurrentEL_EL2) - return; - /* * Blow the whole I-cache if it is aliasing (i.e. VIPT) or the * invalidation range exceeds our arbitrary limit on invadations by diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 6cec8e9c6c..4882905357 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -2,8 +2,9 @@ #ifndef __ARM64_KVM_NESTED_H #define __ARM64_KVM_NESTED_H -#include +#include #include +#include static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) { @@ -12,12 +13,55 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2)); } -extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); +/* Translation helpers from non-VHE EL2 to EL1 */ +static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2) +{ + return (u64)FIELD_GET(TCR_EL2_PS_MASK, tcr_el2) << TCR_IPS_SHIFT; +} + +static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr) +{ + return TCR_EPD1_MASK | /* disable TTBR1_EL1 */ + ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) | + tcr_el2_ps_to_tcr_el1_ips(tcr) | + (tcr & TCR_EL2_TG0_MASK) | + (tcr & TCR_EL2_ORGN0_MASK) | + (tcr & TCR_EL2_IRGN0_MASK) | + (tcr & TCR_EL2_T0SZ_MASK); +} + +static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2) +{ + u64 cpacr_el1 = 0; + + if (cptr_el2 & CPTR_EL2_TTA) + cpacr_el1 |= CPACR_ELx_TTA; + if (!(cptr_el2 & CPTR_EL2_TFP)) + cpacr_el1 |= CPACR_ELx_FPEN; + if (!(cptr_el2 & CPTR_EL2_TZ)) + cpacr_el1 |= CPACR_ELx_ZEN; -struct sys_reg_params; -struct sys_reg_desc; + return cpacr_el1; +} + +static inline u64 translate_sctlr_el2_to_sctlr_el1(u64 val) +{ + /* Only preserve the minimal set of bits we support */ + val &= (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | SCTLR_ELx_SA | + SCTLR_ELx_I | SCTLR_ELx_IESB | SCTLR_ELx_WXN | SCTLR_ELx_EE); + val |= SCTLR_EL1_RES1; + + return val; +} + +static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0) +{ + /* Clear the ASID field */ + return ttbr0 & ~GENMASK_ULL(63, 48); +} + +extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); -void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p, - const struct sys_reg_desc *r); +int kvm_init_nv_sysregs(struct kvm *kvm); #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index d3e354bb83..cfdf40f734 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -11,7 +11,8 @@ #include #include -#define KVM_PGTABLE_MAX_LEVELS 4U +#define KVM_PGTABLE_FIRST_LEVEL -1 +#define KVM_PGTABLE_LAST_LEVEL 3 /* * The largest supported block sizes for KVM (no 52-bit PA support): @@ -20,17 +21,29 @@ * - 64K (level 2): 512MB */ #ifdef CONFIG_ARM64_4K_PAGES -#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1U +#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1 #else -#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U +#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2 #endif +#define kvm_lpa2_is_enabled() system_supports_lpa2() + +static inline u64 kvm_get_parange_max(void) +{ + if (kvm_lpa2_is_enabled() || + (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16)) + return ID_AA64MMFR0_EL1_PARANGE_52; + else + return ID_AA64MMFR0_EL1_PARANGE_48; +} + static inline u64 kvm_get_parange(u64 mmfr0) { + u64 parange_max = kvm_get_parange_max(); u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); - if (parange > ID_AA64MMFR0_EL1_PARANGE_MAX) - parange = ID_AA64MMFR0_EL1_PARANGE_MAX; + if (parange > parange_max) + parange = parange_max; return parange; } @@ -41,6 +54,8 @@ typedef u64 kvm_pte_t; #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) #define KVM_PTE_ADDR_51_48 GENMASK(15, 12) +#define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT) +#define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8) #define KVM_PHYS_INVALID (-1ULL) @@ -51,21 +66,34 @@ static inline bool kvm_pte_valid(kvm_pte_t pte) static inline u64 kvm_pte_to_phys(kvm_pte_t pte) { - u64 pa = pte & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) - pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + u64 pa; + + if (kvm_lpa2_is_enabled()) { + pa = pte & KVM_PTE_ADDR_MASK_LPA2; + pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50; + } else { + pa = pte & KVM_PTE_ADDR_MASK; + if (PAGE_SHIFT == 16) + pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + } return pa; } static inline kvm_pte_t kvm_phys_to_pte(u64 pa) { - kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) { - pa &= GENMASK(51, 48); - pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + kvm_pte_t pte; + + if (kvm_lpa2_is_enabled()) { + pte = pa & KVM_PTE_ADDR_MASK_LPA2; + pa &= GENMASK(51, 50); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50); + } else { + pte = pa & KVM_PTE_ADDR_MASK; + if (PAGE_SHIFT == 16) { + pa &= GENMASK(51, 48); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + } } return pte; @@ -76,28 +104,28 @@ static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte) return __phys_to_pfn(kvm_pte_to_phys(pte)); } -static inline u64 kvm_granule_shift(u32 level) +static inline u64 kvm_granule_shift(s8 level) { - /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ + /* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */ return ARM64_HW_PGTABLE_LEVEL_SHIFT(level); } -static inline u64 kvm_granule_size(u32 level) +static inline u64 kvm_granule_size(s8 level) { return BIT(kvm_granule_shift(level)); } -static inline bool kvm_level_supports_block_mapping(u32 level) +static inline bool kvm_level_supports_block_mapping(s8 level) { return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL; } static inline u32 kvm_supported_block_sizes(void) { - u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL; + s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL; u32 r = 0; - for (; level < KVM_PGTABLE_MAX_LEVELS; level++) + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) r |= BIT(kvm_granule_shift(level)); return r; @@ -142,7 +170,7 @@ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); void* (*zalloc_pages_exact)(size_t size); void (*free_pages_exact)(void *addr, size_t size); - void (*free_unlinked_table)(void *addr, u32 level); + void (*free_unlinked_table)(void *addr, s8 level); void (*get_page)(void *addr); void (*put_page)(void *addr); int (*page_count)(void *addr); @@ -238,7 +266,7 @@ struct kvm_pgtable_visit_ctx { u64 start; u64 addr; u64 end; - u32 level; + s8 level; enum kvm_pgtable_walk_flags flags; }; @@ -341,7 +369,7 @@ static inline bool kvm_pgtable_walk_lock_held(void) */ struct kvm_pgtable { u32 ia_bits; - u32 start_level; + s8 start_level; kvm_pteref_t pgd; struct kvm_pgtable_mm_ops *mm_ops; @@ -475,7 +503,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); * The page-table is assumed to be unreachable by any hardware walkers prior to * freeing and therefore no TLB invalidation is performed. */ -void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level); +void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level); /** * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure. @@ -499,7 +527,7 @@ void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *p * an ERR_PTR(error) on failure. */ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, - u64 phys, u32 level, + u64 phys, s8 level, enum kvm_pgtable_prot prot, void *mc, bool force_pte); @@ -725,7 +753,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, * Return: 0 on success, negative error code on failure. */ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, - kvm_pte_t *ptep, u32 *level); + kvm_pte_t *ptep, s8 *level); /** * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index e46250a020..ad9cfb5c1f 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -56,10 +56,11 @@ static inline unsigned long hyp_vm_table_pages(void) static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) { - unsigned long total = 0, i; + unsigned long total = 0; + int i; /* Provision the worst case scenario */ - for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) { + for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) { nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE); total += nr_pages; } diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index fde4186cc3..d82305ab42 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -65,15 +65,41 @@ #define KERNEL_END _end /* - * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual - * address space for the shadow region respectively. They can bloat the stack - * significantly, so double the (minimum) stack size when they are in use. + * Generic and Software Tag-Based KASAN modes require 1/8th and 1/16th of the + * kernel virtual address space for storing the shadow memory respectively. + * + * The mapping between a virtual memory address and its corresponding shadow + * memory address is defined based on the formula: + * + * shadow_addr = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET + * + * where KASAN_SHADOW_SCALE_SHIFT is the order of the number of bits that map + * to a single shadow byte and KASAN_SHADOW_OFFSET is a constant that offsets + * the mapping. Note that KASAN_SHADOW_OFFSET does not point to the start of + * the shadow memory region. + * + * Based on this mapping, we define two constants: + * + * KASAN_SHADOW_START: the start of the shadow memory region; + * KASAN_SHADOW_END: the end of the shadow memory region. + * + * KASAN_SHADOW_END is defined first as the shadow address that corresponds to + * the upper bound of possible virtual kernel memory addresses UL(1) << 64 + * according to the mapping formula. + * + * KASAN_SHADOW_START is defined second based on KASAN_SHADOW_END. The shadow + * memory start must map to the lowest possible kernel virtual memory address + * and thus it depends on the actual bitness of the address space. + * + * As KASAN inserts redzones between stack variables, this increases the stack + * memory usage significantly. Thus, we double the (minimum) stack size. */ #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) -#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \ - + KASAN_SHADOW_OFFSET) -#define PAGE_END (KASAN_SHADOW_END - (1UL << (vabits_actual - KASAN_SHADOW_SCALE_SHIFT))) +#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) + KASAN_SHADOW_OFFSET) +#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (UL(1) << ((va) - KASAN_SHADOW_SCALE_SHIFT))) +#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual) +#define PAGE_END KASAN_SHADOW_START #define KASAN_THREAD_SHIFT 1 #else #define KASAN_THREAD_SHIFT 0 @@ -182,6 +208,7 @@ #include #include #include +#include #if VA_BITS > 48 extern u64 vabits_actual; @@ -193,15 +220,12 @@ extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) -/* the virtual base of the kernel image */ -extern u64 kimage_vaddr; - /* the offset between the kernel virtual and physical mappings */ extern u64 kimage_voffset; static inline unsigned long kaslr_offset(void) { - return kimage_vaddr - KIMAGE_VADDR; + return (u64)&_text - KIMAGE_VADDR; } #ifdef CONFIG_RANDOMIZE_BASE @@ -407,6 +431,5 @@ void dump_mem_limit(void); #define INIT_MEMBLOCK_MEMORY_REGIONS (INIT_MEMBLOCK_REGIONS * 8) #endif -#include #endif /* __ASM_MEMORY_H */ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index e9624f6326..483dbfa39c 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -71,6 +71,8 @@ extern bool arm64_use_ng_mappings; #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) +#define lpa2_is_enabled() false + /* * If we have userspace only BTI we don't want to mark kernel pages * guarded even if the system does support BTI. diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index e5bc54522e..5b0a04810b 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -167,6 +167,9 @@ struct thread_struct { unsigned long fault_address; /* fault info */ unsigned long fault_code; /* ESR_EL1 value */ struct debug_info debug; /* debugging */ + + struct user_fpsimd_state kernel_fpsimd_state; + unsigned int kernel_fpsimd_cpu; #ifdef CONFIG_ARM64_PTR_AUTH struct ptrauth_keys_user keys_user; #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 6a75d7ecdc..8e86c9e70e 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -12,8 +12,6 @@ #include #include -DECLARE_PER_CPU(bool, fpsimd_context_busy); - #ifdef CONFIG_KERNEL_MODE_NEON /* @@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void) /* * We must make sure that the SVE has been initialized properly * before using the SIMD in kernel. - * fpsimd_context_busy is only set while preemption is disabled, - * and is clear whenever preemption is enabled. Since - * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy - * cannot change under our feet -- if it's set we cannot be - * migrated, and if it's clear we cannot be migrated to a CPU - * where it is set. */ return !WARN_ON(!system_capabilities_finalized()) && system_supports_fpsimd() && - !in_hardirq() && !irqs_disabled() && !in_nmi() && - !this_cpu_read(fpsimd_context_busy); + !in_hardirq() && !irqs_disabled() && !in_nmi(); } #else /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h index 5f54376210..8a8acc2203 100644 --- a/arch/arm64/include/asm/sparsemem.h +++ b/arch/arm64/include/asm/sparsemem.h @@ -10,7 +10,7 @@ /* * Section size must be at least 512MB for 64K base * page size config. Otherwise it will be less than - * MAX_ORDER and the build process will fail. + * MAX_PAGE_ORDER and the build process will fail. */ #ifdef CONFIG_ARM64_64K_PAGES #define SECTION_SIZE_BITS 29 diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 06c357d83b..0c4d9045c3 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -13,8 +13,8 @@ #define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K) #ifndef __ASSEMBLY__ - -#include +#include +#include #include #include diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h index 508f734de4..f63dc654e5 100644 --- a/arch/arm64/include/asm/stacktrace/common.h +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -9,7 +9,6 @@ #ifndef __ASM_STACKTRACE_COMMON_H #define __ASM_STACKTRACE_COMMON_H -#include #include struct stack_info { @@ -23,12 +22,6 @@ struct stack_info { * @fp: The fp value in the frame record (or the real fp) * @pc: The lr value in the frame record (or the real lr) * - * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance - * associated with the most recently encountered replacement lr - * value. - * - * @task: The task being unwound. - * * @stack: The stack currently being unwound. * @stacks: An array of stacks which can be unwound. * @nr_stacks: The number of stacks in @stacks. @@ -36,10 +29,6 @@ struct stack_info { struct unwind_state { unsigned long fp; unsigned long pc; -#ifdef CONFIG_KRETPROBES - struct llist_node *kr_cur; -#endif - struct task_struct *task; struct stack_info stack; struct stack_info *stacks; @@ -66,14 +55,8 @@ static inline bool stackinfo_on_stack(const struct stack_info *info, return true; } -static inline void unwind_init_common(struct unwind_state *state, - struct task_struct *task) +static inline void unwind_init_common(struct unwind_state *state) { - state->task = task; -#ifdef CONFIG_KRETPROBES - state->kr_cur = NULL; -#endif - state->stack = stackinfo_get_unknown(); } diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h index 25ab83a315..44759281d0 100644 --- a/arch/arm64/include/asm/stacktrace/nvhe.h +++ b/arch/arm64/include/asm/stacktrace/nvhe.h @@ -31,7 +31,7 @@ static inline void kvm_nvhe_unwind_init(struct unwind_state *state, unsigned long fp, unsigned long pc) { - unwind_init_common(state, NULL); + unwind_init_common(state); state->fp = fp; state->pc = pc; diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5e65f51c10..c3b19b376c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -645,6 +645,7 @@ #define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3) #define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0) #define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1) +#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2) #define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0) #define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1) #define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4) @@ -781,10 +782,16 @@ #define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6) /* Misc instructions */ +#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4) +#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5) +#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6) +#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0) + #define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4) #define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5) #define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4) #define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5) +#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6) #define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7) /* Common SCTLR_ELx flags. */ @@ -871,10 +878,12 @@ /* id_aa64mmfr0 */ #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1 +#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf #define ARM64_MIN_PARANGE_BITS 32 @@ -882,6 +891,7 @@ #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7 #ifdef CONFIG_ARM64_PA_BITS_52 @@ -892,11 +902,13 @@ #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT #elif defined(CONFIG_ARM64_16K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT @@ -1039,6 +1051,19 @@ #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) +/* + * Permission Overlay Extension (POE) permission encodings. + */ +#define POE_NONE UL(0x0) +#define POE_R UL(0x1) +#define POE_X UL(0x2) +#define POE_RX UL(0x3) +#define POE_W UL(0x4) +#define POE_RW UL(0x5) +#define POE_XW UL(0x6) +#define POE_RXW UL(0x7) +#define POE_MASK UL(0xf) + #define ARM64_FEATURE_FIELD_BITS 4 /* Defined for compatibility only, do not add new users. */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 553d1bc559..e72a3bf9e5 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -80,6 +80,7 @@ void arch_setup_new_exec(void); #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ #define TIF_SME 27 /* SME in use */ #define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ +#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 846c563689..0150deb332 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -22,15 +22,15 @@ static void tlb_flush(struct mmu_gather *tlb); #include /* - * get the tlbi levels in arm64. Default value is 0 if more than one - * of cleared_* is set or neither is set. - * Arm64 doesn't support p4ds now. + * get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than + * one of cleared_* is set or neither is set - this elides the level hinting to + * the hardware. */ static inline int tlb_get_level(struct mmu_gather *tlb) { /* The TTL field is only valid for the leaf entry. */ if (tlb->freed_tables) - return 0; + return TLBI_TTL_UNKNOWN; if (tlb->cleared_ptes && !(tlb->cleared_pmds || tlb->cleared_puds || @@ -47,7 +47,12 @@ static inline int tlb_get_level(struct mmu_gather *tlb) tlb->cleared_p4ds)) return 1; - return 0; + if (tlb->cleared_p4ds && !(tlb->cleared_ptes || + tlb->cleared_pmds || + tlb->cleared_puds)) + return 0; + + return TLBI_TTL_UNKNOWN; } static inline void tlb_flush(struct mmu_gather *tlb) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index bb2c2833a9..bfeb54f3a9 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -94,19 +94,22 @@ static inline unsigned long get_trans_granule(void) * When ARMv8.4-TTL exists, TLBI operations take an additional hint for * the level at which the invalidation must take place. If the level is * wrong, no invalidation may take place. In the case where the level - * cannot be easily determined, a 0 value for the level parameter will - * perform a non-hinted invalidation. + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform + * a non-hinted invalidation. Any provided level outside the hint range + * will also cause fall-back to non-hinted invalidation. * * For Stage-2 invalidation, use the level values provided to that effect * in asm/stage2_pgtable.h. */ #define TLBI_TTL_MASK GENMASK_ULL(47, 44) +#define TLBI_TTL_UNKNOWN INT_MAX + #define __tlbi_level(op, addr, level) do { \ u64 arg = addr; \ \ if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \ - level) { \ + level >= 0 && level <= 3) { \ u64 ttl = level & 3; \ ttl |= get_trans_granule() << 2; \ arg &= ~TLBI_TTL_MASK; \ @@ -122,28 +125,34 @@ static inline unsigned long get_trans_granule(void) } while (0) /* - * This macro creates a properly formatted VA operand for the TLB RANGE. - * The value bit assignments are: + * This macro creates a properly formatted VA operand for the TLB RANGE. The + * value bit assignments are: * * +----------+------+-------+-------+-------+----------------------+ * | ASID | TG | SCALE | NUM | TTL | BADDR | * +-----------------+-------+-------+-------+----------------------+ * |63 48|47 46|45 44|43 39|38 37|36 0| * - * The address range is determined by below formula: - * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) + * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) * + * 2^(5*SCALE + 1) * PAGESIZE) + * + * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR + * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI + * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA, + * EL1, Inner Shareable". * */ -#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (addr) >> PAGE_SHIFT; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= (unsigned long)(ttl) << 37; \ - __ta |= (unsigned long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ +#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = (baddr); \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ + __ta &= GENMASK_ULL(36, 0); \ + __ta |= __ttl << 37; \ + __ta |= (unsigned long)(num) << 39; \ + __ta |= (unsigned long)(scale) << 44; \ + __ta |= get_trans_granule() << 46; \ + __ta |= (unsigned long)(asid) << 48; \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ @@ -152,12 +161,18 @@ static inline unsigned long get_trans_granule(void) #define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) /* - * Generate 'num' values from -1 to 30 with -1 rejected by the - * __flush_tlb_range() loop below. + * Generate 'num' values from -1 to 31 with -1 rejected by the + * __flush_tlb_range() loop below. Its return value is only + * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If + * 'pages' is more than that, you must iterate over the overall + * range. */ -#define TLBI_RANGE_MASK GENMASK_ULL(4, 0) -#define __TLBI_RANGE_NUM(pages, scale) \ - ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) +#define __TLBI_RANGE_NUM(pages, scale) \ + ({ \ + int __pages = min((pages), \ + __TLBI_RANGE_PAGES(31, (scale))); \ + (__pages >> (5 * (scale) + 1)) - 1; \ + }) /* * TLB Invalidation @@ -216,12 +231,16 @@ static inline unsigned long get_trans_granule(void) * CPUs, ensuring that any walk-cache entries associated with the * translation are also invalidated. * - * __flush_tlb_range(vma, start, end, stride, last_level) + * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level) * Invalidate the virtual-address range '[start, end)' on all * CPUs for the user address space corresponding to 'vma->mm'. * The invalidation operations are issued at a granularity * determined by 'stride' and only affect any walk-cache entries - * if 'last_level' is equal to false. + * if 'last_level' is equal to false. tlb_level is the level at + * which the invalidation must take place. If the level is wrong, + * no invalidation may take place. In the case where the level + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will + * perform a non-hinted invalidation. * * * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented @@ -345,34 +364,40 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * @tlb_level: Translation Table level hint, if known * @tlbi_user: If 'true', call an additional __tlbi_user() * (typically for user ASIDs). 'flase' for IPA instructions + * @lpa2: If 'true', the lpa2 scheme is used as set out below * * When the CPU does not support TLB range operations, flush the TLB * entries one by one at the granularity of 'stride'. If the TLB * range ops are supported, then: * - * 1. If 'pages' is odd, flush the first page through non-range - * operations; + * 1. If FEAT_LPA2 is in use, the start address of a range operation must be + * 64KB aligned, so flush pages one by one until the alignment is reached + * using the non-range operations. This step is skipped if LPA2 is not in + * use. * - * 2. For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. + * 2. The minimum range granularity is decided by 'scale', so multiple range + * TLBI operations may be required. Start from scale = 3, flush the largest + * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the + * requested range, then decrement scale and continue until one or zero pages + * are left. We must start from highest scale to ensure 64KB start alignment + * is maintained in the LPA2 case. * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. + * 3. If there is 1 page remaining, flush it through non-range operations. Range + * operations can only span an even number of pages. We save this for last to + * ensure 64KB start alignment is maintained for the LPA2 case. */ #define __flush_tlb_range_op(op, start, pages, stride, \ - asid, tlb_level, tlbi_user) \ + asid, tlb_level, tlbi_user, lpa2) \ do { \ int num = 0; \ - int scale = 0; \ + int scale = 3; \ + int shift = lpa2 ? 16 : PAGE_SHIFT; \ unsigned long addr; \ \ while (pages > 0) { \ if (!system_supports_tlb_range() || \ - pages % 2 == 1) { \ + pages == 1 || \ + (lpa2 && start != ALIGN(start, SZ_64K))) { \ addr = __TLBI_VADDR(start, asid); \ __tlbi_level(op, addr, tlb_level); \ if (tlbi_user) \ @@ -384,20 +409,20 @@ do { \ \ num = __TLBI_RANGE_NUM(pages, scale); \ if (num >= 0) { \ - addr = __TLBI_VADDR_RANGE(start, asid, scale, \ - num, tlb_level); \ + addr = __TLBI_VADDR_RANGE(start >> shift, asid, \ + scale, num, tlb_level); \ __tlbi(r##op, addr); \ if (tlbi_user) \ __tlbi_user(r##op, addr); \ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ pages -= __TLBI_RANGE_PAGES(num, scale); \ } \ - scale++; \ + scale--; \ } \ } while (0) #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ - __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false) + __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled()); static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, @@ -427,9 +452,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, asid = ASID(vma->vm_mm); if (last_level) - __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true); + __flush_tlb_range_op(vale1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); else - __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true); + __flush_tlb_range_op(vae1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); dsb(ish); mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end); @@ -441,9 +468,10 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, /* * We cannot use leaf-only invalidation here, since we may be invalidating * table entries as part of collapsing hugepages or moving page tables. - * Set the tlb_level to 0 because we can not get enough information here. + * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough + * information here. */ - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 9fab663dd2..a323b109b9 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -23,6 +23,7 @@ void update_freq_counters_refs(void); #define arch_set_freq_scale topology_set_freq_scale #define arch_scale_freq_capacity topology_get_freq_scale #define arch_scale_freq_invariant topology_scale_freq_invariant +#define arch_scale_freq_ref topology_get_freq_ref #ifdef CONFIG_ACPI_CPPC_LIB #define arch_init_invariance_cppc topology_init_cpu_capacity_cppc diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 531effca5f..491b2b9bd5 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 457 +#define __NR_compat_syscalls 462 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 9f7c1bf995..7118282d1c 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -919,6 +919,16 @@ __SYSCALL(__NR_futex_wake, sys_futex_wake) __SYSCALL(__NR_futex_wait, sys_futex_wait) #define __NR_futex_requeue 456 __SYSCALL(__NR_futex_requeue, sys_futex_requeue) +#define __NR_statmount 457 +__SYSCALL(__NR_statmount, sys_statmount) +#define __NR_listmount 458 +__SYSCALL(__NR_listmount, sys_listmount) +#define __NR_lsm_get_self_attr 459 +__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) +#define __NR_lsm_set_self_attr 460 +__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) +#define __NR_lsm_list_modules 461 +__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index b4ae321099..4305995c8f 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -17,9 +17,6 @@ #ifndef __ASSEMBLY__ #include -#ifdef CONFIG_COMPAT_VDSO -#include -#endif #define VDSO_SYMBOL(base, name) \ ({ \ diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h new file mode 100644 index 0000000000..df2c47c559 --- /dev/null +++ b/arch/arm64/include/asm/vncr_mapping.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * System register offsets in the VNCR page + * All offsets are *byte* displacements! + */ + +#ifndef __ARM64_VNCR_MAPPING_H__ +#define __ARM64_VNCR_MAPPING_H__ + +#define VNCR_VTTBR_EL2 0x020 +#define VNCR_VTCR_EL2 0x040 +#define VNCR_VMPIDR_EL2 0x050 +#define VNCR_CNTVOFF_EL2 0x060 +#define VNCR_HCR_EL2 0x078 +#define VNCR_HSTR_EL2 0x080 +#define VNCR_VPIDR_EL2 0x088 +#define VNCR_TPIDR_EL2 0x090 +#define VNCR_HCRX_EL2 0x0A0 +#define VNCR_VNCR_EL2 0x0B0 +#define VNCR_CPACR_EL1 0x100 +#define VNCR_CONTEXTIDR_EL1 0x108 +#define VNCR_SCTLR_EL1 0x110 +#define VNCR_ACTLR_EL1 0x118 +#define VNCR_TCR_EL1 0x120 +#define VNCR_AFSR0_EL1 0x128 +#define VNCR_AFSR1_EL1 0x130 +#define VNCR_ESR_EL1 0x138 +#define VNCR_MAIR_EL1 0x140 +#define VNCR_AMAIR_EL1 0x148 +#define VNCR_MDSCR_EL1 0x158 +#define VNCR_SPSR_EL1 0x160 +#define VNCR_CNTV_CVAL_EL0 0x168 +#define VNCR_CNTV_CTL_EL0 0x170 +#define VNCR_CNTP_CVAL_EL0 0x178 +#define VNCR_CNTP_CTL_EL0 0x180 +#define VNCR_SCXTNUM_EL1 0x188 +#define VNCR_TFSR_EL1 0x190 +#define VNCR_HFGRTR_EL2 0x1B8 +#define VNCR_HFGWTR_EL2 0x1C0 +#define VNCR_HFGITR_EL2 0x1C8 +#define VNCR_HDFGRTR_EL2 0x1D0 +#define VNCR_HDFGWTR_EL2 0x1D8 +#define VNCR_ZCR_EL1 0x1E0 +#define VNCR_HAFGRTR_EL2 0x1E8 +#define VNCR_TTBR0_EL1 0x200 +#define VNCR_TTBR1_EL1 0x210 +#define VNCR_FAR_EL1 0x220 +#define VNCR_ELR_EL1 0x230 +#define VNCR_SP_EL1 0x240 +#define VNCR_VBAR_EL1 0x250 +#define VNCR_TCR2_EL1 0x270 +#define VNCR_PIRE0_EL1 0x290 +#define VNCR_PIRE0_EL2 0x298 +#define VNCR_PIR_EL1 0x2A0 +#define VNCR_ICH_LR0_EL2 0x400 +#define VNCR_ICH_LR1_EL2 0x408 +#define VNCR_ICH_LR2_EL2 0x410 +#define VNCR_ICH_LR3_EL2 0x418 +#define VNCR_ICH_LR4_EL2 0x420 +#define VNCR_ICH_LR5_EL2 0x428 +#define VNCR_ICH_LR6_EL2 0x430 +#define VNCR_ICH_LR7_EL2 0x438 +#define VNCR_ICH_LR8_EL2 0x440 +#define VNCR_ICH_LR9_EL2 0x448 +#define VNCR_ICH_LR10_EL2 0x450 +#define VNCR_ICH_LR11_EL2 0x458 +#define VNCR_ICH_LR12_EL2 0x460 +#define VNCR_ICH_LR13_EL2 0x468 +#define VNCR_ICH_LR14_EL2 0x470 +#define VNCR_ICH_LR15_EL2 0x478 +#define VNCR_ICH_AP0R0_EL2 0x480 +#define VNCR_ICH_AP0R1_EL2 0x488 +#define VNCR_ICH_AP0R2_EL2 0x490 +#define VNCR_ICH_AP0R3_EL2 0x498 +#define VNCR_ICH_AP1R0_EL2 0x4A0 +#define VNCR_ICH_AP1R1_EL2 0x4A8 +#define VNCR_ICH_AP1R2_EL2 0x4B0 +#define VNCR_ICH_AP1R3_EL2 0x4B8 +#define VNCR_ICH_HCR_EL2 0x4C0 +#define VNCR_ICH_VMCR_EL2 0x4C8 +#define VNCR_VDISR_EL2 0x500 +#define VNCR_PMBLIMITR_EL1 0x800 +#define VNCR_PMBPTR_EL1 0x810 +#define VNCR_PMBSR_EL1 0x820 +#define VNCR_PMSCR_EL1 0x828 +#define VNCR_PMSEVFR_EL1 0x830 +#define VNCR_PMSICR_EL1 0x838 +#define VNCR_PMSIRR_EL1 0x840 +#define VNCR_PMSLATFR_EL1 0x848 +#define VNCR_TRFCR_EL1 0x880 +#define VNCR_MPAM1_EL1 0x900 +#define VNCR_MPAMHCR_EL2 0x930 +#define VNCR_MPAMVPMV_EL2 0x938 +#define VNCR_MPAMVPM0_EL2 0x940 +#define VNCR_MPAMVPM1_EL2 0x948 +#define VNCR_MPAMVPM2_EL2 0x950 +#define VNCR_MPAMVPM3_EL2 0x958 +#define VNCR_MPAMVPM4_EL2 0x960 +#define VNCR_MPAMVPM5_EL2 0x968 +#define VNCR_MPAMVPM6_EL2 0x970 +#define VNCR_MPAMVPM7_EL2 0x978 + +#endif /* __ARM64_VNCR_MAPPING_H__ */ -- cgit v1.2.3