summaryrefslogtreecommitdiffstats
path: root/arch/arm64/include
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:11:22 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:11:22 +0000
commitb20732900e4636a467c0183a47f7396700f5f743 (patch)
tree42f079ff82e701ebcb76829974b4caca3e5b6798 /arch/arm64/include
parentAdding upstream version 6.8.12. (diff)
downloadlinux-b20732900e4636a467c0183a47f7396700f5f743.tar.xz
linux-b20732900e4636a467c0183a47f7396700f5f743.zip
Adding upstream version 6.9.7.upstream/6.9.7
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/arm64/include')
-rw-r--r--arch/arm64/include/asm/archrandom.h2
-rw-r--r--arch/arm64/include/asm/asm-bug.h1
-rw-r--r--arch/arm64/include/asm/assembler.h59
-rw-r--r--arch/arm64/include/asm/brk-imm.h2
-rw-r--r--arch/arm64/include/asm/cpu.h4
-rw-r--r--arch/arm64/include/asm/cpufeature.h114
-rw-r--r--arch/arm64/include/asm/crash_reserve.h (renamed from arch/arm64/include/asm/crash_core.h)4
-rw-r--r--arch/arm64/include/asm/elf.h10
-rw-r--r--arch/arm64/include/asm/esr.h13
-rw-r--r--arch/arm64/include/asm/exception.h2
-rw-r--r--arch/arm64/include/asm/fixmap.h2
-rw-r--r--arch/arm64/include/asm/fpsimd.h4
-rw-r--r--arch/arm64/include/asm/hw_breakpoint.h1
-rw-r--r--arch/arm64/include/asm/hwcap.h15
-rw-r--r--arch/arm64/include/asm/hyperv-tlfs.h45
-rw-r--r--arch/arm64/include/asm/io.h12
-rw-r--r--arch/arm64/include/asm/kasan.h2
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h103
-rw-r--r--arch/arm64/include/asm/kexec.h2
-rw-r--r--arch/arm64/include/asm/kvm_arm.h6
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h13
-rw-r--r--arch/arm64/include/asm/kvm_host.h100
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h2
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h46
-rw-r--r--arch/arm64/include/asm/kvm_nested.h1
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h2
-rw-r--r--arch/arm64/include/asm/memory.h33
-rw-r--r--arch/arm64/include/asm/mmu.h40
-rw-r--r--arch/arm64/include/asm/mmu_context.h53
-rw-r--r--arch/arm64/include/asm/mshyperv.h4
-rw-r--r--arch/arm64/include/asm/page-def.h2
-rw-r--r--arch/arm64/include/asm/patching.h2
-rw-r--r--arch/arm64/include/asm/pgalloc.h52
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h33
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h20
-rw-r--r--arch/arm64/include/asm/pgtable-types.h6
-rw-r--r--arch/arm64/include/asm/pgtable.h666
-rw-r--r--arch/arm64/include/asm/processor.h4
-rw-r--r--arch/arm64/include/asm/ptdump.h7
-rw-r--r--arch/arm64/include/asm/scs.h36
-rw-r--r--arch/arm64/include/asm/setup.h3
-rw-r--r--arch/arm64/include/asm/sysreg.h29
-rw-r--r--arch/arm64/include/asm/tlb.h3
-rw-r--r--arch/arm64/include/asm/tlbflush.h13
-rw-r--r--arch/arm64/include/asm/word-at-a-time.h3
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h15
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h15
-rw-r--r--arch/arm64/include/uapi/asm/sigcontext.h8
-rw-r--r--arch/arm64/include/uapi/asm/sve_context.h11
49 files changed, 1217 insertions, 408 deletions
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index ecdb3cfcd0..8babfbe31f 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -129,6 +129,4 @@ static inline bool __init __early_cpu_has_rndr(void)
return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
}
-u64 kaslr_early_init(void *fdt);
-
#endif /* _ASM_ARCHRANDOM_H */
diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h
index c762038ba4..6e73809f64 100644
--- a/arch/arm64/include/asm/asm-bug.h
+++ b/arch/arm64/include/asm/asm-bug.h
@@ -28,6 +28,7 @@
14470: .long 14471f - .; \
_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
.short flags; \
+ .align 2; \
.popsection; \
14471:
#else
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 513787e433..ab8b396428 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -38,10 +38,6 @@
msr daifset, #0xf
.endm
- .macro enable_daif
- msr daifclr, #0xf
- .endm
-
/*
* Save/restore interrupts.
*/
@@ -346,20 +342,6 @@ alternative_cb_end
.endm
/*
- * idmap_get_t0sz - get the T0SZ value needed to cover the ID map
- *
- * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
- * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
- * this number conveniently equals the number of leading zeroes in
- * the physical address of _end.
- */
- .macro idmap_get_t0sz, reg
- adrp \reg, _end
- orr \reg, \reg, #(1 << VA_BITS_MIN) - 1
- clz \reg, \reg
- .endm
-
-/*
* tcr_compute_pa_size - set TCR.(I)PS to the highest supported
* ID_AA64MMFR0_EL1.PARange value
*
@@ -590,18 +572,27 @@ alternative_endif
.endm
/*
- * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
+ * If the kernel is built for 52-bit virtual addressing but the hardware only
+ * supports 48 bits, we cannot program the pgdir address into TTBR1 directly,
+ * but we have to add an offset so that the TTBR1 address corresponds with the
+ * pgdir entry that covers the lowest 48-bit addressable VA.
+ *
+ * Note that this trick is only used for LVA/64k pages - LPA2/4k pages uses an
+ * additional paging level, and on LPA2/16k pages, we would end up with a root
+ * level table with only 2 entries, which is suboptimal in terms of TLB
+ * utilization, so there we fall back to 47 bits of translation if LPA2 is not
+ * supported.
+ *
* orr is used as it can cover the immediate value (and is idempotent).
- * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
* ttbr: Value of ttbr to set, modified.
*/
.macro offset_ttbr1, ttbr, tmp
-#ifdef CONFIG_ARM64_VA_BITS_52
- mrs_s \tmp, SYS_ID_AA64MMFR2_EL1
- and \tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
- cbnz \tmp, .Lskipoffs_\@
- orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
-.Lskipoffs_\@ :
+#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2)
+ mrs \tmp, tcr_el1
+ and \tmp, \tmp, #TCR_T1SZ_MASK
+ cmp \tmp, #TCR_T1SZ(VA_BITS_MIN)
+ orr \tmp, \ttbr, #TTBR1_BADDR_4852_OFFSET
+ csel \ttbr, \tmp, \ttbr, eq
#endif
.endm
@@ -623,25 +614,13 @@ alternative_endif
.macro phys_to_pte, pte, phys
#ifdef CONFIG_ARM64_PA_BITS_52
- /*
- * We assume \phys is 64K aligned and this is guaranteed by only
- * supporting this configuration with 64K pages.
- */
- orr \pte, \phys, \phys, lsr #36
- and \pte, \pte, #PTE_ADDR_MASK
+ orr \pte, \phys, \phys, lsr #PTE_ADDR_HIGH_SHIFT
+ and \pte, \pte, #PHYS_TO_PTE_ADDR_MASK
#else
mov \pte, \phys
#endif
.endm
- .macro pte_to_phys, phys, pte
- and \phys, \pte, #PTE_ADDR_MASK
-#ifdef CONFIG_ARM64_PA_BITS_52
- orr \phys, \phys, \phys, lsl #PTE_ADDR_HIGH_SHIFT
- and \phys, \phys, GENMASK_ULL(PHYS_MASK_SHIFT - 1, PAGE_SHIFT)
-#endif
- .endm
-
/*
* tcr_clear_errata_bits - Clear TCR bits that trigger an errata on this CPU.
*/
diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
index 1abdcd508a..beb42c62b6 100644
--- a/arch/arm64/include/asm/brk-imm.h
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -11,6 +11,7 @@
* 0x004: for installing kprobes
* 0x005: for installing uprobes
* 0x006: for kprobe software single-step
+ * 0x007: for kretprobe return
* Allowed values for kgdb are 0x400 - 0x7ff
* 0x100: for triggering a fault on purpose (reserved)
* 0x400: for dynamic BRK instruction
@@ -23,6 +24,7 @@
#define KPROBES_BRK_IMM 0x004
#define UPROBES_BRK_IMM 0x005
#define KPROBES_BRK_SS_IMM 0x006
+#define KRETPROBES_BRK_IMM 0x007
#define FAULT_BRK_IMM 0x100
#define KGDB_DYN_DBG_BRK_IMM 0x400
#define KGDB_COMPILED_DBG_BRK_IMM 0x401
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index b1e43f56ee..9b73fd0cd7 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -52,14 +52,18 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64isar0;
u64 reg_id_aa64isar1;
u64 reg_id_aa64isar2;
+ u64 reg_id_aa64isar3;
u64 reg_id_aa64mmfr0;
u64 reg_id_aa64mmfr1;
u64 reg_id_aa64mmfr2;
u64 reg_id_aa64mmfr3;
+ u64 reg_id_aa64mmfr4;
u64 reg_id_aa64pfr0;
u64 reg_id_aa64pfr1;
+ u64 reg_id_aa64pfr2;
u64 reg_id_aa64zfr0;
u64 reg_id_aa64smfr0;
+ u64 reg_id_aa64fpfr0;
struct cpuinfo_32bit aarch32;
};
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index bd8d4ca81a..8b904a757b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -17,6 +17,7 @@
#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0
#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4
+#define ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF 8
#ifndef __ASSEMBLY__
@@ -363,6 +364,7 @@ struct arm64_cpu_capabilities {
u8 field_pos;
u8 field_width;
u8 min_field_value;
+ u8 max_field_value;
u8 hwcap_type;
bool sign;
unsigned long hwcap;
@@ -768,6 +770,11 @@ static __always_inline bool system_supports_tpidr2(void)
return system_supports_sme();
}
+static __always_inline bool system_supports_fpmr(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_FPMR);
+}
+
static __always_inline bool system_supports_cnp(void)
{
return alternative_has_cap_unlikely(ARM64_HAS_CNP);
@@ -905,7 +912,9 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
+extern struct arm64_ftr_override id_aa64mmfr0_override;
extern struct arm64_ftr_override id_aa64mmfr1_override;
+extern struct arm64_ftr_override id_aa64mmfr2_override;
extern struct arm64_ftr_override id_aa64pfr0_override;
extern struct arm64_ftr_override id_aa64pfr1_override;
extern struct arm64_ftr_override id_aa64zfr0_override;
@@ -915,9 +924,114 @@ extern struct arm64_ftr_override id_aa64isar2_override;
extern struct arm64_ftr_override arm64_sw_feature_override;
+static inline
+u64 arm64_apply_feature_override(u64 val, int feat, int width,
+ const struct arm64_ftr_override *override)
+{
+ u64 oval = override->val;
+
+ /*
+ * When it encounters an invalid override (e.g., an override that
+ * cannot be honoured due to a missing CPU feature), the early idreg
+ * override code will set the mask to 0x0 and the value to non-zero for
+ * the field in question. In order to determine whether the override is
+ * valid or not for the field we are interested in, we first need to
+ * disregard bits belonging to other fields.
+ */
+ oval &= GENMASK_ULL(feat + width - 1, feat);
+
+ /*
+ * The override is valid if all value bits are accounted for in the
+ * mask. If so, replace the masked bits with the override value.
+ */
+ if (oval == (oval & override->mask)) {
+ val &= ~override->mask;
+ val |= oval;
+ }
+
+ /* Extract the field from the updated value */
+ return cpuid_feature_extract_unsigned_field(val, feat);
+}
+
+static inline bool arm64_test_sw_feature_override(int feat)
+{
+ /*
+ * Software features are pseudo CPU features that have no underlying
+ * CPUID system register value to apply the override to.
+ */
+ return arm64_apply_feature_override(0, feat, 4,
+ &arm64_sw_feature_override);
+}
+
+static inline bool kaslr_disabled_cmdline(void)
+{
+ return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOKASLR);
+}
+
u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);
+static inline bool cpu_has_bti(void)
+{
+ if (!IS_ENABLED(CONFIG_ARM64_BTI))
+ return false;
+
+ return arm64_apply_feature_override(read_cpuid(ID_AA64PFR1_EL1),
+ ID_AA64PFR1_EL1_BT_SHIFT, 4,
+ &id_aa64pfr1_override);
+}
+
+static inline bool cpu_has_pac(void)
+{
+ u64 isar1, isar2;
+
+ if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH))
+ return false;
+
+ isar1 = read_cpuid(ID_AA64ISAR1_EL1);
+ isar2 = read_cpuid(ID_AA64ISAR2_EL1);
+
+ if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_APA_SHIFT, 4,
+ &id_aa64isar1_override))
+ return true;
+
+ if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_API_SHIFT, 4,
+ &id_aa64isar1_override))
+ return true;
+
+ return arm64_apply_feature_override(isar2, ID_AA64ISAR2_EL1_APA3_SHIFT, 4,
+ &id_aa64isar2_override);
+}
+
+static inline bool cpu_has_lva(void)
+{
+ u64 mmfr2;
+
+ mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ mmfr2 &= ~id_aa64mmfr2_override.mask;
+ mmfr2 |= id_aa64mmfr2_override.val;
+ return cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_EL1_VARange_SHIFT);
+}
+
+static inline bool cpu_has_lpa2(void)
+{
+#ifdef CONFIG_ARM64_LPA2
+ u64 mmfr0;
+ int feat;
+
+ mmfr0 = read_sysreg(id_aa64mmfr0_el1);
+ mmfr0 &= ~id_aa64mmfr0_override.mask;
+ mmfr0 |= id_aa64mmfr0_override.val;
+ feat = cpuid_feature_extract_signed_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+
+ return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2;
+#else
+ return false;
+#endif
+}
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/crash_core.h b/arch/arm64/include/asm/crash_reserve.h
index 9f5c8d339f..4afe027a4e 100644
--- a/arch/arm64/include/asm/crash_core.h
+++ b/arch/arm64/include/asm/crash_reserve.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _ARM64_CRASH_CORE_H
-#define _ARM64_CRASH_CORE_H
+#ifndef _ARM64_CRASH_RESERVE_H
+#define _ARM64_CRASH_RESERVE_H
/* Current arm64 boot protocol requires 2MB alignment */
#define CRASH_ALIGN SZ_2M
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 97932fbf97..3f93f4eef9 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -201,16 +201,16 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
#define COMPAT_ELF_PLATFORM ("v8l")
#endif
-#ifdef CONFIG_COMPAT
-
-/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
-#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL
-
/* AArch32 registers. */
#define COMPAT_ELF_NGREG 18
typedef unsigned int compat_elf_greg_t;
typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
+#ifdef CONFIG_COMPAT
+
+/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
+#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL
+
/* AArch32 EABI. */
#define EF_ARM_EABI_MASK 0xff000000
int compat_elf_check_arch(const struct elf32_hdr *);
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 353fe08546..81606bf7d5 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -117,15 +117,9 @@
#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
-#define ESR_ELx_FSC_SEA_TTW0 (0x14)
-#define ESR_ELx_FSC_SEA_TTW1 (0x15)
-#define ESR_ELx_FSC_SEA_TTW2 (0x16)
-#define ESR_ELx_FSC_SEA_TTW3 (0x17)
+#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n))
#define ESR_ELx_FSC_SECC (0x18)
-#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
-#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
-#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
-#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
+#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT (24)
@@ -394,6 +388,9 @@ static inline bool esr_is_data_abort(unsigned long esr)
static inline bool esr_fsc_is_translation_fault(unsigned long esr)
{
+ /* Translation fault, level -1 */
+ if ((esr & ESR_ELx_FSC) == 0b101011)
+ return true;
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
}
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index ad688e157c..f296662590 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -74,7 +74,7 @@ void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
void do_el0_mops(struct pt_regs *regs, unsigned long esr);
void do_serror(struct pt_regs *regs, unsigned long esr);
-void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
+void do_signal(struct pt_regs *regs);
void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
#endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 58c294a966..87e307804b 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -87,6 +87,7 @@ enum fixed_addresses {
FIX_PTE,
FIX_PMD,
FIX_PUD,
+ FIX_P4D,
FIX_PGD,
__end_of_fixed_addresses
@@ -100,7 +101,6 @@ enum fixed_addresses {
#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE)
void __init early_fixmap_init(void);
-void __init fixmap_copy(pgd_t *pgdir);
#define __early_set_fixmap __set_fixmap
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index b67b89c54e..bc69ac368d 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -21,7 +21,6 @@
#include <linux/stddef.h>
#include <linux/types.h>
-#ifdef CONFIG_COMPAT
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
#define VFP_FPSCR_CTRL_MASK 0x07f79f00
@@ -30,7 +29,6 @@
* control/status register.
*/
#define VFP_STATE_SIZE ((32 * 8) + 4)
-#endif
static inline unsigned long cpacr_save_enable_kernel_sve(void)
{
@@ -89,6 +87,7 @@ struct cpu_fp_state {
void *sve_state;
void *sme_state;
u64 *svcr;
+ u64 *fpmr;
unsigned int sve_vl;
unsigned int sme_vl;
enum fp_type *fp_type;
@@ -154,6 +153,7 @@ extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused);
extern u64 read_smcr_features(void);
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 84055329cd..bd81cf1774 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -59,7 +59,6 @@ static inline void decode_ctrl_reg(u32 reg,
/* Watchpoints */
#define ARM_BREAKPOINT_LOAD 1
#define ARM_BREAKPOINT_STORE 2
-#define AARCH64_ESR_ACCESS_MASK (1 << 6)
/* Lengths */
#define ARM_BREAKPOINT_LEN_1 0x1
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index cd71e09ea1..4edd3b61df 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -142,6 +142,21 @@
#define KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16)
#define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3)
#define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128)
+#define KERNEL_HWCAP_FPMR __khwcap2_feature(FPMR)
+#define KERNEL_HWCAP_LUT __khwcap2_feature(LUT)
+#define KERNEL_HWCAP_FAMINMAX __khwcap2_feature(FAMINMAX)
+#define KERNEL_HWCAP_F8CVT __khwcap2_feature(F8CVT)
+#define KERNEL_HWCAP_F8FMA __khwcap2_feature(F8FMA)
+#define KERNEL_HWCAP_F8DP4 __khwcap2_feature(F8DP4)
+#define KERNEL_HWCAP_F8DP2 __khwcap2_feature(F8DP2)
+#define KERNEL_HWCAP_F8E4M3 __khwcap2_feature(F8E4M3)
+#define KERNEL_HWCAP_F8E5M2 __khwcap2_feature(F8E5M2)
+#define KERNEL_HWCAP_SME_LUTV2 __khwcap2_feature(SME_LUTV2)
+#define KERNEL_HWCAP_SME_F8F16 __khwcap2_feature(SME_F8F16)
+#define KERNEL_HWCAP_SME_F8F32 __khwcap2_feature(SME_F8F32)
+#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA)
+#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4)
+#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h
index bc6c7ac934..bc30aadedf 100644
--- a/arch/arm64/include/asm/hyperv-tlfs.h
+++ b/arch/arm64/include/asm/hyperv-tlfs.h
@@ -22,14 +22,6 @@
*/
/*
- * These Hyper-V registers provide information equivalent to the CPUID
- * instruction on x86/x64.
- */
-#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */
-#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */
-#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */
-
-/*
* Group C Features. See the asm-generic version of hyperv-tlfs.h
* for a description of Feature Groups.
*/
@@ -41,28 +33,29 @@
#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13)
/*
- * Synthetic register definitions equivalent to MSRs on x86/x64
+ * To support arch-generic code calling hv_set/get_register:
+ * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl
+ * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall
*/
-#define HV_REGISTER_CRASH_P0 0x00000210
-#define HV_REGISTER_CRASH_P1 0x00000211
-#define HV_REGISTER_CRASH_P2 0x00000212
-#define HV_REGISTER_CRASH_P3 0x00000213
-#define HV_REGISTER_CRASH_P4 0x00000214
-#define HV_REGISTER_CRASH_CTL 0x00000215
+#define HV_MSR_CRASH_P0 (HV_REGISTER_GUEST_CRASH_P0)
+#define HV_MSR_CRASH_P1 (HV_REGISTER_GUEST_CRASH_P1)
+#define HV_MSR_CRASH_P2 (HV_REGISTER_GUEST_CRASH_P2)
+#define HV_MSR_CRASH_P3 (HV_REGISTER_GUEST_CRASH_P3)
+#define HV_MSR_CRASH_P4 (HV_REGISTER_GUEST_CRASH_P4)
+#define HV_MSR_CRASH_CTL (HV_REGISTER_GUEST_CRASH_CTL)
-#define HV_REGISTER_GUEST_OSID 0x00090002
-#define HV_REGISTER_VP_INDEX 0x00090003
-#define HV_REGISTER_TIME_REF_COUNT 0x00090004
-#define HV_REGISTER_REFERENCE_TSC 0x00090017
+#define HV_MSR_VP_INDEX (HV_REGISTER_VP_INDEX)
+#define HV_MSR_TIME_REF_COUNT (HV_REGISTER_TIME_REF_COUNT)
+#define HV_MSR_REFERENCE_TSC (HV_REGISTER_REFERENCE_TSC)
-#define HV_REGISTER_SINT0 0x000A0000
-#define HV_REGISTER_SCONTROL 0x000A0010
-#define HV_REGISTER_SIEFP 0x000A0012
-#define HV_REGISTER_SIMP 0x000A0013
-#define HV_REGISTER_EOM 0x000A0014
+#define HV_MSR_SINT0 (HV_REGISTER_SINT0)
+#define HV_MSR_SCONTROL (HV_REGISTER_SCONTROL)
+#define HV_MSR_SIEFP (HV_REGISTER_SIEFP)
+#define HV_MSR_SIMP (HV_REGISTER_SIMP)
+#define HV_MSR_EOM (HV_REGISTER_EOM)
-#define HV_REGISTER_STIMER0_CONFIG 0x000B0000
-#define HV_REGISTER_STIMER0_COUNT 0x000B0001
+#define HV_MSR_STIMER0_CONFIG (HV_REGISTER_STIMER0_CONFIG)
+#define HV_MSR_STIMER0_COUNT (HV_REGISTER_STIMER0_COUNT)
union hv_msi_entry {
u64 as_uint64[2];
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 3b694511b9..8d825522c5 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -24,25 +24,29 @@
#define __raw_writeb __raw_writeb
static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
{
- asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
+ volatile u8 __iomem *ptr = addr;
+ asm volatile("strb %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writew __raw_writew
static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr)
{
- asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
+ volatile u16 __iomem *ptr = addr;
+ asm volatile("strh %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writel __raw_writel
static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
- asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
+ volatile u32 __iomem *ptr = addr;
+ asm volatile("str %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writeq __raw_writeq
static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr)
{
- asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
+ volatile u64 __iomem *ptr = addr;
+ asm volatile("str %x0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_readb __raw_readb
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index 7eefc525a9..e1b57c13f8 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -17,11 +17,9 @@
asmlinkage void kasan_early_init(void);
void kasan_init(void);
-void kasan_copy_shadow(pgd_t *pgdir);
#else
static inline void kasan_init(void) { }
-static inline void kasan_copy_shadow(pgd_t *pgdir) { }
#endif
#endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 83ddb14b95..bf05a77873 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -13,28 +13,27 @@
#include <asm/sparsemem.h>
/*
- * The linear mapping and the start of memory are both 2M aligned (per
- * the arm64 booting.txt requirements). Hence we can use section mapping
- * with 4K (section size = 2M) but not with 16K (section size = 32M) or
- * 64K (section size = 512M).
+ * The physical and virtual addresses of the start of the kernel image are
+ * equal modulo 2 MiB (per the arm64 booting.txt requirements). Hence we can
+ * use section mapping with 4K (section size = 2M) but not with 16K (section
+ * size = 32M) or 64K (section size = 512M).
*/
-
-/*
- * The idmap and swapper page tables need some space reserved in the kernel
- * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
- * map the kernel. With the 64K page configuration, swapper and idmap need to
- * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information). Note that the number of ID map translation levels
- * could be increased on the fly if system RAM is out of reach for the default
- * VA range, so pages required to map highest possible PA are reserved in all
- * cases.
- */
-#ifdef CONFIG_ARM64_4K_PAGES
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
+#if defined(PMD_SIZE) && PMD_SIZE <= MIN_KIMG_ALIGN
+#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
+#define SWAPPER_SKIP_LEVEL 1
#else
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
+#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
+#define SWAPPER_SKIP_LEVEL 0
#endif
+#define SWAPPER_BLOCK_SIZE (UL(1) << SWAPPER_BLOCK_SHIFT)
+#define SWAPPER_TABLE_SHIFT (SWAPPER_BLOCK_SHIFT + PAGE_SHIFT - 3)
+#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - SWAPPER_SKIP_LEVEL)
+#define INIT_IDMAP_PGTABLE_LEVELS (IDMAP_LEVELS - SWAPPER_SKIP_LEVEL)
+
+#define IDMAP_VA_BITS 48
+#define IDMAP_LEVELS ARM64_HW_PGTABLE_LEVELS(IDMAP_VA_BITS)
+#define IDMAP_ROOT_LEVEL (4 - IDMAP_LEVELS)
/*
* A relocatable kernel may execute from an address that differs from the one at
@@ -50,57 +49,39 @@
#define EARLY_ENTRIES(vstart, vend, shift, add) \
(SPAN_NR_ENTRIES(vstart, vend, shift) + (add))
-#define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add))
-
-#if SWAPPER_PGTABLE_LEVELS > 3
-#define EARLY_PUDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT, add))
-#else
-#define EARLY_PUDS(vstart, vend, add) (0)
-#endif
+#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \
+ (lvls > lvl ? EARLY_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * (PAGE_SHIFT - 3), add) : 0)
-#if SWAPPER_PGTABLE_LEVELS > 2
-#define EARLY_PMDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT, add))
-#else
-#define EARLY_PMDS(vstart, vend, add) (0)
-#endif
+#define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \
+ + EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+ + EARLY_LEVEL(2, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+ + EARLY_LEVEL(1, (lvls), (vstart), (vend), add))/* each entry needs a next level page table */
+#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \
+ + EARLY_SEGMENT_EXTRA_PAGES))
-#define EARLY_PAGES(vstart, vend, add) ( 1 /* PGDIR page */ \
- + EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \
- + EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \
- + EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */
-#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE))
+#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, _end, 1))
+#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE)
-/* the initial ID map may need two extra pages if it needs to be extended */
-#if VA_BITS < 48
-#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
-#else
-#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
-#endif
-#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE, 1)
+#define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1)
+#define INIT_IDMAP_FDT_SIZE ((INIT_IDMAP_FDT_PAGES + EARLY_IDMAP_EXTRA_FDT_PAGES) * PAGE_SIZE)
-/* Initial memory map size */
-#ifdef CONFIG_ARM64_4K_PAGES
-#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
-#define SWAPPER_BLOCK_SIZE PMD_SIZE
-#define SWAPPER_TABLE_SHIFT PUD_SHIFT
-#else
-#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
-#define SWAPPER_BLOCK_SIZE PAGE_SIZE
-#define SWAPPER_TABLE_SHIFT PMD_SHIFT
-#endif
+/* The number of segments in the kernel image (text, rodata, inittext, initdata, data+bss) */
+#define KERNEL_SEGMENT_COUNT 5
+#if SWAPPER_BLOCK_SIZE > SEGMENT_ALIGN
+#define EARLY_SEGMENT_EXTRA_PAGES (KERNEL_SEGMENT_COUNT + 1)
/*
- * Initial memory map attributes.
+ * The initial ID map consists of the kernel image, mapped as two separate
+ * segments, and may appear misaligned wrt the swapper block size. This means
+ * we need 3 additional pages. The DT could straddle a swapper block boundary,
+ * so it may need 2.
*/
-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PTE_UXN)
-
-#ifdef CONFIG_ARM64_4K_PAGES
-#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS | PTE_WRITE)
-#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
+#define EARLY_IDMAP_EXTRA_PAGES 3
+#define EARLY_IDMAP_EXTRA_FDT_PAGES 2
#else
-#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
-#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
+#define EARLY_SEGMENT_EXTRA_PAGES 0
+#define EARLY_IDMAP_EXTRA_PAGES 0
+#define EARLY_IDMAP_EXTRA_FDT_PAGES 0
#endif
#endif /* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 9ac9572a3b..4d9cc7a76d 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -80,7 +80,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
}
}
-#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION)
+#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_HIBERNATION)
extern bool crash_is_nosave(unsigned long pfn);
extern void crash_prepare_suspend(void);
extern void crash_post_resume(void);
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3c6f8ba1e4..e01bb5ca13 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -102,10 +102,8 @@
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
-#define HCRX_GUEST_FLAGS \
- (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \
- (cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0))
-#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
+#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En)
+#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
/* TCR_EL2 Registers bits */
#define TCR_EL2_DS (1UL << 32)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index b804fe8321..975af30af3 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -209,7 +209,8 @@ static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt)
{
- return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H;
+ return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) ||
+ (ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H));
}
static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
@@ -425,15 +426,9 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
case ESR_ELx_FSC_EXTABT:
- case ESR_ELx_FSC_SEA_TTW0:
- case ESR_ELx_FSC_SEA_TTW1:
- case ESR_ELx_FSC_SEA_TTW2:
- case ESR_ELx_FSC_SEA_TTW3:
+ case ESR_ELx_FSC_SEA_TTW(-1) ... ESR_ELx_FSC_SEA_TTW(3):
case ESR_ELx_FSC_SECC:
- case ESR_ELx_FSC_SECC_TTW0:
- case ESR_ELx_FSC_SECC_TTW1:
- case ESR_ELx_FSC_SECC_TTW2:
- case ESR_ELx_FSC_SECC_TTW3:
+ case ESR_ELx_FSC_SECC_TTW(-1) ... ESR_ELx_FSC_SECC_TTW(3):
return true;
default:
return false;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 21c57b8125..9e8a496fb2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -238,9 +238,32 @@ static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr)
return index;
}
+struct kvm_sysreg_masks;
+
+enum fgt_group_id {
+ __NO_FGT_GROUP__,
+ HFGxTR_GROUP,
+ HDFGRTR_GROUP,
+ HDFGWTR_GROUP = HDFGRTR_GROUP,
+ HFGITR_GROUP,
+ HAFGRTR_GROUP,
+
+ /* Must be last */
+ __NR_FGT_GROUP_IDS__
+};
+
struct kvm_arch {
struct kvm_s2_mmu mmu;
+ /*
+ * Fine-Grained UNDEF, mimicking the FGT layout defined by the
+ * architecture. We track them globally, as we present the
+ * same feature-set to all vcpus.
+ *
+ * Index 0 is currently spare.
+ */
+ u64 fgu[__NR_FGT_GROUP_IDS__];
+
/* Interrupt controller */
struct vgic_dist vgic;
@@ -274,6 +297,8 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6
/* Initial ID reg values loaded */
#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7
+ /* Fine-Grained UNDEF initialised */
+#define KVM_ARCH_FLAG_FGU_INITIALIZED 8
unsigned long flags;
/* VM-wide vCPU feature set */
@@ -294,6 +319,9 @@ struct kvm_arch {
/* PMCR_EL0.N value for the guest */
u8 pmcr_n;
+ /* Iterator for idreg debugfs */
+ u8 idreg_debugfs_iter;
+
/* Hypercall features firmware registers' descriptor */
struct kvm_smccc_features smccc_feat;
struct maple_tree smccc_filter;
@@ -312,6 +340,9 @@ struct kvm_arch {
#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
u64 id_regs[KVM_ARM_ID_REG_NUM];
+ /* Masks for VNCR-baked sysregs */
+ struct kvm_sysreg_masks *sysreg_masks;
+
/*
* For an untrusted host VM, 'pkvm.handle' is used to lookup
* the associated pKVM instance in the hypervisor.
@@ -474,6 +505,13 @@ enum vcpu_sysreg {
NR_SYS_REGS /* Nothing after this line! */
};
+struct kvm_sysreg_masks {
+ struct {
+ u64 res0;
+ u64 res1;
+ } mask[NR_SYS_REGS - __VNCR_START__];
+};
+
struct kvm_cpu_context {
struct user_pt_regs regs; /* sp = sp_el0 */
@@ -543,12 +581,14 @@ struct kvm_vcpu_arch {
enum fp_type fp_type;
unsigned int sve_max_vl;
u64 svcr;
+ u64 fpmr;
/* Stage 2 paging state used by the hardware on next switch */
struct kvm_s2_mmu *hw_mmu;
/* Values of trap registers for the guest. */
u64 hcr_el2;
+ u64 hcrx_el2;
u64 mdcr_el2;
u64 cptr_el2;
@@ -868,7 +908,15 @@ static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
-#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r)))
+u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *, enum vcpu_sysreg);
+#define __vcpu_sys_reg(v,r) \
+ (*({ \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 *__r = __ctxt_sys_reg(ctxt, (r)); \
+ if (vcpu_has_nv((v)) && (r) >= __VNCR_START__) \
+ *__r = kvm_vcpu_sanitise_vncr_reg((v), (r)); \
+ __r; \
+ }))
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
@@ -1055,14 +1103,20 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
+void kvm_sys_regs_create_debugfs(struct kvm *kvm);
void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
int __init kvm_sys_reg_table_init(void);
+struct sys_reg_desc;
+int __init populate_sysreg_config(const struct sys_reg_desc *sr,
+ unsigned int idx);
int __init populate_nv_trap_config(void);
bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);
+void kvm_init_sysreg(struct kvm_vcpu *);
+
/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
@@ -1233,4 +1287,48 @@ static inline void kvm_hyp_reserve(void) { }
void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);
+#define __expand_field_sign_unsigned(id, fld, val) \
+ ((u64)SYS_FIELD_VALUE(id, fld, val))
+
+#define __expand_field_sign_signed(id, fld, val) \
+ ({ \
+ u64 __val = SYS_FIELD_VALUE(id, fld, val); \
+ sign_extend64(__val, id##_##fld##_WIDTH - 1); \
+ })
+
+#define expand_field_sign(id, fld, val) \
+ (id##_##fld##_SIGNED ? \
+ __expand_field_sign_signed(id, fld, val) : \
+ __expand_field_sign_unsigned(id, fld, val))
+
+#define get_idreg_field_unsigned(kvm, id, fld) \
+ ({ \
+ u64 __val = IDREG((kvm), SYS_##id); \
+ FIELD_GET(id##_##fld##_MASK, __val); \
+ })
+
+#define get_idreg_field_signed(kvm, id, fld) \
+ ({ \
+ u64 __val = get_idreg_field_unsigned(kvm, id, fld); \
+ sign_extend64(__val, id##_##fld##_WIDTH - 1); \
+ })
+
+#define get_idreg_field_enum(kvm, id, fld) \
+ get_idreg_field_unsigned(kvm, id, fld)
+
+#define get_idreg_field(kvm, id, fld) \
+ (id##_##fld##_SIGNED ? \
+ get_idreg_field_signed(kvm, id, fld) : \
+ get_idreg_field_unsigned(kvm, id, fld))
+
+#define kvm_has_feat(kvm, id, fld, limit) \
+ (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, limit))
+
+#define kvm_has_feat_enum(kvm, id, fld, val) \
+ (get_idreg_field_unsigned((kvm), id, fld) == __expand_field_sign_unsigned(id, fld, val))
+
+#define kvm_has_feat_range(kvm, id, fld, min, max) \
+ (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) && \
+ get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max))
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 145ce73fc1..3e2a1ac0c9 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -70,7 +70,7 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
/*
* Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
* static inline can allow the compiler to out-of-line this. KVM always wants
- * the macro version as its always inlined.
+ * the macro version as it's always inlined.
*/
#define __kvm_swab32(x) ___constant_swab32(x)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index e3e793d0ec..d5e48d8704 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -54,27 +54,6 @@
#include <asm/alternative.h>
/*
- * Convert a kernel VA into a HYP VA.
- * reg: VA to be converted.
- *
- * The actual code generation takes place in kvm_update_va_mask, and
- * the instructions below are only there to reserve the space and
- * perform the register allocation (kvm_update_va_mask uses the
- * specific registers encoded in the instructions).
- */
-.macro kern_hyp_va reg
-#ifndef __KVM_VHE_HYPERVISOR__
-alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
- and \reg, \reg, #1 /* mask with va_mask */
- ror \reg, \reg, #1 /* rotate to the first tag bit */
- add \reg, \reg, #0 /* insert the low 12 bits of the tag */
- add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */
- ror \reg, \reg, #63 /* rotate back */
-alternative_cb_end
-#endif
-.endm
-
-/*
* Convert a hypervisor VA to a PA
* reg: hypervisor address to be converted in place
* tmp: temporary register
@@ -127,14 +106,29 @@ void kvm_apply_hyp_relocations(void);
#define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset)
+/*
+ * Convert a kernel VA into a HYP VA.
+ *
+ * Can be called from hyp or non-hyp context.
+ *
+ * The actual code generation takes place in kvm_update_va_mask(), and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_update_va_mask() uses the
+ * specific registers encoded in the instructions).
+ */
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
+/*
+ * This #ifndef is an optimisation for when this is called from VHE hyp
+ * context. When called from a VHE non-hyp context, kvm_update_va_mask() will
+ * replace the instructions with `nop`s.
+ */
#ifndef __KVM_VHE_HYPERVISOR__
- asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
- "ror %0, %0, #1\n"
- "add %0, %0, #0\n"
- "add %0, %0, #0, lsl 12\n"
- "ror %0, %0, #63\n",
+ asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" /* mask with va_mask */
+ "ror %0, %0, #1\n" /* rotate to the first tag bit */
+ "add %0, %0, #0\n" /* insert the low 12 bits of the tag */
+ "add %0, %0, #0, lsl 12\n" /* insert the top 12 bits of the tag */
+ "ror %0, %0, #63\n", /* rotate back */
ARM64_ALWAYS_SYSTEM,
kvm_update_va_mask)
: "+r" (v));
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 4882905357..c77d795556 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -60,7 +60,6 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
return ttbr0 & ~GENMASK_ULL(63, 48);
}
-extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
int kvm_init_nv_sysregs(struct kvm *kvm);
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index cfdf40f734..19278dfe79 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -197,6 +197,7 @@ enum kvm_pgtable_stage2_flags {
* @KVM_PGTABLE_PROT_W: Write permission.
* @KVM_PGTABLE_PROT_R: Read permission.
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
+ * @KVM_PGTABLE_PROT_NORMAL_NC: Normal noncacheable attributes.
* @KVM_PGTABLE_PROT_SW0: Software bit 0.
* @KVM_PGTABLE_PROT_SW1: Software bit 1.
* @KVM_PGTABLE_PROT_SW2: Software bit 2.
@@ -208,6 +209,7 @@ enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_R = BIT(2),
KVM_PGTABLE_PROT_DEVICE = BIT(3),
+ KVM_PGTABLE_PROT_NORMAL_NC = BIT(4),
KVM_PGTABLE_PROT_SW0 = BIT(55),
KVM_PGTABLE_PROT_SW1 = BIT(56),
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index d82305ab42..54fb014eba 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -30,8 +30,8 @@
* keep a constant PAGE_OFFSET and "fallback" to using the higher end
* of the VMEMMAP where 52-bit support is not available in hardware.
*/
-#define VMEMMAP_SHIFT (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)
-#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT)
+#define VMEMMAP_RANGE (_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET)
+#define VMEMMAP_SIZE ((VMEMMAP_RANGE >> PAGE_SHIFT) * sizeof(struct page))
/*
* PAGE_OFFSET - the virtual address of the start of the linear map, at the
@@ -47,14 +47,18 @@
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
#define MODULES_VSIZE (SZ_2G)
-#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
-#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
-#define PCI_IO_END (VMEMMAP_START - SZ_8M)
-#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
-#define FIXADDR_TOP (VMEMMAP_START - SZ_32M)
+#define VMEMMAP_START (VMEMMAP_END - VMEMMAP_SIZE)
+#define VMEMMAP_END (-UL(SZ_1G))
+#define PCI_IO_START (VMEMMAP_END + SZ_8M)
+#define PCI_IO_END (PCI_IO_START + PCI_IO_SIZE)
+#define FIXADDR_TOP (-UL(SZ_8M))
#if VA_BITS > 48
+#ifdef CONFIG_ARM64_16K_PAGES
+#define VA_BITS_MIN (47)
+#else
#define VA_BITS_MIN (48)
+#endif
#else
#define VA_BITS_MIN (VA_BITS)
#endif
@@ -173,6 +177,7 @@
* Memory types for Stage-2 translation
*/
#define MT_S2_NORMAL 0xf
+#define MT_S2_NORMAL_NC 0x5
#define MT_S2_DEVICE_nGnRE 0x1
/*
@@ -180,6 +185,7 @@
* Stage-2 enforces Normal-WB and Device-nGnRE
*/
#define MT_S2_FWB_NORMAL 6
+#define MT_S2_FWB_NORMAL_NC 5
#define MT_S2_FWB_DEVICE_nGnRE 1
#ifdef CONFIG_ARM64_4K_PAGES
@@ -209,9 +215,20 @@
#include <asm/boot.h>
#include <asm/bug.h>
#include <asm/sections.h>
+#include <asm/sysreg.h>
+
+static inline u64 __pure read_tcr(void)
+{
+ u64 tcr;
+
+ // read_sysreg() uses asm volatile, so avoid it here
+ asm("mrs %0, tcr_el1" : "=r"(tcr));
+ return tcr;
+}
#if VA_BITS > 48
-extern u64 vabits_actual;
+// For reasons of #include hell, we can't use TCR_T1SZ_OFFSET/TCR_T1SZ_MASK here
+#define vabits_actual (64 - ((read_tcr() >> 16) & 63))
#else
#define vabits_actual ((u64)VA_BITS)
#endif
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 2fcf51231d..65977c7783 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -71,10 +71,46 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
pgprot_t prot, bool page_mappings_only);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
extern void mark_linear_text_alias_ro(void);
-extern bool kaslr_requires_kpti(void);
+
+/*
+ * This check is triggered during the early boot before the cpufeature
+ * is initialised. Checking the status on the local CPU allows the boot
+ * CPU to detect the need for non-global mappings and thus avoiding a
+ * pagetable re-write after all the CPUs are booted. This check will be
+ * anyway run on individual CPUs, allowing us to get the consistent
+ * state once the SMP CPUs are up and thus make the switch to non-global
+ * mappings if required.
+ */
+static inline bool kaslr_requires_kpti(void)
+{
+ /*
+ * E0PD does a similar job to KPTI so can be used instead
+ * where available.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
+ u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ if (cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_EL1_E0PD_SHIFT))
+ return false;
+ }
+
+ /*
+ * Systems affected by Cavium erratum 24756 are incompatible
+ * with KPTI.
+ */
+ if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
+ extern const struct midr_range cavium_erratum_27456_cpus[];
+
+ if (is_midr_in_range_list(read_cpuid_id(),
+ cavium_erratum_27456_cpus))
+ return false;
+ }
+
+ return true;
+}
#define INIT_MM_CONTEXT(name) \
- .pgd = init_pg_dir,
+ .pgd = swapper_pg_dir,
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 9ce4200508..c768d16b81 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -61,11 +61,9 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
}
/*
- * TCR.T0SZ value to use when the ID map is active. Usually equals
- * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
- * physical memory, in which case it will be smaller.
+ * TCR.T0SZ value to use when the ID map is active.
*/
-extern int idmap_t0sz;
+#define idmap_t0sz TCR_T0SZ(IDMAP_VA_BITS)
/*
* Ensure TCR.T0SZ is set to the provided value.
@@ -110,18 +108,13 @@ static inline void cpu_uninstall_idmap(void)
cpu_switch_mm(mm->pgd, mm);
}
-static inline void __cpu_install_idmap(pgd_t *idmap)
+static inline void cpu_install_idmap(void)
{
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
cpu_set_idmap_tcr_t0sz();
- cpu_switch_mm(lm_alias(idmap), &init_mm);
-}
-
-static inline void cpu_install_idmap(void)
-{
- __cpu_install_idmap(idmap_pg_dir);
+ cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
}
/*
@@ -148,51 +141,21 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
isb();
}
-/*
- * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
- * avoiding the possibility of conflicting TLB entries being allocated.
- */
-static inline void __cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap, bool cnp)
-{
- typedef void (ttbr_replace_func)(phys_addr_t);
- extern ttbr_replace_func idmap_cpu_replace_ttbr1;
- ttbr_replace_func *replace_phys;
- unsigned long daif;
-
- /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
- phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
-
- if (cnp)
- ttbr1 |= TTBR_CNP_BIT;
-
- replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
-
- __cpu_install_idmap(idmap);
-
- /*
- * We really don't want to take *any* exceptions while TTBR1 is
- * in the process of being replaced so mask everything.
- */
- daif = local_daif_save();
- replace_phys(ttbr1);
- local_daif_restore(daif);
-
- cpu_uninstall_idmap();
-}
+void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp);
static inline void cpu_enable_swapper_cnp(void)
{
- __cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir, true);
+ __cpu_replace_ttbr1(lm_alias(swapper_pg_dir), true);
}
-static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
+static inline void cpu_replace_ttbr1(pgd_t *pgdp)
{
/*
* Only for early TTBR1 replacement before cpucaps are finalized and
* before we've decided whether to use CNP.
*/
WARN_ON(system_capabilities_finalized());
- __cpu_replace_ttbr1(pgdp, idmap, false);
+ __cpu_replace_ttbr1(pgdp, false);
}
/*
diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h
index 20070a8473..a975e1a689 100644
--- a/arch/arm64/include/asm/mshyperv.h
+++ b/arch/arm64/include/asm/mshyperv.h
@@ -31,12 +31,12 @@ void hv_set_vpreg(u32 reg, u64 value);
u64 hv_get_vpreg(u32 reg);
void hv_get_vpreg_128(u32 reg, struct hv_get_vp_registers_output *result);
-static inline void hv_set_register(unsigned int reg, u64 value)
+static inline void hv_set_msr(unsigned int reg, u64 value)
{
hv_set_vpreg(reg, value);
}
-static inline u64 hv_get_register(unsigned int reg)
+static inline u64 hv_get_msr(unsigned int reg)
{
return hv_get_vpreg(reg);
}
diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h
index 2403f7b4cd..792e9fe881 100644
--- a/arch/arm64/include/asm/page-def.h
+++ b/arch/arm64/include/asm/page-def.h
@@ -11,7 +11,7 @@
#include <linux/const.h>
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
index 68908b82b1..587bdb91ab 100644
--- a/arch/arm64/include/asm/patching.h
+++ b/arch/arm64/include/asm/patching.h
@@ -8,6 +8,8 @@ int aarch64_insn_read(void *addr, u32 *insnp);
int aarch64_insn_write(void *addr, u32 insn);
int aarch64_insn_write_literal_u64(void *addr, u64 val);
+void *aarch64_insn_set(void *dst, u32 insn, size_t len);
+void *aarch64_insn_copy(void *dst, void *src, size_t len);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 237224484d..8ff5f2a257 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -14,6 +14,7 @@
#include <asm/tlbflush.h>
#define __HAVE_ARCH_PGD_FREE
+#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
@@ -43,7 +44,8 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
{
- set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
+ if (pgtable_l4_enabled())
+ set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
}
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
@@ -53,6 +55,13 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN;
__p4d_populate(p4dp, __pa(pudp), p4dval);
}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (!pgtable_l4_enabled())
+ return;
+ __pud_free(mm, pud);
+}
#else
static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
{
@@ -60,6 +69,47 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#if CONFIG_PGTABLE_LEVELS > 4
+
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
+{
+ if (pgtable_l5_enabled())
+ set_pgd(pgdp, __pgd(__phys_to_pgd_val(p4dp) | prot));
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp)
+{
+ pgdval_t pgdval = PGD_TYPE_TABLE;
+
+ pgdval |= (mm == &init_mm) ? PGD_TABLE_UXN : PGD_TABLE_PXN;
+ __pgd_populate(pgdp, __pa(p4dp), pgdval);
+}
+
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ gfp_t gfp = GFP_PGTABLE_USER;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ return (p4d_t *)get_zeroed_page(gfp);
+}
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ if (!pgtable_l5_enabled())
+ return;
+ BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+ free_page((unsigned long)p4d);
+}
+
+#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
+#else
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
+{
+ BUILD_BUG();
+}
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index e4944d517c..ef207a0d4f 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -26,10 +26,10 @@
#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
/*
- * Size mapped by an entry at level n ( 0 <= n <= 3)
+ * Size mapped by an entry at level n ( -1 <= n <= 3)
* We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
* in the final page. The maximum number of translation levels supported by
- * the architecture is 4. Hence, starting at level n, we have further
+ * the architecture is 5. Hence, starting at level n, we have further
* ((4 - n) - 1) levels of translation excluding the offset within the page.
* So, the total number of bits mapped by an entry at level n is :
*
@@ -62,9 +62,16 @@
#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3))
#endif
+#if CONFIG_PGTABLE_LEVELS > 4
+#define P4D_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(0)
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE-1))
+#define PTRS_PER_P4D (1 << (PAGE_SHIFT - 3))
+#endif
+
/*
* PGDIR_SHIFT determines the size a top-level page table entry can map
- * (depending on the configuration, this level can be 0, 1 or 2).
+ * (depending on the configuration, this level can be -1, 0, 1 or 2).
*/
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
@@ -87,6 +94,15 @@
/*
* Hardware page table definitions.
*
+ * Level -1 descriptor (PGD).
+ */
+#define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0)
+#define PGD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
+#define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
+#define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59)
+#define PGD_TABLE_UXN (_AT(pgdval_t, 1) << 60)
+
+/*
* Level 0 descriptor (P4D).
*/
#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0)
@@ -155,13 +171,17 @@
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
-#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
+#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
#ifdef CONFIG_ARM64_PA_BITS_52
+#ifdef CONFIG_ARM64_64K_PAGES
#define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12)
-#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
#define PTE_ADDR_HIGH_SHIFT 36
+#define PHYS_TO_PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
#else
-#define PTE_ADDR_MASK PTE_ADDR_LOW
+#define PTE_ADDR_HIGH (_AT(pteval_t, 0x3) << 8)
+#define PTE_ADDR_HIGH_SHIFT 42
+#define PHYS_TO_PTE_ADDR_MASK GENMASK_ULL(49, 8)
+#endif
#endif
/*
@@ -284,6 +304,7 @@
#define TCR_E0PD1 (UL(1) << 56)
#define TCR_TCMA0 (UL(1) << 57)
#define TCR_TCMA1 (UL(1) << 58)
+#define TCR_DS (UL(1) << 59)
/*
* TTBR.
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 483dbfa39c..dd9ee67d1d 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -30,8 +30,8 @@
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
-#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_MAYBE_NG | PTE_MAYBE_SHARED | PTE_AF)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_MAYBE_NG | PMD_MAYBE_SHARED | PMD_SECT_AF)
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
@@ -57,10 +57,6 @@
#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
-#ifdef __ASSEMBLY__
-#define PTE_MAYBE_NG 0
-#endif
-
#ifndef __ASSEMBLY__
#include <asm/cpufeature.h>
@@ -71,7 +67,19 @@ extern bool arm64_use_ng_mappings;
#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
+#ifndef CONFIG_ARM64_LPA2
#define lpa2_is_enabled() false
+#define PTE_MAYBE_SHARED PTE_SHARED
+#define PMD_MAYBE_SHARED PMD_SECT_S
+#else
+static inline bool __pure lpa2_is_enabled(void)
+{
+ return read_tcr() & TCR_DS;
+}
+
+#define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED)
+#define PMD_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PMD_SECT_S)
+#endif
/*
* If we have userspace only BTI we don't want to mark kernel pages
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index b8f158ae25..6d6d4065b0 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -36,6 +36,12 @@ typedef struct { pudval_t pud; } pud_t;
#define __pud(x) ((pud_t) { (x) } )
#endif
+#if CONFIG_PGTABLE_LEVELS > 4
+typedef struct { p4dval_t p4d; } p4d_t;
+#define p4d_val(x) ((x).p4d)
+#define __p4d(x) ((p4d_t) { (x) } )
+#endif
+
typedef struct { pgdval_t pgd; } pgd_t;
#define pgd_val(x) ((x).pgd)
#define __pgd(x) ((pgd_t) { (x) } )
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 79ce70fbb7..afdd56d26a 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -18,11 +18,15 @@
* VMALLOC range.
*
* VMALLOC_START: beginning of the kernel vmalloc space
- * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
- * and fixed mappings
+ * VMALLOC_END: extends to the available space below vmemmap
*/
#define VMALLOC_START (MODULES_END)
-#define VMALLOC_END (VMEMMAP_START - SZ_256M)
+#if VA_BITS == VA_BITS_MIN
+#define VMALLOC_END (VMEMMAP_START - SZ_8M)
+#else
+#define VMEMMAP_UNUSED_NPAGES ((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT)
+#define VMALLOC_END (VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M)
+#endif
#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
@@ -76,15 +80,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#ifdef CONFIG_ARM64_PA_BITS_52
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
+ pte_val(pte) &= ~PTE_MAYBE_SHARED;
return (pte_val(pte) & PTE_ADDR_LOW) |
((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
}
static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
{
- return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PTE_ADDR_MASK;
+ return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
}
#else
-#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK)
+#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_LOW)
#define __phys_to_pte_val(phys) (phys)
#endif
@@ -93,7 +98,8 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pte_none(pte) (!pte_val(pte))
-#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0))
+#define __pte_clear(mm, addr, ptep) \
+ __set_pte(ptep, __pte(0))
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
/*
@@ -133,11 +139,15 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_valid_not_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
/*
+ * Returns true if the pte is valid and has the contiguous bit set.
+ */
+#define pte_valid_cont(pte) (pte_valid(pte) && pte_cont(pte))
+/*
* Could the pte be present in the TLB? We must check mm_tlb_flush_pending
* so that we don't erroneously return false for pages that have been
* remapped as PROT_NONE but are yet to be flushed from the TLB.
* Note that we can't make any assumptions based on the state of the access
- * flag, since ptep_clear_flush_young() elides a DSB when invalidating the
+ * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the
* TLB.
*/
#define pte_accessible(mm, pte) \
@@ -261,7 +271,7 @@ static inline pte_t pte_mkdevmap(pte_t pte)
return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
}
-static inline void set_pte(pte_t *ptep, pte_t pte)
+static inline void __set_pte(pte_t *ptep, pte_t pte)
{
WRITE_ONCE(*ptep, pte);
@@ -275,6 +285,11 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
}
}
+static inline pte_t __ptep_get(pte_t *ptep)
+{
+ return READ_ONCE(*ptep);
+}
+
extern void __sync_icache_dcache(pte_t pteval);
bool pgattr_change_is_safe(u64 old, u64 new);
@@ -302,7 +317,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
if (!IS_ENABLED(CONFIG_DEBUG_VM))
return;
- old_pte = READ_ONCE(*ptep);
+ old_pte = __ptep_get(ptep);
if (!pte_valid(old_pte) || !pte_valid(pte))
return;
@@ -311,7 +326,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
/*
* Check for potential race with hardware updates of the pte
- * (ptep_set_access_flags safely changes valid ptes without going
+ * (__ptep_set_access_flags safely changes valid ptes without going
* through an invalid entry).
*/
VM_WARN_ONCE(!pte_young(pte),
@@ -341,23 +356,38 @@ static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
mte_sync_tags(pte, nr_pages);
}
-static inline void set_ptes(struct mm_struct *mm,
- unsigned long __always_unused addr,
- pte_t *ptep, pte_t pte, unsigned int nr)
+/*
+ * Select all bits except the pfn
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+ unsigned long pfn = pte_pfn(pte);
+
+ return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+}
+
+#define pte_advance_pfn pte_advance_pfn
+static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
+{
+ return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
+}
+
+static inline void __set_ptes(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
{
page_table_check_ptes_set(mm, ptep, pte, nr);
__sync_cache_and_tags(pte, nr);
for (;;) {
__check_safe_pte_update(mm, ptep, pte);
- set_pte(ptep, pte);
+ __set_pte(ptep, pte);
if (--nr == 0)
break;
ptep++;
- pte_val(pte) += PAGE_SIZE;
+ pte = pte_advance_pfn(pte, 1);
}
}
-#define set_ptes set_ptes
/*
* Huge pte definitions.
@@ -433,16 +463,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}
-/*
- * Select all bits except the pfn
- */
-static inline pgprot_t pte_pgprot(pte_t pte)
-{
- unsigned long pfn = pte_pfn(pte);
-
- return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
-}
-
#ifdef CONFIG_NUMA_BALANCING
/*
* See the comment in include/linux/pgtable.h
@@ -534,7 +554,7 @@ static inline void __set_pte_at(struct mm_struct *mm,
{
__sync_cache_and_tags(pte, nr);
__check_safe_pte_update(mm, ptep, pte);
- set_pte(ptep, pte);
+ __set_pte(ptep, pte);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
@@ -616,12 +636,12 @@ static inline bool pud_table(pud_t pud) { return true; }
PUD_TYPE_TABLE)
#endif
-extern pgd_t init_pg_dir[PTRS_PER_PGD];
+extern pgd_t init_pg_dir[];
extern pgd_t init_pg_end[];
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
-extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
-extern pgd_t reserved_pg_dir[PTRS_PER_PGD];
+extern pgd_t swapper_pg_dir[];
+extern pgd_t idmap_pg_dir[];
+extern pgd_t tramp_pg_dir[];
+extern pgd_t reserved_pg_dir[];
extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
@@ -694,14 +714,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define pud_user(pud) pte_user(pud_pte(pud))
#define pud_user_exec(pud) pte_user_exec(pud_pte(pud))
+static inline bool pgtable_l4_enabled(void);
+
static inline void set_pud(pud_t *pudp, pud_t pud)
{
-#ifdef __PAGETABLE_PUD_FOLDED
- if (in_swapper_pgdir(pudp)) {
+ if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) {
set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
return;
}
-#endif /* __PAGETABLE_PUD_FOLDED */
WRITE_ONCE(*pudp, pud);
@@ -754,12 +774,27 @@ static inline pmd_t *pud_pgtable(pud_t pud)
#if CONFIG_PGTABLE_LEVELS > 3
+static __always_inline bool pgtable_l4_enabled(void)
+{
+ if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2))
+ return true;
+ if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
+ return vabits_actual == VA_BITS;
+ return alternative_has_cap_unlikely(ARM64_HAS_VA52);
+}
+
+static inline bool mm_pud_folded(const struct mm_struct *mm)
+{
+ return !pgtable_l4_enabled();
+}
+#define mm_pud_folded mm_pud_folded
+
#define pud_ERROR(e) \
pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
-#define p4d_none(p4d) (!p4d_val(p4d))
-#define p4d_bad(p4d) (!(p4d_val(p4d) & 2))
-#define p4d_present(p4d) (p4d_val(p4d))
+#define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d))
+#define p4d_bad(p4d) (pgtable_l4_enabled() && !(p4d_val(p4d) & 2))
+#define p4d_present(p4d) (!p4d_none(p4d))
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
@@ -775,7 +810,8 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
static inline void p4d_clear(p4d_t *p4dp)
{
- set_p4d(p4dp, __p4d(0));
+ if (pgtable_l4_enabled())
+ set_p4d(p4dp, __p4d(0));
}
static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
@@ -783,27 +819,75 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
return __p4d_to_phys(p4d);
}
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr)
+{
+ return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr);
+}
+
static inline pud_t *p4d_pgtable(p4d_t p4d)
{
return (pud_t *)__va(p4d_page_paddr(p4d));
}
-/* Find an entry in the first-level page table. */
-#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
+static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr)
+{
+ BUG_ON(!pgtable_l4_enabled());
-#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
-#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr))
-#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
+ return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t);
+}
-#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
+static inline
+pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr);
+}
+#define pud_offset_lockless pud_offset_lockless
+
+static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr)
+{
+ return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr);
+}
+#define pud_offset pud_offset
+
+static inline pud_t *pud_set_fixmap(unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return NULL;
+ return (pud_t *)set_fixmap_offset(FIX_PUD, addr);
+}
+
+static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return pud_set_fixmap(pud_offset_phys(p4dp, addr));
+}
+
+static inline void pud_clear_fixmap(void)
+{
+ if (pgtable_l4_enabled())
+ clear_fixmap(FIX_PUD);
+}
/* use ONLY for statically allocated translation tables */
-#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
+static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr));
+}
+
+#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
#else
+static inline bool pgtable_l4_enabled(void) { return false; }
+
#define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;})
-#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;})
/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
#define pud_set_fixmap(addr) NULL
@@ -814,6 +898,122 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#if CONFIG_PGTABLE_LEVELS > 4
+
+static __always_inline bool pgtable_l5_enabled(void)
+{
+ if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
+ return vabits_actual == VA_BITS;
+ return alternative_has_cap_unlikely(ARM64_HAS_VA52);
+}
+
+static inline bool mm_p4d_folded(const struct mm_struct *mm)
+{
+ return !pgtable_l5_enabled();
+}
+#define mm_p4d_folded mm_p4d_folded
+
+#define p4d_ERROR(e) \
+ pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))
+
+#define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd))
+#define pgd_bad(pgd) (pgtable_l5_enabled() && !(pgd_val(pgd) & 2))
+#define pgd_present(pgd) (!pgd_none(pgd))
+
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if (in_swapper_pgdir(pgdp)) {
+ set_swapper_pgd(pgdp, __pgd(pgd_val(pgd)));
+ return;
+ }
+
+ WRITE_ONCE(*pgdp, pgd);
+ dsb(ishst);
+ isb();
+}
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+ if (pgtable_l5_enabled())
+ set_pgd(pgdp, __pgd(0));
+}
+
+static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
+{
+ return __pgd_to_phys(pgd);
+}
+
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr)
+{
+ return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr);
+}
+
+static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr)
+{
+ BUG_ON(!pgtable_l5_enabled());
+
+ return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t);
+}
+
+static inline
+p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr);
+}
+#define p4d_offset_lockless p4d_offset_lockless
+
+static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr)
+{
+ return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr);
+}
+
+static inline p4d_t *p4d_set_fixmap(unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return NULL;
+ return (p4d_t *)set_fixmap_offset(FIX_P4D, addr);
+}
+
+static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return p4d_set_fixmap(p4d_offset_phys(pgdp, addr));
+}
+
+static inline void p4d_clear_fixmap(void)
+{
+ if (pgtable_l5_enabled())
+ clear_fixmap(FIX_P4D);
+}
+
+/* use ONLY for statically allocated translation tables */
+static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr));
+}
+
+#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
+
+#else
+
+static inline bool pgtable_l5_enabled(void) { return false; }
+
+/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
+#define p4d_set_fixmap(addr) NULL
+#define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp)
+#define p4d_clear_fixmap()
+
+#define p4d_offset_kimg(dir,addr) ((p4d_t *)dir)
+
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))
@@ -848,8 +1048,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-extern int ptep_set_access_flags(struct vm_area_struct *vma,
+extern int __ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty);
@@ -859,7 +1058,8 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty)
{
- return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
+ return __ptep_set_access_flags(vma, address, (pte_t *)pmdp,
+ pmd_pte(entry), dirty);
}
static inline int pud_devmap(pud_t pud)
@@ -893,12 +1093,13 @@ static inline bool pud_user_accessible_page(pud_t pud)
/*
* Atomic pte/pmd modifications.
*/
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int __ptep_test_and_clear_young(pte_t *ptep)
+static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address,
+ pte_t *ptep)
{
pte_t old_pte, pte;
- pte = READ_ONCE(*ptep);
+ pte = __ptep_get(ptep);
do {
old_pte = pte;
pte = pte_mkold(pte);
@@ -909,18 +1110,10 @@ static inline int __ptep_test_and_clear_young(pte_t *ptep)
return pte_young(pte);
}
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address,
- pte_t *ptep)
-{
- return __ptep_test_and_clear_young(ptep);
-}
-
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- int young = ptep_test_and_clear_young(vma, address, ptep);
+ int young = __ptep_test_and_clear_young(vma, address, ptep);
if (young) {
/*
@@ -943,12 +1136,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pmd_t *pmdp)
{
- return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
+ return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
@@ -958,6 +1150,37 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
return pte;
}
+static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full)
+{
+ for (;;) {
+ __ptep_get_and_clear(mm, addr, ptep);
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+
+static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full)
+{
+ pte_t pte, tmp_pte;
+
+ pte = __ptep_get_and_clear(mm, addr, ptep);
+ while (--nr) {
+ ptep++;
+ addr += PAGE_SIZE;
+ tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
+ if (pte_dirty(tmp_pte))
+ pte = pte_mkdirty(pte);
+ if (pte_young(tmp_pte))
+ pte = pte_mkyoung(pte);
+ }
+ return pte;
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
@@ -971,16 +1194,12 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-/*
- * ptep_set_wrprotect - mark read-only while trasferring potential hardware
- * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
- */
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
+static inline void ___ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep,
+ pte_t pte)
{
- pte_t old_pte, pte;
+ pte_t old_pte;
- pte = READ_ONCE(*ptep);
do {
old_pte = pte;
pte = pte_wrprotect(pte);
@@ -989,12 +1208,31 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
} while (pte_val(pte) != pte_val(old_pte));
}
+/*
+ * __ptep_set_wrprotect - mark read-only while trasferring potential hardware
+ * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
+ */
+static inline void __ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ ___ptep_set_wrprotect(mm, address, ptep, __ptep_get(ptep));
+}
+
+static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address,
+ pte_t *ptep, unsigned int nr)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr; i++, address += PAGE_SIZE, ptep++)
+ __ptep_set_wrprotect(mm, address, ptep);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
- ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
+ __ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}
#define pmdp_establish pmdp_establish
@@ -1072,7 +1310,7 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
#endif /* CONFIG_ARM64_MTE */
/*
- * On AArch64, the cache coherency is handled via the set_pte_at() function.
+ * On AArch64, the cache coherency is handled via the __set_ptes() function.
*/
static inline void update_mmu_cache_range(struct vm_fault *vmf,
struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
@@ -1124,6 +1362,282 @@ extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t old_pte, pte_t new_pte);
+
+#ifdef CONFIG_ARM64_CONTPTE
+
+/*
+ * The contpte APIs are used to transparently manage the contiguous bit in ptes
+ * where it is possible and makes sense to do so. The PTE_CONT bit is considered
+ * a private implementation detail of the public ptep API (see below).
+ */
+extern void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+extern void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+extern pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte);
+extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep);
+extern void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr);
+extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full);
+extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full);
+extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr);
+extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t entry, int dirty);
+
+static __always_inline void contpte_try_fold(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, pte_t pte)
+{
+ /*
+ * Only bother trying if both the virtual and physical addresses are
+ * aligned and correspond to the last entry in a contig range. The core
+ * code mostly modifies ranges from low to high, so this is the likely
+ * the last modification in the contig range, so a good time to fold.
+ * We can't fold special mappings, because there is no associated folio.
+ */
+
+ const unsigned long contmask = CONT_PTES - 1;
+ bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask;
+
+ if (unlikely(valign)) {
+ bool palign = (pte_pfn(pte) & contmask) == contmask;
+
+ if (unlikely(palign &&
+ pte_valid(pte) && !pte_cont(pte) && !pte_special(pte)))
+ __contpte_try_fold(mm, addr, ptep, pte);
+ }
+}
+
+static __always_inline void contpte_try_unfold(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, pte_t pte)
+{
+ if (unlikely(pte_valid_cont(pte)))
+ __contpte_try_unfold(mm, addr, ptep, pte);
+}
+
+#define pte_batch_hint pte_batch_hint
+static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
+{
+ if (!pte_valid_cont(pte))
+ return 1;
+
+ return CONT_PTES - (((unsigned long)ptep >> 3) & (CONT_PTES - 1));
+}
+
+/*
+ * The below functions constitute the public API that arm64 presents to the
+ * core-mm to manipulate PTE entries within their page tables (or at least this
+ * is the subset of the API that arm64 needs to implement). These public
+ * versions will automatically and transparently apply the contiguous bit where
+ * it makes sense to do so. Therefore any users that are contig-aware (e.g.
+ * hugetlb, kernel mapper) should NOT use these APIs, but instead use the
+ * private versions, which are prefixed with double underscore. All of these
+ * APIs except for ptep_get_lockless() are expected to be called with the PTL
+ * held. Although the contiguous bit is considered private to the
+ * implementation, it is deliberately allowed to leak through the getters (e.g.
+ * ptep_get()), back to core code. This is required so that pte_leaf_size() can
+ * provide an accurate size for perf_get_pgtable_size(). But this leakage means
+ * its possible a pte will be passed to a setter with the contiguous bit set, so
+ * we explicitly clear the contiguous bit in those cases to prevent accidentally
+ * setting it in the pgtable.
+ */
+
+#define ptep_get ptep_get
+static inline pte_t ptep_get(pte_t *ptep)
+{
+ pte_t pte = __ptep_get(ptep);
+
+ if (likely(!pte_valid_cont(pte)))
+ return pte;
+
+ return contpte_ptep_get(ptep, pte);
+}
+
+#define ptep_get_lockless ptep_get_lockless
+static inline pte_t ptep_get_lockless(pte_t *ptep)
+{
+ pte_t pte = __ptep_get(ptep);
+
+ if (likely(!pte_valid_cont(pte)))
+ return pte;
+
+ return contpte_ptep_get_lockless(ptep);
+}
+
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+ /*
+ * We don't have the mm or vaddr so cannot unfold contig entries (since
+ * it requires tlb maintenance). set_pte() is not used in core code, so
+ * this should never even be called. Regardless do our best to service
+ * any call and emit a warning if there is any attempt to set a pte on
+ * top of an existing contig range.
+ */
+ pte_t orig_pte = __ptep_get(ptep);
+
+ WARN_ON_ONCE(pte_valid_cont(orig_pte));
+ __set_pte(ptep, pte_mknoncont(pte));
+}
+
+#define set_ptes set_ptes
+static __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
+{
+ pte = pte_mknoncont(pte);
+
+ if (likely(nr == 1)) {
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ __set_ptes(mm, addr, ptep, pte, 1);
+ contpte_try_fold(mm, addr, ptep, pte);
+ } else {
+ contpte_set_ptes(mm, addr, ptep, pte, nr);
+ }
+}
+
+static inline void pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ __pte_clear(mm, addr, ptep);
+}
+
+#define clear_full_ptes clear_full_ptes
+static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full)
+{
+ if (likely(nr == 1)) {
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ __clear_full_ptes(mm, addr, ptep, nr, full);
+ } else {
+ contpte_clear_full_ptes(mm, addr, ptep, nr, full);
+ }
+}
+
+#define get_and_clear_full_ptes get_and_clear_full_ptes
+static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full)
+{
+ pte_t pte;
+
+ if (likely(nr == 1)) {
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+ } else {
+ pte = contpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+ }
+
+ return pte;
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ return __ptep_get_and_clear(mm, addr, ptep);
+}
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t orig_pte = __ptep_get(ptep);
+
+ if (likely(!pte_valid_cont(orig_pte)))
+ return __ptep_test_and_clear_young(vma, addr, ptep);
+
+ return contpte_ptep_test_and_clear_young(vma, addr, ptep);
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t orig_pte = __ptep_get(ptep);
+
+ if (likely(!pte_valid_cont(orig_pte)))
+ return __ptep_clear_flush_young(vma, addr, ptep);
+
+ return contpte_ptep_clear_flush_young(vma, addr, ptep);
+}
+
+#define wrprotect_ptes wrprotect_ptes
+static __always_inline void wrprotect_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+ if (likely(nr == 1)) {
+ /*
+ * Optimization: wrprotect_ptes() can only be called for present
+ * ptes so we only need to check contig bit as condition for
+ * unfold, and we can remove the contig bit from the pte we read
+ * to avoid re-reading. This speeds up fork() which is sensitive
+ * for order-0 folios. Equivalent to contpte_try_unfold().
+ */
+ pte_t orig_pte = __ptep_get(ptep);
+
+ if (unlikely(pte_cont(orig_pte))) {
+ __contpte_try_unfold(mm, addr, ptep, orig_pte);
+ orig_pte = pte_mknoncont(orig_pte);
+ }
+ ___ptep_set_wrprotect(mm, addr, ptep, orig_pte);
+ } else {
+ contpte_wrprotect_ptes(mm, addr, ptep, nr);
+ }
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ wrprotect_ptes(mm, addr, ptep, 1);
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ pte_t orig_pte = __ptep_get(ptep);
+
+ entry = pte_mknoncont(entry);
+
+ if (likely(!pte_valid_cont(orig_pte)))
+ return __ptep_set_access_flags(vma, addr, ptep, entry, dirty);
+
+ return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty);
+}
+
+#else /* CONFIG_ARM64_CONTPTE */
+
+#define ptep_get __ptep_get
+#define set_pte __set_pte
+#define set_ptes __set_ptes
+#define pte_clear __pte_clear
+#define clear_full_ptes __clear_full_ptes
+#define get_and_clear_full_ptes __get_and_clear_full_ptes
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define ptep_get_and_clear __ptep_get_and_clear
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young __ptep_test_and_clear_young
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young __ptep_clear_flush_young
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define ptep_set_wrprotect __ptep_set_wrprotect
+#define wrprotect_ptes __wrprotect_ptes
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags __ptep_set_access_flags
+
+#endif /* CONFIG_ARM64_CONTPTE */
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 5b0a04810b..f77371232d 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -155,6 +155,8 @@ struct thread_struct {
struct {
unsigned long tp_value; /* TLS register */
unsigned long tp2_value;
+ u64 fpmr;
+ unsigned long pad;
struct user_fpsimd_state fpsimd_state;
} uw;
@@ -253,6 +255,8 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) !=
sizeof_field(struct thread_struct, uw.tp_value) +
sizeof_field(struct thread_struct, uw.tp2_value) +
+ sizeof_field(struct thread_struct, uw.fpmr) +
+ sizeof_field(struct thread_struct, uw.pad) +
sizeof_field(struct thread_struct, uw.fpsimd_state));
*offset = offsetof(struct thread_struct, uw);
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 581caac525..5b1701c76d 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -29,13 +29,6 @@ void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
#endif
-void ptdump_check_wx(void);
#endif /* CONFIG_PTDUMP_CORE */
-#ifdef CONFIG_DEBUG_WX
-#define debug_checkwx() ptdump_check_wx()
-#else
-#define debug_checkwx() do { } while (0)
-#endif
-
#endif /* __ASM_PTDUMP_H */
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index 3fdae5fe31..2e010ea76b 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -33,37 +33,11 @@
#include <asm/cpufeature.h>
#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
-static inline bool should_patch_pac_into_scs(void)
-{
- u64 reg;
-
- /*
- * We only enable the shadow call stack dynamically if we are running
- * on a system that does not implement PAC or BTI. PAC and SCS provide
- * roughly the same level of protection, and BTI relies on the PACIASP
- * instructions serving as landing pads, preventing us from patching
- * those instructions into something else.
- */
- reg = read_sysreg_s(SYS_ID_AA64ISAR1_EL1);
- if (SYS_FIELD_GET(ID_AA64ISAR1_EL1, APA, reg) |
- SYS_FIELD_GET(ID_AA64ISAR1_EL1, API, reg))
- return false;
-
- reg = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
- if (SYS_FIELD_GET(ID_AA64ISAR2_EL1, APA3, reg))
- return false;
-
- if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) {
- reg = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
- if (reg & (0xf << ID_AA64PFR1_EL1_BT_SHIFT))
- return false;
- }
- return true;
-}
-
static inline void dynamic_scs_init(void)
{
- if (should_patch_pac_into_scs()) {
+ extern bool __pi_dynamic_scs_is_enabled;
+
+ if (__pi_dynamic_scs_is_enabled) {
pr_info("Enabling dynamic shadow call stack\n");
static_branch_enable(&dynamic_scs_enabled);
}
@@ -72,8 +46,8 @@ static inline void dynamic_scs_init(void)
static inline void dynamic_scs_init(void) {}
#endif
-int scs_patch(const u8 eh_frame[], int size);
-asmlinkage void scs_patch_vmlinux(void);
+int __pi_scs_patch(const u8 eh_frame[], int size);
+asmlinkage void __pi_scs_patch_vmlinux(void);
#endif /* __ASSEMBLY __ */
diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
index 2e4d7da74f..ba269a7a32 100644
--- a/arch/arm64/include/asm/setup.h
+++ b/arch/arm64/include/asm/setup.h
@@ -7,9 +7,6 @@
#include <uapi/asm/setup.h>
-void *get_early_fdt_ptr(void);
-void early_fdt_map(u64 dt_phys);
-
/*
* These two variables are used in the head.S file.
*/
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index c3b19b376c..af3b206fa4 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1036,18 +1036,18 @@
* Permission Indirection Extension (PIE) permission encodings.
* Encodings with the _O suffix, have overlays applied (Permission Overlay Extension).
*/
-#define PIE_NONE_O 0x0
-#define PIE_R_O 0x1
-#define PIE_X_O 0x2
-#define PIE_RX_O 0x3
-#define PIE_RW_O 0x5
-#define PIE_RWnX_O 0x6
-#define PIE_RWX_O 0x7
-#define PIE_R 0x8
-#define PIE_GCS 0x9
-#define PIE_RX 0xa
-#define PIE_RW 0xc
-#define PIE_RWX 0xe
+#define PIE_NONE_O UL(0x0)
+#define PIE_R_O UL(0x1)
+#define PIE_X_O UL(0x2)
+#define PIE_RX_O UL(0x3)
+#define PIE_RW_O UL(0x5)
+#define PIE_RWnX_O UL(0x6)
+#define PIE_RWX_O UL(0x7)
+#define PIE_R UL(0x8)
+#define PIE_GCS UL(0x9)
+#define PIE_RX UL(0xa)
+#define PIE_RW UL(0xc)
+#define PIE_RWX UL(0xe)
#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
@@ -1181,6 +1181,8 @@
par; \
})
+#define SYS_FIELD_VALUE(reg, field, val) reg##_##field##_##val
+
#define SYS_FIELD_GET(reg, field, val) \
FIELD_GET(reg##_##field##_MASK, val)
@@ -1188,7 +1190,8 @@
FIELD_PREP(reg##_##field##_MASK, val)
#define SYS_FIELD_PREP_ENUM(reg, field, val) \
- FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+ FIELD_PREP(reg##_##field##_MASK, \
+ SYS_FIELD_VALUE(reg, field, val))
#endif
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 0150deb332..a947c6e784 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -103,6 +103,9 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
{
struct ptdesc *ptdesc = virt_to_ptdesc(pudp);
+ if (!pgtable_l4_enabled())
+ return;
+
pagetable_pud_dtor(ptdesc);
tlb_remove_ptdesc(tlb, ptdesc);
}
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bfeb54f3a9..a75de2665d 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -424,7 +424,7 @@ do { \
#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
-static inline void __flush_tlb_range(struct vm_area_struct *vma,
+static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
unsigned long stride, bool last_level,
int tlb_level)
@@ -458,10 +458,19 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
__flush_tlb_range_op(vae1is, start, pages, stride, asid,
tlb_level, true, lpa2_is_enabled());
- dsb(ish);
mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
}
+static inline void __flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level,
+ int tlb_level)
+{
+ __flush_tlb_range_nosync(vma, start, end, stride,
+ last_level, tlb_level);
+ dsb(ish);
+}
+
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index f3b151ed0d..14251abee2 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -9,7 +9,8 @@
#ifndef __AARCH64EB__
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
struct word_at_a_time {
const unsigned long one_bits, high_bits;
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 5023599fa2..285610e626 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -107,5 +107,20 @@
#define HWCAP2_SVE_B16B16 (1UL << 45)
#define HWCAP2_LRCPC3 (1UL << 46)
#define HWCAP2_LSE128 (1UL << 47)
+#define HWCAP2_FPMR (1UL << 48)
+#define HWCAP2_LUT (1UL << 49)
+#define HWCAP2_FAMINMAX (1UL << 50)
+#define HWCAP2_F8CVT (1UL << 51)
+#define HWCAP2_F8FMA (1UL << 52)
+#define HWCAP2_F8DP4 (1UL << 53)
+#define HWCAP2_F8DP2 (1UL << 54)
+#define HWCAP2_F8E4M3 (1UL << 55)
+#define HWCAP2_F8E5M2 (1UL << 56)
+#define HWCAP2_SME_LUTV2 (1UL << 57)
+#define HWCAP2_SME_F8F16 (1UL << 58)
+#define HWCAP2_SME_F8F32 (1UL << 59)
+#define HWCAP2_SME_SF8FMA (1UL << 60)
+#define HWCAP2_SME_SF8DP4 (1UL << 61)
+#define HWCAP2_SME_SF8DP2 (1UL << 62)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 89d2fc872d..964df31da9 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -37,9 +37,7 @@
#include <asm/ptrace.h>
#include <asm/sve_context.h>
-#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_READONLY_MEM
#define __KVM_HAVE_VCPU_EVENTS
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -76,11 +74,11 @@ struct kvm_regs {
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
-#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
- KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
+ KVM_ARM_DEVICE_TYPE_SHIFT)
#define KVM_ARM_DEVICE_ID_SHIFT 16
-#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
- KVM_ARM_DEVICE_ID_SHIFT)
+#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
+ KVM_ARM_DEVICE_ID_SHIFT)
/* Supported device IDs */
#define KVM_ARM_DEVICE_VGIC_V2 0
@@ -162,6 +160,11 @@ struct kvm_sync_regs {
__u64 device_irq_level;
};
+/* Bits for run->s.regs.device_irq_level */
+#define KVM_ARM_DEV_EL1_VTIMER (1 << 0)
+#define KVM_ARM_DEV_EL1_PTIMER (1 << 1)
+#define KVM_ARM_DEV_PMU (1 << 2)
+
/*
* PMU filter structure. Describe a range of events with a particular
* action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index f23c1dc3f0..8a45b7a411 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -152,6 +152,14 @@ struct tpidr2_context {
__u64 tpidr2;
};
+/* FPMR context */
+#define FPMR_MAGIC 0x46504d52
+
+struct fpmr_context {
+ struct _aarch64_ctx head;
+ __u64 fpmr;
+};
+
#define ZA_MAGIC 0x54366345
struct za_context {
diff --git a/arch/arm64/include/uapi/asm/sve_context.h b/arch/arm64/include/uapi/asm/sve_context.h
index 754ab751b5..72aefc0810 100644
--- a/arch/arm64/include/uapi/asm/sve_context.h
+++ b/arch/arm64/include/uapi/asm/sve_context.h
@@ -13,6 +13,17 @@
#define __SVE_VQ_BYTES 16 /* number of bytes per quadword */
+/*
+ * Yes, __SVE_VQ_MAX is 512 QUADWORDS.
+ *
+ * To help ensure forward portability, this is much larger than the
+ * current maximum value defined by the SVE architecture. While arrays
+ * or static allocations can be sized based on this value, watch out!
+ * It will waste a surprisingly large amount of memory.
+ *
+ * Dynamic sizing based on the actual runtime vector length is likely to
+ * be preferable for most purposes.
+ */
#define __SVE_VQ_MIN 1
#define __SVE_VQ_MAX 512