summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-03 05:08:50 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-03 05:08:50 +0000
commitc237094850d33ea4ae93ebb2b04ba2e23adedeb6 (patch)
treee08643d51bc147dd580d1a5412657e15afbf8359 /arch
parentAdding debian version 6.8.11-1. (diff)
downloadlinux-c237094850d33ea4ae93ebb2b04ba2e23adedeb6.tar.xz
linux-c237094850d33ea4ae93ebb2b04ba2e23adedeb6.zip
Merging upstream version 6.8.12.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/configs/sunxi_defconfig1
-rw-r--r--arch/arm64/include/asm/irqflags.h1
-rw-r--r--arch/arm64/kernel/fpsimd.c44
-rw-r--r--arch/loongarch/kernel/perf_event.c2
-rw-r--r--arch/m68k/kernel/entry.S4
-rw-r--r--arch/m68k/mac/misc.c36
-rw-r--r--arch/openrisc/kernel/traps.c48
-rw-r--r--arch/parisc/kernel/parisc_ksyms.c1
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c2
-rw-r--r--arch/riscv/Kconfig.errata8
-rw-r--r--arch/riscv/errata/thead/errata.c24
-rw-r--r--arch/riscv/include/asm/errata_list.h20
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c20
-rw-r--r--arch/s390/include/asm/gmap.h2
-rw-r--r--arch/s390/include/asm/mmu.h5
-rw-r--r--arch/s390/include/asm/mmu_context.h1
-rw-r--r--arch/s390/include/asm/pgtable.h16
-rw-r--r--arch/s390/kvm/kvm-s390.c4
-rw-r--r--arch/s390/mm/gmap.c165
-rw-r--r--arch/s390/net/bpf_jit_comp.c8
-rw-r--r--arch/sh/kernel/kprobes.c7
-rw-r--r--arch/sh/lib/checksum.S67
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/boot/compressed/head_64.S5
-rw-r--r--arch/x86/crypto/nh-avx2-x86_64.S1
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S1
-rw-r--r--arch/x86/crypto/sha512-avx2-asm.S1
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c28
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h2
-rw-r--r--arch/x86/include/asm/pgtable_types.h2
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/sparsemem.h2
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c2
-rw-r--r--arch/x86/kernel/tsc_sync.c6
-rw-r--r--arch/x86/lib/x86-opcode-map.txt10
-rw-r--r--arch/x86/mm/fault.c33
-rw-r--r--arch/x86/mm/numa.c4
-rw-r--r--arch/x86/mm/pat/set_memory.c68
-rw-r--r--arch/x86/net/bpf_jit_comp.c57
-rw-r--r--arch/x86/purgatory/Makefile3
-rw-r--r--arch/x86/tools/relocs.c9
41 files changed, 407 insertions, 322 deletions
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index bddc82f789..a83d29fed1 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -110,6 +110,7 @@ CONFIG_DRM_PANEL_LVDS=y
CONFIG_DRM_PANEL_SIMPLE=y
CONFIG_DRM_PANEL_EDP=y
CONFIG_DRM_SIMPLE_BRIDGE=y
+CONFIG_DRM_DW_HDMI=y
CONFIG_DRM_LIMA=y
CONFIG_FB_SIMPLE=y
CONFIG_BACKLIGHT_CLASS_DEVICE=y
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 0a7186a938..d4d7451c2c 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -5,7 +5,6 @@
#ifndef __ASM_IRQFLAGS_H
#define __ASM_IRQFLAGS_H
-#include <asm/alternative.h>
#include <asm/barrier.h>
#include <asm/ptrace.h>
#include <asm/sysreg.h>
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index f27acca550..5e7b0eb468 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1518,6 +1518,27 @@ static void fpsimd_save_kernel_state(struct task_struct *task)
task->thread.kernel_fpsimd_cpu = smp_processor_id();
}
+/*
+ * Invalidate any task's FPSIMD state that is present on this cpu.
+ * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
+ * before calling this function.
+ */
+static void fpsimd_flush_cpu_state(void)
+{
+ WARN_ON(!system_supports_fpsimd());
+ __this_cpu_write(fpsimd_last_state.st, NULL);
+
+ /*
+ * Leaving streaming mode enabled will cause issues for any kernel
+ * NEON and leaving streaming mode or ZA enabled may increase power
+ * consumption.
+ */
+ if (system_supports_sme())
+ sme_smstop();
+
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
+}
+
void fpsimd_thread_switch(struct task_struct *next)
{
bool wrong_task, wrong_cpu;
@@ -1535,7 +1556,7 @@ void fpsimd_thread_switch(struct task_struct *next)
if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
fpsimd_load_kernel_state(next);
- set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+ fpsimd_flush_cpu_state();
} else {
/*
* Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
@@ -1825,27 +1846,6 @@ void fpsimd_flush_task_state(struct task_struct *t)
}
/*
- * Invalidate any task's FPSIMD state that is present on this cpu.
- * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
- * before calling this function.
- */
-static void fpsimd_flush_cpu_state(void)
-{
- WARN_ON(!system_supports_fpsimd());
- __this_cpu_write(fpsimd_last_state.st, NULL);
-
- /*
- * Leaving streaming mode enabled will cause issues for any kernel
- * NEON and leaving streaming mode or ZA enabled may increase power
- * consumption.
- */
- if (system_supports_sme())
- sme_smstop();
-
- set_thread_flag(TIF_FOREIGN_FPSTATE);
-}
-
-/*
* Save the FPSIMD state to memory and invalidate cpu view.
* This function must be called with preemption disabled.
*/
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index 0491bf453c..cac7cba81b 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -884,4 +884,4 @@ static int __init init_hw_perf_events(void)
return 0;
}
-early_initcall(init_hw_perf_events);
+pure_initcall(init_hw_perf_events);
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 3bcdd32a6b..338b474910 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -430,7 +430,9 @@ resume:
movec %a0,%dfc
/* restore status register */
- movew %a1@(TASK_THREAD+THREAD_SR),%sr
+ movew %a1@(TASK_THREAD+THREAD_SR),%d0
+ oriw #0x0700,%d0
+ movew %d0,%sr
rts
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 4c8f8cbfa0..e7f0f72c1b 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -453,30 +453,18 @@ void mac_poweroff(void)
void mac_reset(void)
{
- if (macintosh_config->adb_type == MAC_ADB_II &&
- macintosh_config->ident != MAC_MODEL_SE30) {
- /* need ROMBASE in booter */
- /* indeed, plus need to MAP THE ROM !! */
-
- if (mac_bi_data.rombase == 0)
- mac_bi_data.rombase = 0x40800000;
-
- /* works on some */
- rom_reset = (void *) (mac_bi_data.rombase + 0xa);
-
- local_irq_disable();
- rom_reset();
#ifdef CONFIG_ADB_CUDA
- } else if (macintosh_config->adb_type == MAC_ADB_EGRET ||
- macintosh_config->adb_type == MAC_ADB_CUDA) {
+ if (macintosh_config->adb_type == MAC_ADB_EGRET ||
+ macintosh_config->adb_type == MAC_ADB_CUDA) {
cuda_restart();
+ } else
#endif
#ifdef CONFIG_ADB_PMU
- } else if (macintosh_config->adb_type == MAC_ADB_PB2) {
+ if (macintosh_config->adb_type == MAC_ADB_PB2) {
pmu_restart();
+ } else
#endif
- } else if (CPU_IS_030) {
-
+ if (CPU_IS_030) {
/* 030-specific reset routine. The idea is general, but the
* specific registers to reset are '030-specific. Until I
* have a non-030 machine, I can't test anything else.
@@ -524,6 +512,18 @@ void mac_reset(void)
"jmp %/a0@\n\t" /* jump to the reset vector */
".chip 68k"
: : "r" (offset), "a" (rombase) : "a0");
+ } else {
+ /* need ROMBASE in booter */
+ /* indeed, plus need to MAP THE ROM !! */
+
+ if (mac_bi_data.rombase == 0)
+ mac_bi_data.rombase = 0x40800000;
+
+ /* works on some */
+ rom_reset = (void *)(mac_bi_data.rombase + 0xa);
+
+ local_irq_disable();
+ rom_reset();
}
/* should never get here */
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index 9370888c9a..90554a5558 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -180,29 +180,39 @@ asmlinkage void unhandled_exception(struct pt_regs *regs, int ea, int vector)
asmlinkage void do_fpe_trap(struct pt_regs *regs, unsigned long address)
{
- int code = FPE_FLTUNK;
- unsigned long fpcsr = regs->fpcsr;
-
- if (fpcsr & SPR_FPCSR_IVF)
- code = FPE_FLTINV;
- else if (fpcsr & SPR_FPCSR_OVF)
- code = FPE_FLTOVF;
- else if (fpcsr & SPR_FPCSR_UNF)
- code = FPE_FLTUND;
- else if (fpcsr & SPR_FPCSR_DZF)
- code = FPE_FLTDIV;
- else if (fpcsr & SPR_FPCSR_IXF)
- code = FPE_FLTRES;
-
- /* Clear all flags */
- regs->fpcsr &= ~SPR_FPCSR_ALLF;
-
- force_sig_fault(SIGFPE, code, (void __user *)regs->pc);
+ if (user_mode(regs)) {
+ int code = FPE_FLTUNK;
+ unsigned long fpcsr = regs->fpcsr;
+
+ if (fpcsr & SPR_FPCSR_IVF)
+ code = FPE_FLTINV;
+ else if (fpcsr & SPR_FPCSR_OVF)
+ code = FPE_FLTOVF;
+ else if (fpcsr & SPR_FPCSR_UNF)
+ code = FPE_FLTUND;
+ else if (fpcsr & SPR_FPCSR_DZF)
+ code = FPE_FLTDIV;
+ else if (fpcsr & SPR_FPCSR_IXF)
+ code = FPE_FLTRES;
+
+ /* Clear all flags */
+ regs->fpcsr &= ~SPR_FPCSR_ALLF;
+
+ force_sig_fault(SIGFPE, code, (void __user *)regs->pc);
+ } else {
+ pr_emerg("KERNEL: Illegal fpe exception 0x%.8lx\n", regs->pc);
+ die("Die:", regs, SIGFPE);
+ }
}
asmlinkage void do_trap(struct pt_regs *regs, unsigned long address)
{
- force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc);
+ if (user_mode(regs)) {
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc);
+ } else {
+ pr_emerg("KERNEL: Illegal trap exception 0x%.8lx\n", regs->pc);
+ die("Die:", regs, SIGILL);
+ }
}
asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address)
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 6f0c92e814..dcf61cbd31 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -22,6 +22,7 @@ EXPORT_SYMBOL(memset);
#include <linux/atomic.h>
EXPORT_SYMBOL(__xchg8);
EXPORT_SYMBOL(__xchg32);
+EXPORT_SYMBOL(__cmpxchg_u8);
EXPORT_SYMBOL(__cmpxchg_u32);
EXPORT_SYMBOL(__cmpxchg_u64);
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 558ec68d76..60a2c59251 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -566,10 +566,12 @@ static const struct fsl_msi_feature ipic_msi_feature = {
.msiir_offset = 0x38,
};
+#ifdef CONFIG_EPAPR_PARAVIRT
static const struct fsl_msi_feature vmpic_msi_feature = {
.fsl_pic_ip = FSL_PIC_IP_VMPIC,
.msiir_offset = 0,
};
+#endif
static const struct of_device_id fsl_of_msi_ids[] = {
{
diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index 910ba8837a..2acc7d876e 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -82,14 +82,14 @@ config ERRATA_THEAD
Otherwise, please say "N" here to avoid unnecessary overhead.
-config ERRATA_THEAD_PBMT
- bool "Apply T-Head memory type errata"
+config ERRATA_THEAD_MAE
+ bool "Apply T-Head's memory attribute extension (XTheadMae) errata"
depends on ERRATA_THEAD && 64BIT && MMU
select RISCV_ALTERNATIVE_EARLY
default y
help
- This will apply the memory type errata to handle the non-standard
- memory type bits in page-table-entries on T-Head SoCs.
+ This will apply the memory attribute extension errata to handle the
+ non-standard PTE utilization on T-Head SoCs (XTheadMae).
If you don't know what to do here, say "Y".
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index b1c410bbc1..bf6a0a6318 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -19,20 +19,26 @@
#include <asm/patch.h>
#include <asm/vendorid_list.h>
-static bool errata_probe_pbmt(unsigned int stage,
- unsigned long arch_id, unsigned long impid)
+#define CSR_TH_SXSTATUS 0x5c0
+#define SXSTATUS_MAEE _AC(0x200000, UL)
+
+static bool errata_probe_mae(unsigned int stage,
+ unsigned long arch_id, unsigned long impid)
{
- if (!IS_ENABLED(CONFIG_ERRATA_THEAD_PBMT))
+ if (!IS_ENABLED(CONFIG_ERRATA_THEAD_MAE))
return false;
if (arch_id != 0 || impid != 0)
return false;
- if (stage == RISCV_ALTERNATIVES_EARLY_BOOT ||
- stage == RISCV_ALTERNATIVES_MODULE)
- return true;
+ if (stage != RISCV_ALTERNATIVES_EARLY_BOOT &&
+ stage != RISCV_ALTERNATIVES_MODULE)
+ return false;
+
+ if (!(csr_read(CSR_TH_SXSTATUS) & SXSTATUS_MAEE))
+ return false;
- return false;
+ return true;
}
/*
@@ -140,8 +146,8 @@ static u32 thead_errata_probe(unsigned int stage,
{
u32 cpu_req_errata = 0;
- if (errata_probe_pbmt(stage, archid, impid))
- cpu_req_errata |= BIT(ERRATA_THEAD_PBMT);
+ if (errata_probe_mae(stage, archid, impid))
+ cpu_req_errata |= BIT(ERRATA_THEAD_MAE);
errata_probe_cmo(stage, archid, impid);
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index ea33288f8a..9bad9dfa2f 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -23,7 +23,7 @@
#endif
#ifdef CONFIG_ERRATA_THEAD
-#define ERRATA_THEAD_PBMT 0
+#define ERRATA_THEAD_MAE 0
#define ERRATA_THEAD_PMU 1
#define ERRATA_THEAD_NUMBER 2
#endif
@@ -53,20 +53,20 @@ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \
* in the default case.
*/
#define ALT_SVPBMT_SHIFT 61
-#define ALT_THEAD_PBMT_SHIFT 59
+#define ALT_THEAD_MAE_SHIFT 59
#define ALT_SVPBMT(_val, prot) \
asm(ALTERNATIVE_2("li %0, 0\t\nnop", \
"li %0, %1\t\nslli %0,%0,%3", 0, \
RISCV_ISA_EXT_SVPBMT, CONFIG_RISCV_ISA_SVPBMT, \
"li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \
- ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \
+ ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \
: "=r"(_val) \
: "I"(prot##_SVPBMT >> ALT_SVPBMT_SHIFT), \
- "I"(prot##_THEAD >> ALT_THEAD_PBMT_SHIFT), \
+ "I"(prot##_THEAD >> ALT_THEAD_MAE_SHIFT), \
"I"(ALT_SVPBMT_SHIFT), \
- "I"(ALT_THEAD_PBMT_SHIFT))
+ "I"(ALT_THEAD_MAE_SHIFT))
-#ifdef CONFIG_ERRATA_THEAD_PBMT
+#ifdef CONFIG_ERRATA_THEAD_MAE
/*
* IO/NOCACHE memory types are handled together with svpbmt,
* so on T-Head chips, check if no other memory type is set,
@@ -83,11 +83,11 @@ asm volatile(ALTERNATIVE( \
"slli t3, t3, %3\n\t" \
"or %0, %0, t3\n\t" \
"2:", THEAD_VENDOR_ID, \
- ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \
+ ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \
: "+r"(_val) \
- : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \
- "I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \
- "I"(ALT_THEAD_PBMT_SHIFT) \
+ : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_MAE_SHIFT), \
+ "I"(_PAGE_PMA_THEAD >> ALT_THEAD_MAE_SHIFT), \
+ "I"(ALT_THEAD_MAE_SHIFT) \
: "t3")
#else
#define ALT_THEAD_PMA(_val)
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 0bd747d1d0..29f031f68f 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -516,33 +516,33 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
break;
/* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */
case BPF_ADD | BPF_FETCH:
- emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) :
- rv_amoadd_w(rs, rs, rd, 0, 0), ctx);
+ emit(is64 ? rv_amoadd_d(rs, rs, rd, 1, 1) :
+ rv_amoadd_w(rs, rs, rd, 1, 1), ctx);
if (!is64)
emit_zext_32(rs, ctx);
break;
case BPF_AND | BPF_FETCH:
- emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) :
- rv_amoand_w(rs, rs, rd, 0, 0), ctx);
+ emit(is64 ? rv_amoand_d(rs, rs, rd, 1, 1) :
+ rv_amoand_w(rs, rs, rd, 1, 1), ctx);
if (!is64)
emit_zext_32(rs, ctx);
break;
case BPF_OR | BPF_FETCH:
- emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) :
- rv_amoor_w(rs, rs, rd, 0, 0), ctx);
+ emit(is64 ? rv_amoor_d(rs, rs, rd, 1, 1) :
+ rv_amoor_w(rs, rs, rd, 1, 1), ctx);
if (!is64)
emit_zext_32(rs, ctx);
break;
case BPF_XOR | BPF_FETCH:
- emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) :
- rv_amoxor_w(rs, rs, rd, 0, 0), ctx);
+ emit(is64 ? rv_amoxor_d(rs, rs, rd, 1, 1) :
+ rv_amoxor_w(rs, rs, rd, 1, 1), ctx);
if (!is64)
emit_zext_32(rs, ctx);
break;
/* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
case BPF_XCHG:
- emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) :
- rv_amoswap_w(rs, rs, rd, 0, 0), ctx);
+ emit(is64 ? rv_amoswap_d(rs, rs, rd, 1, 1) :
+ rv_amoswap_w(rs, rs, rd, 1, 1), ctx);
if (!is64)
emit_zext_32(rs, ctx);
break;
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 5cc46e0dde..9725586f42 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -146,7 +146,7 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start,
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
-int gmap_mark_unmergeable(void);
+int s390_disable_cow_sharing(void);
void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bb1b4bef18..4c2dc7abc2 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -32,6 +32,11 @@ typedef struct {
unsigned int uses_skeys:1;
/* The mmu context uses CMM. */
unsigned int uses_cmm:1;
+ /*
+ * The mmu context allows COW-sharing of memory pages (KSM, zeropage).
+ * Note that COW-sharing during fork() is currently always allowed.
+ */
+ unsigned int allow_cow_sharing:1;
/* The gmaps associated with this context are allowed to use huge pages. */
unsigned int allow_gmap_hpage_1m:1;
} mm_context_t;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 929af18b09..a7789a9f62 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -35,6 +35,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.has_pgste = 0;
mm->context.uses_skeys = 0;
mm->context.uses_cmm = 0;
+ mm->context.allow_cow_sharing = 1;
mm->context.allow_gmap_hpage_1m = 0;
#endif
switch (mm->context.asce_limit) {
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 12a7b86789..0a7055518b 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -566,10 +566,20 @@ static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot)
}
/*
- * In the case that a guest uses storage keys
- * faults should no longer be backed by zero pages
+ * As soon as the guest uses storage keys or enables PV, we deduplicate all
+ * mapped shared zeropages and prevent new shared zeropages from getting
+ * mapped.
*/
-#define mm_forbids_zeropage mm_has_pgste
+#define mm_forbids_zeropage mm_forbids_zeropage
+static inline int mm_forbids_zeropage(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (!mm->context.allow_cow_sharing)
+ return 1;
+#endif
+ return 0;
+}
+
static inline int mm_uses_skeys(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ea63ac7698..7c17be6ad4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2631,9 +2631,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
if (r)
break;
- mmap_write_lock(current->mm);
- r = gmap_mark_unmergeable();
- mmap_write_unlock(current->mm);
+ r = s390_disable_cow_sharing();
if (r)
break;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 41a4a60c5e..3b57837f3e 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2548,41 +2548,6 @@ static inline void thp_split_mm(struct mm_struct *mm)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * Remove all empty zero pages from the mapping for lazy refaulting
- * - This must be called after mm->context.has_pgste is set, to avoid
- * future creation of zero pages
- * - This must be called after THP was disabled.
- *
- * mm contracts with s390, that even if mm were to remove a page table,
- * racing with the loop below and so causing pte_offset_map_lock() to fail,
- * it will never insert a page table containing empty zero pages once
- * mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set.
- */
-static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
- unsigned long end, struct mm_walk *walk)
-{
- unsigned long addr;
-
- for (addr = start; addr != end; addr += PAGE_SIZE) {
- pte_t *ptep;
- spinlock_t *ptl;
-
- ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
- if (!ptep)
- break;
- if (is_zero_pfn(pte_pfn(*ptep)))
- ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
- pte_unmap_unlock(ptep, ptl);
- }
- return 0;
-}
-
-static const struct mm_walk_ops zap_zero_walk_ops = {
- .pmd_entry = __zap_zero_pages,
- .walk_lock = PGWALK_WRLOCK,
-};
-
-/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
@@ -2599,22 +2564,142 @@ int s390_enable_sie(void)
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
- walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
mmap_write_unlock(mm);
return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
-int gmap_mark_unmergeable(void)
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long *found_addr = walk->private;
+
+ /* Return 1 of the page is a zeropage. */
+ if (is_zero_pfn(pte_pfn(*pte))) {
+ /*
+ * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+ * right thing and likely don't care: FAULT_FLAG_UNSHARE
+ * currently only works in COW mappings, which is also where
+ * mm_forbids_zeropage() is checked.
+ */
+ if (!is_cow_mapping(walk->vma->vm_flags))
+ return -EFAULT;
+
+ *found_addr = addr;
+ return 1;
+ }
+ return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+ .pte_entry = find_zeropage_pte_entry,
+ .walk_lock = PGWALK_WRLOCK,
+};
+
+/*
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __s390_unshare_zeropages(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
+ unsigned long addr;
+ vm_fault_t fault;
+ int rc;
+
+ for_each_vma(vmi, vma) {
+ /*
+ * We could only look at COW mappings, but it's more future
+ * proof to catch unexpected zeropages in other mappings and
+ * fail.
+ */
+ if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+ continue;
+ addr = vma->vm_start;
+
+retry:
+ rc = walk_page_range_vma(vma, addr, vma->vm_end,
+ &find_zeropage_ops, &addr);
+ if (rc < 0)
+ return rc;
+ else if (!rc)
+ continue;
+
+ /* addr was updated by find_zeropage_pte_entry() */
+ fault = handle_mm_fault(vma, addr,
+ FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+ NULL);
+ if (fault & VM_FAULT_OOM)
+ return -ENOMEM;
+ /*
+ * See break_ksm(): even after handle_mm_fault() returned 0, we
+ * must start the lookup from the current address, because
+ * handle_mm_fault() may back out if there's any difficulty.
+ *
+ * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+ * maybe they could trigger in the future on concurrent
+ * truncation. In that case, the shared zeropage would be gone
+ * and we can simply retry and make progress.
+ */
+ cond_resched();
+ goto retry;
+ }
+
+ return 0;
+}
+
+static int __s390_disable_cow_sharing(struct mm_struct *mm)
{
+ int rc;
+
+ if (!mm->context.allow_cow_sharing)
+ return 0;
+
+ mm->context.allow_cow_sharing = 0;
+
+ /* Replace all shared zeropages by anonymous pages. */
+ rc = __s390_unshare_zeropages(mm);
/*
* Make sure to disable KSM (if enabled for the whole process or
* individual VMAs). Note that nothing currently hinders user space
* from re-enabling it.
*/
- return ksm_disable(current->mm);
+ if (!rc)
+ rc = ksm_disable(mm);
+ if (rc)
+ mm->context.allow_cow_sharing = 1;
+ return rc;
+}
+
+/*
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
+ *
+ * Not that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int s390_disable_cow_sharing(void)
+{
+ int rc;
+
+ mmap_write_lock(current->mm);
+ rc = __s390_disable_cow_sharing(current->mm);
+ mmap_write_unlock(current->mm);
+ return rc;
}
-EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
+EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
/*
* Enable storage key handling from now on and initialize the storage
@@ -2683,7 +2768,7 @@ int s390_enable_skey(void)
goto out_up;
mm->context.uses_skeys = 1;
- rc = gmap_mark_unmergeable();
+ rc = __s390_disable_cow_sharing(mm);
if (rc) {
mm->context.uses_skeys = 0;
goto out_up;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 5af0402e94..1d168a98ae 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1427,8 +1427,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64), \
(insn->imm & BPF_FETCH) ? src_reg : REG_W0, \
src_reg, dst_reg, off); \
- if (is32 && (insn->imm & BPF_FETCH)) \
- EMIT_ZERO(src_reg); \
+ if (insn->imm & BPF_FETCH) { \
+ /* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */ \
+ _EMIT2(0x07e0); \
+ if (is32) \
+ EMIT_ZERO(src_reg); \
+ } \
} while (0)
case BPF_ADD:
case BPF_ADD | BPF_FETCH:
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index aed1ea8e2c..74051b8ddf 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -44,17 +44,12 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
if (OPCODE_RTE(opcode))
return -EFAULT; /* Bad breakpoint */
+ memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
p->opcode = opcode;
return 0;
}
-void __kprobes arch_copy_kprobe(struct kprobe *p)
-{
- memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
- p->opcode = *p->addr;
-}
-
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
*p->addr = BREAKPOINT_INSTRUCTION;
diff --git a/arch/sh/lib/checksum.S b/arch/sh/lib/checksum.S
index 3e07074e00..06fed5a21e 100644
--- a/arch/sh/lib/checksum.S
+++ b/arch/sh/lib/checksum.S
@@ -33,7 +33,8 @@
*/
/*
- * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
+ * unsigned int csum_partial(const unsigned char *buf, int len,
+ * unsigned int sum);
*/
.text
@@ -45,31 +46,11 @@ ENTRY(csum_partial)
* Fortunately, it is easy to convert 2-byte alignment to 4-byte
* alignment for the unrolled loop.
*/
+ mov r5, r1
mov r4, r0
- tst #3, r0 ! Check alignment.
- bt/s 2f ! Jump if alignment is ok.
- mov r4, r7 ! Keep a copy to check for alignment
+ tst #2, r0 ! Check alignment.
+ bt 2f ! Jump if alignment is ok.
!
- tst #1, r0 ! Check alignment.
- bt 21f ! Jump if alignment is boundary of 2bytes.
-
- ! buf is odd
- tst r5, r5
- add #-1, r5
- bt 9f
- mov.b @r4+, r0
- extu.b r0, r0
- addc r0, r6 ! t=0 from previous tst
- mov r6, r0
- shll8 r6
- shlr16 r0
- shlr8 r0
- or r0, r6
- mov r4, r0
- tst #2, r0
- bt 2f
-21:
- ! buf is 2 byte aligned (len could be 0)
add #-2, r5 ! Alignment uses up two bytes.
cmp/pz r5 !
bt/s 1f ! Jump if we had at least two bytes.
@@ -77,17 +58,16 @@ ENTRY(csum_partial)
bra 6f
add #2, r5 ! r5 was < 2. Deal with it.
1:
+ mov r5, r1 ! Save new len for later use.
mov.w @r4+, r0
extu.w r0, r0
addc r0, r6
bf 2f
add #1, r6
2:
- ! buf is 4 byte aligned (len could be 0)
- mov r5, r1
mov #-5, r0
- shld r0, r1
- tst r1, r1
+ shld r0, r5
+ tst r5, r5
bt/s 4f ! if it's =0, go to 4f
clrt
.align 2
@@ -109,31 +89,30 @@ ENTRY(csum_partial)
addc r0, r6
addc r2, r6
movt r0
- dt r1
+ dt r5
bf/s 3b
cmp/eq #1, r0
- ! here, we know r1==0
- addc r1, r6 ! add carry to r6
+ ! here, we know r5==0
+ addc r5, r6 ! add carry to r6
4:
- mov r5, r0
+ mov r1, r0
and #0x1c, r0
tst r0, r0
- bt 6f
- ! 4 bytes or more remaining
- mov r0, r1
- shlr2 r1
+ bt/s 6f
+ mov r0, r5
+ shlr2 r5
mov #0, r2
5:
addc r2, r6
mov.l @r4+, r2
movt r0
- dt r1
+ dt r5
bf/s 5b
cmp/eq #1, r0
addc r2, r6
- addc r1, r6 ! r1==0 here, so it means add carry-bit
+ addc r5, r6 ! r5==0 here, so it means add carry-bit
6:
- ! 3 bytes or less remaining
+ mov r1, r5
mov #3, r0
and r0, r5
tst r5, r5
@@ -159,16 +138,6 @@ ENTRY(csum_partial)
mov #0, r0
addc r0, r6
9:
- ! Check if the buffer was misaligned, if so realign sum
- mov r7, r0
- tst #1, r0
- bt 10f
- mov r6, r0
- shll8 r6
- shlr16 r0
- shlr8 r0
- or r0, r6
-10:
rts
mov r6, r0
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6f49999a6b..bfccf12138 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2474,9 +2474,13 @@ menuconfig CPU_MITIGATIONS
help
Say Y here to enable options which enable mitigations for hardware
vulnerabilities (usually related to speculative execution).
+ Mitigations can be disabled or restricted to SMT systems at runtime
+ via the "mitigations" kernel parameter.
- If you say N, all mitigations will be disabled. You really
- should know what you are doing to say so.
+ If you say N, all mitigations will be disabled. This CANNOT be
+ overridden at runtime.
+
+ Say 'Y', unless you really know what you are doing.
if CPU_MITIGATIONS
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index bf4a10a579..1dcb794c54 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -398,6 +398,11 @@ SYM_CODE_START(startup_64)
call sev_enable
#endif
+ /* Preserve only the CR4 bits that must be preserved, and clear the rest */
+ movq %cr4, %rax
+ andl $(X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57), %eax
+ movq %rax, %cr4
+
/*
* configure_5level_paging() updates the number of paging levels using
* a trampoline in 32-bit addressable memory if the current number does
diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S
index ef73a3ab87..791386d9a8 100644
--- a/arch/x86/crypto/nh-avx2-x86_64.S
+++ b/arch/x86/crypto/nh-avx2-x86_64.S
@@ -154,5 +154,6 @@ SYM_TYPED_FUNC_START(nh_avx2)
vpaddq T1, T0, T0
vpaddq T4, T0, T0
vmovdqu T0, (HASH)
+ vzeroupper
RET
SYM_FUNC_END(nh_avx2)
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 9918212faf..0ffb072be9 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -716,6 +716,7 @@ SYM_TYPED_FUNC_START(sha256_transform_rorx)
popq %r13
popq %r12
popq %rbx
+ vzeroupper
RET
SYM_FUNC_END(sha256_transform_rorx)
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index f08496cd68..24973f42c4 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -680,6 +680,7 @@ SYM_TYPED_FUNC_START(sha512_transform_rorx)
pop %r12
pop %rbx
+ vzeroupper
RET
SYM_FUNC_END(sha512_transform_rorx)
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index e0ca8120ae..1245000a87 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -98,11 +98,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
- /*
- * XXX: if access_ok, get_user, and put_user handled
- * sig_on_uaccess_err, this could go away.
- */
-
if (!access_ok((void __user *)ptr, size)) {
struct thread_struct *thread = &current->thread;
@@ -120,10 +115,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
bool emulate_vsyscall(unsigned long error_code,
struct pt_regs *regs, unsigned long address)
{
- struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr, syscall_nr, tmp;
- int prev_sig_on_uaccess_err;
long ret;
unsigned long orig_dx;
@@ -172,8 +165,6 @@ bool emulate_vsyscall(unsigned long error_code,
goto sigsegv;
}
- tsk = current;
-
/*
* Check for access_ok violations and find the syscall nr.
*
@@ -234,12 +225,8 @@ bool emulate_vsyscall(unsigned long error_code,
goto do_ret; /* skip requested */
/*
- * With a real vsyscall, page faults cause SIGSEGV. We want to
- * preserve that behavior to make writing exploits harder.
+ * With a real vsyscall, page faults cause SIGSEGV.
*/
- prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
- current->thread.sig_on_uaccess_err = 1;
-
ret = -EFAULT;
switch (vsyscall_nr) {
case 0:
@@ -262,23 +249,12 @@ bool emulate_vsyscall(unsigned long error_code,
break;
}
- current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;
-
check_fault:
if (ret == -EFAULT) {
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall fault (exploit attempt?)");
-
- /*
- * If we failed to generate a signal for any reason,
- * generate one here. (This should be impossible.)
- */
- if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
- !sigismember(&tsk->pending.signal, SIGSEGV)))
- goto sigsegv;
-
- return true; /* Don't emulate the ret. */
+ goto sigsegv;
}
regs->ax = ret;
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 44b08b53ab..c1d6cd58f8 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -62,7 +62,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old,
asm volatile(_lock "cmpxchg16b %[ptr]" \
CC_SET(e) \
: CC_OUT(e) (ret), \
- [ptr] "+m" (*ptr), \
+ [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
: "memory"); \
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 9abb8cc4cd..b786449626 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -567,6 +567,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level);
+pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
+ unsigned int *level, bool *nx, bool *rw);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 26620d7642..5636ad6973 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -479,7 +479,6 @@ struct thread_struct {
unsigned long iopl_emul;
unsigned int iopl_warn:1;
- unsigned int sig_on_uaccess_err:1;
/*
* Protection Keys Register for Userspace. Loaded immediately on
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 1be13b2dfe..64df897c0e 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -37,8 +37,6 @@ extern int phys_to_target_node(phys_addr_t start);
#define phys_to_target_node phys_to_target_node
extern int memory_add_physaddr_to_nid(u64 start);
#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
-extern int numa_fill_memblks(u64 start, u64 end);
-#define numa_fill_memblks numa_fill_memblks
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 13b45b9c80..620f0af713 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -465,7 +465,7 @@ static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, siz
return !__apply_microcode_amd(mc);
}
-static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
+static bool get_builtin_microcode(struct cpio_data *cp, u8 family)
{
char fw_name[36] = "amd-ucode/microcode_amd.bin";
struct firmware fw;
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 1123ef3ccf..4334033658 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -193,11 +193,9 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
cur->warned = false;
/*
- * If a non-zero TSC value for socket 0 may be valid then the default
- * adjusted value cannot assumed to be zero either.
+ * The default adjust value cannot be assumed to be zero on any socket.
*/
- if (tsc_async_resets)
- cur->adjusted = bootval;
+ cur->adjusted = bootval;
/*
* Check whether this CPU is the first in a package to come up. In
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 5168ee0360..d1ccd06c53 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -148,7 +148,7 @@ AVXcode:
65: SEG=GS (Prefix)
66: Operand-Size (Prefix)
67: Address-Size (Prefix)
-68: PUSH Iz (d64)
+68: PUSH Iz
69: IMUL Gv,Ev,Iz
6a: PUSH Ib (d64)
6b: IMUL Gv,Ev,Ib
@@ -698,10 +698,10 @@ AVXcode: 2
4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
-50: vpdpbusd Vx,Hx,Wx (66),(ev)
-51: vpdpbusds Vx,Hx,Wx (66),(ev)
-52: vdpbf16ps Vx,Hx,Wx (F3),(ev) | vpdpwssd Vx,Hx,Wx (66),(ev) | vp4dpwssd Vdqq,Hdqq,Wdq (F2),(ev)
-53: vpdpwssds Vx,Hx,Wx (66),(ev) | vp4dpwssds Vdqq,Hdqq,Wdq (F2),(ev)
+50: vpdpbusd Vx,Hx,Wx (66)
+51: vpdpbusds Vx,Hx,Wx (66)
+52: vdpbf16ps Vx,Hx,Wx (F3),(ev) | vpdpwssd Vx,Hx,Wx (66) | vp4dpwssd Vdqq,Hdqq,Wdq (F2),(ev)
+53: vpdpwssds Vx,Hx,Wx (66) | vp4dpwssds Vdqq,Hdqq,Wdq (F2),(ev)
54: vpopcntb/w Vx,Wx (66),(ev)
55: vpopcntd/q Vx,Wx (66),(ev)
58: vpbroadcastd Vx,Wx (66),(v)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b01df023de..e604d2d6cc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -717,39 +717,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
WARN_ON_ONCE(user_mode(regs));
/* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
- /*
- * Any interrupt that takes a fault gets the fixup. This makes
- * the below recursive fault logic only apply to a faults from
- * task context.
- */
- if (in_interrupt())
- return;
-
- /*
- * Per the above we're !in_interrupt(), aka. task context.
- *
- * In this case we need to make sure we're not recursively
- * faulting through the emulate_vsyscall() logic.
- */
- if (current->thread.sig_on_uaccess_err && signal) {
- sanitize_error_code(address, &error_code);
-
- set_signal_archinfo(address, error_code);
-
- if (si_code == SEGV_PKUERR) {
- force_sig_pkuerr((void __user *)address, pkey);
- } else {
- /* XXX: hwpoison faults will set the wrong code. */
- force_sig_fault(signal, si_code, (void __user *)address);
- }
- }
-
- /*
- * Barring that, we can do the fixup and be happy.
- */
+ if (fixup_exception(regs, X86_TRAP_PF, error_code, address))
return;
- }
/*
* AMD erratum #91 manifests as a spurious page fault on a PREFETCH
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 65e9a6e391..ce84ba86e6 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -929,6 +929,8 @@ int memory_add_physaddr_to_nid(u64 start)
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
+
static int __init cmp_memblk(const void *a, const void *b)
{
const struct numa_memblk *ma = *(const struct numa_memblk **)a;
@@ -1001,5 +1003,3 @@ int __init numa_fill_memblks(u64 start, u64 end)
}
return 0;
}
-
-#endif
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 135bb594df..b4073fb452 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -619,7 +619,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
* Validate strict W^X semantics.
*/
static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start,
- unsigned long pfn, unsigned long npg)
+ unsigned long pfn, unsigned long npg,
+ bool nx, bool rw)
{
unsigned long end;
@@ -641,6 +642,10 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW)
return new;
+ /* Non-leaf translation entries can disable writing or execution. */
+ if (!rw || nx)
+ return new;
+
end = start + npg * PAGE_SIZE - 1;
WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n",
(unsigned long long)pgprot_val(old),
@@ -657,20 +662,26 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
/*
* Lookup the page table entry for a virtual address in a specific pgd.
- * Return a pointer to the entry and the level of the mapping.
+ * Return a pointer to the entry, the level of the mapping, and the effective
+ * NX and RW bits of all page table levels.
*/
-pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
- unsigned int *level)
+pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
+ unsigned int *level, bool *nx, bool *rw)
{
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
*level = PG_LEVEL_NONE;
+ *nx = false;
+ *rw = true;
if (pgd_none(*pgd))
return NULL;
+ *nx |= pgd_flags(*pgd) & _PAGE_NX;
+ *rw &= pgd_flags(*pgd) & _PAGE_RW;
+
p4d = p4d_offset(pgd, address);
if (p4d_none(*p4d))
return NULL;
@@ -679,6 +690,9 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (p4d_large(*p4d) || !p4d_present(*p4d))
return (pte_t *)p4d;
+ *nx |= p4d_flags(*p4d) & _PAGE_NX;
+ *rw &= p4d_flags(*p4d) & _PAGE_RW;
+
pud = pud_offset(p4d, address);
if (pud_none(*pud))
return NULL;
@@ -687,6 +701,9 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (pud_leaf(*pud) || !pud_present(*pud))
return (pte_t *)pud;
+ *nx |= pud_flags(*pud) & _PAGE_NX;
+ *rw &= pud_flags(*pud) & _PAGE_RW;
+
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return NULL;
@@ -695,12 +712,27 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (pmd_large(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;
+ *nx |= pmd_flags(*pmd) & _PAGE_NX;
+ *rw &= pmd_flags(*pmd) & _PAGE_RW;
+
*level = PG_LEVEL_4K;
return pte_offset_kernel(pmd, address);
}
/*
+ * Lookup the page table entry for a virtual address in a specific pgd.
+ * Return a pointer to the entry and the level of the mapping.
+ */
+pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
+ unsigned int *level)
+{
+ bool nx, rw;
+
+ return lookup_address_in_pgd_attr(pgd, address, level, &nx, &rw);
+}
+
+/*
* Lookup the page table entry for a virtual address. Return a pointer
* to the entry and the level of the mapping.
*
@@ -715,13 +747,16 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
EXPORT_SYMBOL_GPL(lookup_address);
static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
- unsigned int *level)
+ unsigned int *level, bool *nx, bool *rw)
{
- if (cpa->pgd)
- return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
- address, level);
+ pgd_t *pgd;
+
+ if (!cpa->pgd)
+ pgd = pgd_offset_k(address);
+ else
+ pgd = cpa->pgd + pgd_index(address);
- return lookup_address(address, level);
+ return lookup_address_in_pgd_attr(pgd, address, level, nx, rw);
}
/*
@@ -849,12 +884,13 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
pgprot_t old_prot, new_prot, req_prot, chk_prot;
pte_t new_pte, *tmp;
enum pg_level level;
+ bool nx, rw;
/*
* Check for races, another CPU might have split this page
* up already:
*/
- tmp = _lookup_address_cpa(cpa, address, &level);
+ tmp = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (tmp != kpte)
return 1;
@@ -965,7 +1001,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
psize, CPA_DETECT);
- new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages);
+ new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages,
+ nx, rw);
/*
* If there is a conflict, split the large page.
@@ -1046,6 +1083,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
pte_t *pbase = (pte_t *)page_address(base);
unsigned int i, level;
pgprot_t ref_prot;
+ bool nx, rw;
pte_t *tmp;
spin_lock(&pgd_lock);
@@ -1053,7 +1091,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
* Check for races, another CPU might have split this page
* up for us already:
*/
- tmp = _lookup_address_cpa(cpa, address, &level);
+ tmp = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (tmp != kpte) {
spin_unlock(&pgd_lock);
return 1;
@@ -1594,10 +1632,11 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
int do_split, err;
unsigned int level;
pte_t *kpte, old_pte;
+ bool nx, rw;
address = __cpa_addr(cpa, cpa->curpage);
repeat:
- kpte = _lookup_address_cpa(cpa, address, &level);
+ kpte = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (!kpte)
return __cpa_process_fault(cpa, address, primary);
@@ -1619,7 +1658,8 @@ repeat:
new_prot = static_protections(new_prot, address, pfn, 1, 0,
CPA_PROTECT);
- new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1);
+ new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1,
+ nx, rw);
new_prot = pgprot_clear_protnone_bits(new_prot);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index df484885cc..f415c2cf53 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1585,36 +1585,41 @@ st: if (is_imm8(insn->off))
if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
/* Conservatively check that src_reg + insn->off is a kernel address:
- * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
- * src_reg is used as scratch for src_reg += insn->off and restored
- * after emit_ldx if necessary
+ * src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE
+ * and
+ * src_reg + insn->off < VSYSCALL_ADDR
*/
- u64 limit = TASK_SIZE_MAX + PAGE_SIZE;
+ u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;
u8 *end_of_jmp;
- /* At end of these emitted checks, insn->off will have been added
- * to src_reg, so no need to do relative load with insn->off offset
- */
- insn_off = 0;
+ /* movabsq r10, VSYSCALL_ADDR */
+ emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32,
+ (u32)(long)VSYSCALL_ADDR);
- /* movabsq r11, limit */
- EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
- EMIT((u32)limit, 4);
- EMIT(limit >> 32, 4);
+ /* mov src_reg, r11 */
+ EMIT_mov(AUX_REG, src_reg);
if (insn->off) {
- /* add src_reg, insn->off */
- maybe_emit_1mod(&prog, src_reg, true);
- EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off);
+ /* add r11, insn->off */
+ maybe_emit_1mod(&prog, AUX_REG, true);
+ EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
}
- /* cmp src_reg, r11 */
- maybe_emit_mod(&prog, src_reg, AUX_REG, true);
- EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
+ /* sub r11, r10 */
+ maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+ EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
+
+ /* movabsq r10, limit */
+ emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32,
+ (u32)(long)limit);
+
+ /* cmp r10, r11 */
+ maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+ EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
- /* if unsigned '>=', goto load */
- EMIT2(X86_JAE, 0);
+ /* if unsigned '>', goto load */
+ EMIT2(X86_JA, 0);
end_of_jmp = prog;
/* xor dst_reg, dst_reg */
@@ -1640,18 +1645,6 @@ st: if (is_imm8(insn->off))
/* populate jmp_offset for JMP above */
start_of_ldx[-1] = prog - start_of_ldx;
- if (insn->off && src_reg != dst_reg) {
- /* sub src_reg, insn->off
- * Restore src_reg after "add src_reg, insn->off" in prev
- * if statement. But if src_reg == dst_reg, emit_ldx
- * above already clobbered src_reg, so no need to restore.
- * If add src_reg, insn->off was unnecessary, no need to
- * restore either.
- */
- maybe_emit_1mod(&prog, src_reg, true);
- EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
- }
-
if (!bpf_prog->aux->extable)
break;
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 08aa0f25f1..8d1c82795e 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -42,7 +42,8 @@ KCOV_INSTRUMENT := n
# make up the standalone purgatory.ro
PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
-PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss -g0
+PURGATORY_CFLAGS := -mcmodel=small -ffreestanding -fno-zero-initialized-in-bss -g0
+PURGATORY_CFLAGS += -fpic -fvisibility=hidden
PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING
PURGATORY_CFLAGS += -fno-stack-protector
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index b029fb81eb..e7a44a7f61 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -746,6 +746,15 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) {
continue;
}
+
+ /*
+ * Do not perform relocations in .notes sections; any
+ * values there are meant for pre-boot consumption (e.g.
+ * startup_xen).
+ */
+ if (sec_applies->shdr.sh_type == SHT_NOTE)
+ continue;
+
sh_symtab = sec_symtab->symtab;
sym_strtab = sec_symtab->link->strtab;
for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) {