author    Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-08 16:58:07 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-08 16:58:07 +0000
commit    5a6d13c86f2fe6304450e907fc1d8d72da82efef (patch)
tree      1bd4e8854203c575eabaa99e2c509f8886065733 /arch
parent    Adding upstream version 6.1.76. (diff)
Adding upstream version 6.1.82. (upstream/6.1.82)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig1
-rw-r--r--arch/arc/include/asm/jump_label.h4
-rw-r--r--arch/arm/boot/dts/imx1-ads.dts2
-rw-r--r--arch/arm/boot/dts/imx1-apf9328.dts2
-rw-r--r--arch/arm/boot/dts/imx1.dtsi5
-rw-r--r--arch/arm/boot/dts/imx23-sansa.dts12
-rw-r--r--arch/arm/boot/dts/imx23.dtsi4
-rw-r--r--arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi2
-rw-r--r--arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts2
-rw-r--r--arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts2
-rw-r--r--arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts2
-rw-r--r--arch/arm/boot/dts/imx25-pdk.dts2
-rw-r--r--arch/arm/boot/dts/imx25.dtsi2
-rw-r--r--arch/arm/boot/dts/imx27-apf27dev.dts4
-rw-r--r--arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi4
-rw-r--r--arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts2
-rw-r--r--arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts2
-rw-r--r--arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts2
-rw-r--r--arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi2
-rw-r--r--arch/arm/boot/dts/imx27.dtsi3
-rw-r--r--arch/arm/boot/dts/imx28.dtsi4
-rw-r--r--arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts2
-rw-r--r--arch/arm/boot/dts/imx6qdl.dtsi2
-rw-r--r--arch/arm/boot/dts/imx6sx.dtsi2
-rw-r--r--arch/arm/boot/dts/imx6ul.dtsi2
-rw-r--r--arch/arm/boot/dts/imx7d.dtsi3
-rw-r--r--arch/arm/boot/dts/imx7s.dtsi13
-rw-r--r--arch/arm/boot/dts/rk3036.dtsi14
-rw-r--r--arch/arm/include/asm/irq_work.h2
-rw-r--r--arch/arm/include/asm/jump_label.h4
-rw-r--r--arch/arm/mach-ep93xx/core.c1
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-s4.dtsi4
-rw-r--r--arch/arm64/boot/dts/qcom/apq8016-sbc.dts4
-rw-r--r--arch/arm64/boot/dts/qcom/msm8916.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/msm8996.dtsi21
-rw-r--r--arch/arm64/boot/dts/qcom/msm8998.dtsi32
-rw-r--r--arch/arm64/boot/dts/qcom/sdm845.dtsi4
-rw-r--r--arch/arm64/boot/dts/qcom/sm8150.dtsi4
-rw-r--r--arch/arm64/boot/dts/rockchip/px30.dtsi2
-rw-r--r--arch/arm64/crypto/aes-neonbs-glue.c11
-rw-r--r--arch/arm64/include/asm/alternative-macros.h4
-rw-r--r--arch/arm64/include/asm/cputype.h4
-rw-r--r--arch/arm64/include/asm/efi.h1
-rw-r--r--arch/arm64/include/asm/fpsimd.h2
-rw-r--r--arch/arm64/include/asm/irq_work.h2
-rw-r--r--arch/arm64/include/asm/jump_label.h4
-rw-r--r--arch/arm64/kernel/cpu_errata.c3
-rw-r--r--arch/arm64/kernel/fpsimd.c14
-rw-r--r--arch/arm64/kernel/irq.c7
-rw-r--r--arch/arm64/kernel/perf_event.c6
-rw-r--r--arch/arm64/kernel/suspend.c3
-rw-r--r--arch/arm64/kernel/vdso32/Makefile2
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c5
-rw-r--r--arch/csky/include/asm/irq_work.h2
-rw-r--r--arch/csky/include/asm/jump_label.h4
-rw-r--r--arch/loongarch/Kconfig23
-rw-r--r--arch/loongarch/kernel/smp.c2
-rw-r--r--arch/loongarch/mm/tlb.c16
-rw-r--r--arch/mips/include/asm/checksum.h3
-rw-r--r--arch/mips/include/asm/jump_label.h4
-rw-r--r--arch/mips/kernel/traps.c8
-rw-r--r--arch/parisc/Kconfig1
-rw-r--r--arch/parisc/include/asm/assembly.h1
-rw-r--r--arch/parisc/include/asm/extable.h64
-rw-r--r--arch/parisc/include/asm/jump_label.h4
-rw-r--r--arch/parisc/include/asm/special_insns.h6
-rw-r--r--arch/parisc/include/asm/uaccess.h48
-rw-r--r--arch/parisc/kernel/drivers.c3
-rw-r--r--arch/parisc/kernel/processor.c8
-rw-r--r--arch/parisc/kernel/unaligned.c44
-rw-r--r--arch/parisc/mm/fault.c11
-rw-r--r--arch/powerpc/include/asm/bug.h2
-rw-r--r--arch/powerpc/include/asm/irq_work.h1
-rw-r--r--arch/powerpc/include/asm/jump_label.h4
-rw-r--r--arch/powerpc/include/asm/mmu.h4
-rw-r--r--arch/powerpc/include/asm/mmzone.h8
-rw-r--r--arch/powerpc/include/asm/thread_info.h2
-rw-r--r--arch/powerpc/include/asm/uaccess.h8
-rw-r--r--arch/powerpc/kernel/cpu_specs_e500mc.h3
-rw-r--r--arch/powerpc/kernel/interrupt_64.S4
-rw-r--r--arch/powerpc/kernel/irq_64.c2
-rw-r--r--arch/powerpc/kernel/traps.c2
-rw-r--r--arch/powerpc/lib/sstep.c10
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c2
-rw-r--r--arch/powerpc/mm/init-common.c5
-rw-r--r--arch/powerpc/mm/kasan/init_32.c1
-rw-r--r--arch/powerpc/mm/mmu_decl.h5
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c156
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c8
-rw-r--r--arch/riscv/include/asm/ftrace.h5
-rw-r--r--arch/riscv/include/asm/irq_work.h2
-rw-r--r--arch/riscv/include/asm/jump_label.h4
-rw-r--r--arch/riscv/include/asm/pgtable.h2
-rw-r--r--arch/riscv/kernel/Makefile2
-rw-r--r--arch/riscv/kernel/return_address.c48
-rw-r--r--arch/s390/include/asm/irq_work.h2
-rw-r--r--arch/s390/include/asm/jump_label.h4
-rw-r--r--arch/s390/include/asm/kvm_host.h7
-rw-r--r--arch/s390/kernel/ptrace.c6
-rw-r--r--arch/s390/kvm/gaccess.c7
-rw-r--r--arch/s390/kvm/kvm-s390.c14
-rw-r--r--arch/s390/kvm/vsie.c6
-rw-r--r--arch/s390/mm/gmap.c1
-rw-r--r--arch/s390/pci/pci.c2
-rw-r--r--arch/sparc/include/asm/jump_label.h4
-rw-r--r--arch/um/Makefile4
-rw-r--r--arch/um/drivers/net_kern.c2
-rw-r--r--arch/um/include/asm/cpufeature.h2
-rw-r--r--arch/um/include/shared/kern_util.h2
-rw-r--r--arch/um/kernel/process.c2
-rw-r--r--arch/um/kernel/time.c32
-rw-r--r--arch/um/os-Linux/helper.c6
-rw-r--r--arch/um/os-Linux/util.c19
-rw-r--r--arch/x86/Kconfig28
-rw-r--r--arch/x86/Kconfig.cpu2
-rw-r--r--arch/x86/boot/compressed/Makefile13
-rw-r--r--arch/x86/boot/compressed/acpi.c14
-rw-r--r--arch/x86/boot/compressed/cmdline.c4
-rw-r--r--arch/x86/boot/compressed/efi_mixed.S328
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S195
-rw-r--r--arch/x86/boot/compressed/head_32.S38
-rw-r--r--arch/x86/boot/compressed/head_64.S593
-rw-r--r--arch/x86/boot/compressed/ident_map_64.c12
-rw-r--r--arch/x86/boot/compressed/idt_64.c1
-rw-r--r--arch/x86/boot/compressed/idt_handlers_64.S1
-rw-r--r--arch/x86/boot/compressed/kaslr.c26
-rw-r--r--arch/x86/boot/compressed/mem_encrypt.S152
-rw-r--r--arch/x86/boot/compressed/misc.c85
-rw-r--r--arch/x86/boot/compressed/misc.h4
-rw-r--r--arch/x86/boot/compressed/pgtable.h10
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c94
-rw-r--r--arch/x86/boot/compressed/sev.c114
-rw-r--r--arch/x86/boot/header.S2
-rw-r--r--arch/x86/boot/tools/build.c2
-rw-r--r--arch/x86/entry/entry.S23
-rw-r--r--arch/x86/entry/entry_32.S3
-rw-r--r--arch/x86/entry/entry_64.S11
-rw-r--r--arch/x86/entry/entry_64_compat.S1
-rw-r--r--arch/x86/include/asm/boot.h10
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h3
-rw-r--r--arch/x86/include/asm/efi.h14
-rw-r--r--arch/x86/include/asm/entry-common.h1
-rw-r--r--arch/x86/include/asm/irq_work.h1
-rw-r--r--arch/x86/include/asm/jump_label.h6
-rw-r--r--arch/x86/include/asm/kmsan.h17
-rw-r--r--arch/x86/include/asm/msr-index.h8
-rw-r--r--arch/x86/include/asm/nospec-branch.h29
-rw-r--r--arch/x86/include/asm/rmwcc.h2
-rw-r--r--arch/x86/include/asm/sev.h7
-rw-r--r--arch/x86/include/asm/uaccess.h10
-rw-r--r--arch/x86/include/asm/virtext.h12
-rw-r--r--arch/x86/kernel/alternative.c13
-rw-r--r--arch/x86/kernel/cpu/bugs.c107
-rw-r--r--arch/x86/kernel/cpu/common.c38
-rw-r--r--arch/x86/kernel/cpu/intel.c178
-rw-r--r--arch/x86/kernel/cpu/mce/core.c16
-rw-r--r--arch/x86/kernel/e820.c8
-rw-r--r--arch/x86/kernel/fpu/signal.c13
-rw-r--r--arch/x86/kernel/ftrace.c2
-rw-r--r--arch/x86/kernel/nmi.c3
-rw-r--r--arch/x86/kernel/static_call.c2
-rw-r--r--arch/x86/kvm/svm/svm_ops.h6
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c2
-rw-r--r--arch/x86/kvm/vmx/run_flags.h7
-rw-r--r--arch/x86/kvm/vmx/vmenter.S9
-rw-r--r--arch/x86/kvm/vmx/vmx.c20
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h6
-rw-r--r--arch/x86/kvm/x86.c5
-rw-r--r--arch/x86/mm/ident_map.c23
-rw-r--r--arch/x86/mm/numa.c21
-rw-r--r--arch/x86/net/bpf_jit_comp.c2
-rw-r--r--arch/xtensa/include/asm/jump_label.h4
174 files changed, 1863 insertions, 1419 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 14273a620..f99fd9a4c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -642,6 +642,7 @@ config SHADOW_CALL_STACK
bool "Shadow Call Stack"
depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
+ depends on MMU
help
This option enables the compiler's Shadow Call Stack, which
uses a shadow stack to protect function return addresses from
diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h
index 9d9618079..a339223d9 100644
--- a/arch/arc/include/asm/jump_label.h
+++ b/arch/arc/include/asm/jump_label.h
@@ -31,7 +31,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
- asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
+ asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
"1: \n"
"nop \n"
".pushsection __jump_table, \"aw\" \n"
@@ -47,7 +47,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key,
bool branch)
{
- asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
+ asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
"1: \n"
"b %l[l_yes] \n"
".pushsection __jump_table, \"aw\" \n"
diff --git a/arch/arm/boot/dts/imx1-ads.dts b/arch/arm/boot/dts/imx1-ads.dts
index 5833fb6f1..2c817c4a4 100644
--- a/arch/arm/boot/dts/imx1-ads.dts
+++ b/arch/arm/boot/dts/imx1-ads.dts
@@ -65,7 +65,7 @@
pinctrl-0 = <&pinctrl_weim>;
status = "okay";
- nor: nor@0,0 {
+ nor: flash@0,0 {
compatible = "cfi-flash";
reg = <0 0x00000000 0x02000000>;
bank-width = <4>;
diff --git a/arch/arm/boot/dts/imx1-apf9328.dts b/arch/arm/boot/dts/imx1-apf9328.dts
index 77b21aa7a..27e72b07b 100644
--- a/arch/arm/boot/dts/imx1-apf9328.dts
+++ b/arch/arm/boot/dts/imx1-apf9328.dts
@@ -45,7 +45,7 @@
pinctrl-0 = <&pinctrl_weim>;
status = "okay";
- nor: nor@0,0 {
+ nor: flash@0,0 {
compatible = "cfi-flash";
reg = <0 0x00000000 0x02000000>;
bank-width = <2>;
diff --git a/arch/arm/boot/dts/imx1.dtsi b/arch/arm/boot/dts/imx1.dtsi
index e312f1e74..4aeb74479 100644
--- a/arch/arm/boot/dts/imx1.dtsi
+++ b/arch/arm/boot/dts/imx1.dtsi
@@ -268,9 +268,12 @@
status = "disabled";
};
- esram: esram@300000 {
+ esram: sram@300000 {
compatible = "mmio-sram";
reg = <0x00300000 0x20000>;
+ ranges = <0 0x00300000 0x20000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
};
};
};
diff --git a/arch/arm/boot/dts/imx23-sansa.dts b/arch/arm/boot/dts/imx23-sansa.dts
index 46057d9bf..c2efcc20a 100644
--- a/arch/arm/boot/dts/imx23-sansa.dts
+++ b/arch/arm/boot/dts/imx23-sansa.dts
@@ -175,10 +175,8 @@
#address-cells = <1>;
#size-cells = <0>;
compatible = "i2c-gpio";
- gpios = <
- &gpio1 24 0 /* SDA */
- &gpio1 22 0 /* SCL */
- >;
+ sda-gpios = <&gpio1 24 0>;
+ scl-gpios = <&gpio1 22 0>;
i2c-gpio,delay-us = <2>; /* ~100 kHz */
};
@@ -186,10 +184,8 @@
#address-cells = <1>;
#size-cells = <0>;
compatible = "i2c-gpio";
- gpios = <
- &gpio0 31 0 /* SDA */
- &gpio0 30 0 /* SCL */
- >;
+ sda-gpios = <&gpio0 31 0>;
+ scl-gpios = <&gpio0 30 0>;
i2c-gpio,delay-us = <2>; /* ~100 kHz */
touch: touch@20 {
diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
index 7f4c60245..b236d23f8 100644
--- a/arch/arm/boot/dts/imx23.dtsi
+++ b/arch/arm/boot/dts/imx23.dtsi
@@ -59,7 +59,7 @@
reg = <0x80000000 0x2000>;
};
- dma_apbh: dma-apbh@80004000 {
+ dma_apbh: dma-controller@80004000 {
compatible = "fsl,imx23-dma-apbh";
reg = <0x80004000 0x2000>;
interrupts = <0 14 20 0
@@ -414,7 +414,7 @@
status = "disabled";
};
- dma_apbx: dma-apbx@80024000 {
+ dma_apbx: dma-controller@80024000 {
compatible = "fsl,imx23-dma-apbx";
reg = <0x80024000 0x2000>;
interrupts = <7 5 9 26
diff --git a/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi b/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi
index 0703f62d1..93a6e4e68 100644
--- a/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi
+++ b/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi
@@ -27,7 +27,7 @@
pinctrl-0 = <&pinctrl_i2c1>;
status = "okay";
- pcf8563@51 {
+ rtc@51 {
compatible = "nxp,pcf8563";
reg = <0x51>;
};
diff --git a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts
index 7d4301b22..1ed3fb7b9 100644
--- a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts
+++ b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts
@@ -16,7 +16,7 @@
bus-width = <18>;
display-timings {
native-mode = <&qvga_timings>;
- qvga_timings: 320x240 {
+ qvga_timings: timing0 {
clock-frequency = <6500000>;
hactive = <320>;
vactive = <240>;
diff --git a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts
index 80a7f96de..64b2ffac4 100644
--- a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts
+++ b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts
@@ -16,7 +16,7 @@
bus-width = <18>;
display-timings {
native-mode = <&dvi_svga_timings>;
- dvi_svga_timings: 800x600 {
+ dvi_svga_timings: timing0 {
clock-frequency = <40000000>;
hactive = <800>;
vactive = <600>;
diff --git a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts
index 24027a1fb..fb074bfda 100644
--- a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts
+++ b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts
@@ -16,7 +16,7 @@
bus-width = <18>;
display-timings {
native-mode = <&dvi_vga_timings>;
- dvi_vga_timings: 640x480 {
+ dvi_vga_timings: timing0 {
clock-frequency = <31250000>;
hactive = <640>;
vactive = <480>;
diff --git a/arch/arm/boot/dts/imx25-pdk.dts b/arch/arm/boot/dts/imx25-pdk.dts
index fb66884d8..59b40d13a 100644
--- a/arch/arm/boot/dts/imx25-pdk.dts
+++ b/arch/arm/boot/dts/imx25-pdk.dts
@@ -78,7 +78,7 @@
bus-width = <18>;
display-timings {
native-mode = <&wvga_timings>;
- wvga_timings: 640x480 {
+ wvga_timings: timing0 {
hactive = <640>;
vactive = <480>;
hback-porch = <45>;
diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi
index 5f90d72b8..5ac454928 100644
--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi
@@ -543,7 +543,7 @@
};
iim: efuse@53ff0000 {
- compatible = "fsl,imx25-iim", "fsl,imx27-iim";
+ compatible = "fsl,imx25-iim";
reg = <0x53ff0000 0x4000>;
interrupts = <19>;
clocks = <&clks 99>;
diff --git a/arch/arm/boot/dts/imx27-apf27dev.dts b/arch/arm/boot/dts/imx27-apf27dev.dts
index 6f1e8ce9e..3d9bb7fc3 100644
--- a/arch/arm/boot/dts/imx27-apf27dev.dts
+++ b/arch/arm/boot/dts/imx27-apf27dev.dts
@@ -16,7 +16,7 @@
fsl,pcr = <0xfae80083>; /* non-standard but required */
display-timings {
native-mode = <&timing0>;
- timing0: 800x480 {
+ timing0: timing0 {
clock-frequency = <33000033>;
hactive = <800>;
vactive = <480>;
@@ -47,7 +47,7 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_gpio_leds>;
- user {
+ led-user {
label = "Heartbeat";
gpios = <&gpio6 14 GPIO_ACTIVE_HIGH>;
linux,default-trigger = "heartbeat";
diff --git a/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi b/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi
index 74110bbcd..c7e923584 100644
--- a/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi
+++ b/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi
@@ -33,7 +33,7 @@
pinctrl-0 = <&pinctrl_i2c1>;
status = "okay";
- pcf8563@51 {
+ rtc@51 {
compatible = "nxp,pcf8563";
reg = <0x51>;
};
@@ -90,7 +90,7 @@
&weim {
status = "okay";
- nor: nor@0,0 {
+ nor: flash@0,0 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "cfi-flash";
diff --git a/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts b/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts
index 9c3ec82ec..50fa0bd4c 100644
--- a/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts
+++ b/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts
@@ -16,7 +16,7 @@
display-timings {
native-mode = <&timing0>;
- timing0: 320x240 {
+ timing0: timing0 {
clock-frequency = <6500000>;
hactive = <320>;
vactive = <240>;
diff --git a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts
index 188639738..7f36af150 100644
--- a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts
+++ b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts
@@ -19,7 +19,7 @@
fsl,pcr = <0xf0c88080>; /* non-standard but required */
display-timings {
native-mode = <&timing0>;
- timing0: 640x480 {
+ timing0: timing0 {
hactive = <640>;
vactive = <480>;
hback-porch = <112>;
diff --git a/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts b/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts
index 344e77790..d133b9f08 100644
--- a/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts
+++ b/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts
@@ -19,7 +19,7 @@
display-timings {
native-mode = <&timing0>;
- timing0: 240x320 {
+ timing0: timing0 {
clock-frequency = <5500000>;
hactive = <240>;
vactive = <320>;
diff --git a/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi b/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi
index 3d1027317..a5fdc2fd4 100644
--- a/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi
+++ b/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi
@@ -322,7 +322,7 @@
&weim {
status = "okay";
- nor: nor@0,0 {
+ nor: flash@0,0 {
compatible = "cfi-flash";
reg = <0 0x00000000 0x02000000>;
bank-width = <2>;
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index e140307be..eb6daf224 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -588,6 +588,9 @@
iram: sram@ffff4c00 {
compatible = "mmio-sram";
reg = <0xffff4c00 0xb400>;
+ ranges = <0 0xffff4c00 0xb400>;
+ #address-cells = <1>;
+ #size-cells = <1>;
};
};
};
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index 130b4145a..b81592a61 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -78,7 +78,7 @@
status = "disabled";
};
- dma_apbh: dma-apbh@80004000 {
+ dma_apbh: dma-controller@80004000 {
compatible = "fsl,imx28-dma-apbh";
reg = <0x80004000 0x2000>;
interrupts = <82 83 84 85
@@ -994,7 +994,7 @@
status = "disabled";
};
- dma_apbx: dma-apbx@80024000 {
+ dma_apbx: dma-controller@80024000 {
compatible = "fsl,imx28-dma-apbx";
reg = <0x80024000 0x2000>;
interrupts = <78 79 66 0
diff --git a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts
index f9f7d99bd..76f3e07bc 100644
--- a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts
+++ b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts
@@ -76,6 +76,7 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enable_can1_power>;
regulator-name = "can1_supply";
+ startup-delay-us = <1000>;
};
reg_can2_supply: regulator-can2-supply {
@@ -85,6 +86,7 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enable_can2_power>;
regulator-name = "can2_supply";
+ startup-delay-us = <1000>;
};
};
diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index ff1e0173b..2c6eada01 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -150,7 +150,7 @@
interrupt-parent = <&gpc>;
ranges;
- dma_apbh: dma-apbh@110000 {
+ dma_apbh: dma-controller@110000 {
compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh";
reg = <0x00110000 0x2000>;
interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
index 1f1053a89..67d344ae7 100644
--- a/arch/arm/boot/dts/imx6sx.dtsi
+++ b/arch/arm/boot/dts/imx6sx.dtsi
@@ -209,7 +209,7 @@
power-domains = <&pd_pu>;
};
- dma_apbh: dma-apbh@1804000 {
+ dma_apbh: dma-controller@1804000 {
compatible = "fsl,imx6sx-dma-apbh", "fsl,imx28-dma-apbh";
reg = <0x01804000 0x2000>;
interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi
index 2b5996395..aac081b6d 100644
--- a/arch/arm/boot/dts/imx6ul.dtsi
+++ b/arch/arm/boot/dts/imx6ul.dtsi
@@ -164,7 +164,7 @@
<0x00a06000 0x2000>;
};
- dma_apbh: dma-apbh@1804000 {
+ dma_apbh: dma-controller@1804000 {
compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh";
reg = <0x01804000 0x2000>;
interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi
index 7ceb7c09f..7ef685fdd 100644
--- a/arch/arm/boot/dts/imx7d.dtsi
+++ b/arch/arm/boot/dts/imx7d.dtsi
@@ -208,9 +208,6 @@
};
&ca_funnel_in_ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
port@1 {
reg = <1>;
ca_funnel_in_port1: endpoint {
diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi
index 459477071..69aebc691 100644
--- a/arch/arm/boot/dts/imx7s.dtsi
+++ b/arch/arm/boot/dts/imx7s.dtsi
@@ -190,7 +190,11 @@
clock-names = "apb_pclk";
ca_funnel_in_ports: in-ports {
- port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
ca_funnel_in_port0: endpoint {
remote-endpoint = <&etm0_out_port>;
};
@@ -814,7 +818,7 @@
};
lcdif: lcdif@30730000 {
- compatible = "fsl,imx7d-lcdif", "fsl,imx28-lcdif";
+ compatible = "fsl,imx7d-lcdif", "fsl,imx6sx-lcdif";
reg = <0x30730000 0x10000>;
interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_LCDIF_PIXEL_ROOT_CLK>,
@@ -1263,14 +1267,13 @@
};
};
- dma_apbh: dma-apbh@33000000 {
+ dma_apbh: dma-controller@33000000 {
compatible = "fsl,imx7d-dma-apbh", "fsl,imx28-dma-apbh";
reg = <0x33000000 0x2000>;
interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "gpmi0", "gpmi1", "gpmi2", "gpmi3";
#dma-cells = <1>;
dma-channels = <4>;
clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>;
@@ -1279,7 +1282,7 @@
gpmi: nand-controller@33002000{
compatible = "fsl,imx7d-gpmi-nand";
#address-cells = <1>;
- #size-cells = <1>;
+ #size-cells = <0>;
reg = <0x33002000 0x2000>, <0x33004000 0x4000>;
reg-names = "gpmi-nand", "bch";
interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi
index 78686fc72..c420c7c64 100644
--- a/arch/arm/boot/dts/rk3036.dtsi
+++ b/arch/arm/boot/dts/rk3036.dtsi
@@ -402,12 +402,20 @@
pinctrl-0 = <&hdmi_ctl>;
status = "disabled";
- hdmi_in: port {
+ ports {
#address-cells = <1>;
#size-cells = <0>;
- hdmi_in_vop: endpoint@0 {
+
+ hdmi_in: port@0 {
reg = <0>;
- remote-endpoint = <&vop_out_hdmi>;
+
+ hdmi_in_vop: endpoint {
+ remote-endpoint = <&vop_out_hdmi>;
+ };
+ };
+
+ hdmi_out: port@1 {
+ reg = <1>;
};
};
};
diff --git a/arch/arm/include/asm/irq_work.h b/arch/arm/include/asm/irq_work.h
index 3149e4dc1..889599983 100644
--- a/arch/arm/include/asm/irq_work.h
+++ b/arch/arm/include/asm/irq_work.h
@@ -9,6 +9,4 @@ static inline bool arch_irq_work_has_interrupt(void)
return is_smp();
}
-extern void arch_irq_work_raise(void);
-
#endif /* _ASM_ARM_IRQ_WORK_H */
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index e12d7d096..e4eb54f6c 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -11,7 +11,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
WASM(nop) "\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
@@ -25,7 +25,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
WASM(b) " %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index 95e731676..961daac65 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -339,6 +339,7 @@ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = {
GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
GPIO_LOOKUP_IDX("G", 0, NULL, 1,
GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
+ { }
},
};
diff --git a/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts b/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts
index 8ffbcb2b1..bbd3c05cb 100644
--- a/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts
@@ -15,7 +15,7 @@
#size-cells = <2>;
aliases {
- serial0 = &uart_B;
+ serial0 = &uart_b;
};
memory@0 {
@@ -25,6 +25,6 @@
};
-&uart_B {
+&uart_b {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
index ad50cba42..372a03762 100644
--- a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
@@ -118,14 +118,14 @@
<10 11 12 13 14 15 16 17 18 19 20 21>;
};
- uart_B: serial@7a000 {
+ uart_b: serial@7a000 {
compatible = "amlogic,meson-s4-uart",
"amlogic,meson-ao-uart";
reg = <0x0 0x7a000 0x0 0x18>;
interrupts = <GIC_SPI 169 IRQ_TYPE_EDGE_RISING>;
- status = "disabled";
clocks = <&xtal>, <&xtal>, <&xtal>;
clock-names = "xtal", "pclk", "baud";
+ status = "disabled";
};
reset: reset-controller@2000 {
diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts
index 9d116e1fb..1ac4f8c24 100644
--- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts
+++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts
@@ -169,10 +169,6 @@
};
};
-&blsp_dma {
- status = "okay";
-};
-
&blsp_i2c2 {
/* On Low speed expansion */
status = "okay";
diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi
index bafac2cf7..987cebbda 100644
--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -1522,7 +1522,7 @@
clock-names = "bam_clk";
#dma-cells = <1>;
qcom,ee = <0>;
- status = "disabled";
+ qcom,controlled-remotely;
};
blsp1_uart1: serial@78af000 {
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 9de2248a3..789121171 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -390,6 +390,19 @@
reg = <0x0 0x80000000 0x0 0x0>;
};
+ etm {
+ compatible = "qcom,coresight-remote-etm";
+
+ out-ports {
+ port {
+ modem_etm_out_funnel_in2: endpoint {
+ remote-endpoint =
+ <&funnel_in2_in_modem_etm>;
+ };
+ };
+ };
+ };
+
psci {
compatible = "arm,psci-1.0";
method = "smc";
@@ -2565,6 +2578,14 @@
clocks = <&rpmcc RPM_QDSS_CLK>, <&rpmcc RPM_QDSS_A_CLK>;
clock-names = "apb_pclk", "atclk";
+ in-ports {
+ port {
+ funnel_in2_in_modem_etm: endpoint {
+ remote-endpoint =
+ <&modem_etm_out_funnel_in2>;
+ };
+ };
+ };
out-ports {
port {
diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi
index b00b8164c..7a4125053 100644
--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi
@@ -1903,9 +1903,11 @@
cpu = <&CPU4>;
- port{
- etm4_out: endpoint {
- remote-endpoint = <&apss_funnel_in4>;
+ out-ports {
+ port{
+ etm4_out: endpoint {
+ remote-endpoint = <&apss_funnel_in4>;
+ };
};
};
};
@@ -1920,9 +1922,11 @@
cpu = <&CPU5>;
- port{
- etm5_out: endpoint {
- remote-endpoint = <&apss_funnel_in5>;
+ out-ports {
+ port{
+ etm5_out: endpoint {
+ remote-endpoint = <&apss_funnel_in5>;
+ };
};
};
};
@@ -1937,9 +1941,11 @@
cpu = <&CPU6>;
- port{
- etm6_out: endpoint {
- remote-endpoint = <&apss_funnel_in6>;
+ out-ports {
+ port{
+ etm6_out: endpoint {
+ remote-endpoint = <&apss_funnel_in6>;
+ };
};
};
};
@@ -1954,9 +1960,11 @@
cpu = <&CPU7>;
- port{
- etm7_out: endpoint {
- remote-endpoint = <&apss_funnel_in7>;
+ out-ports {
+ port{
+ etm7_out: endpoint {
+ remote-endpoint = <&apss_funnel_in7>;
+ };
};
};
};
diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 4d5905ef0..95c515da9 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -4049,7 +4049,7 @@
assigned-clock-rates = <19200000>, <150000000>;
interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc_intc 6 IRQ_TYPE_LEVEL_HIGH>,
<&pdc_intc 8 IRQ_TYPE_EDGE_BOTH>,
<&pdc_intc 9 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
@@ -4100,7 +4100,7 @@
assigned-clock-rates = <19200000>, <150000000>;
interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
- <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc_intc 7 IRQ_TYPE_LEVEL_HIGH>,
<&pdc_intc 10 IRQ_TYPE_EDGE_BOTH>,
<&pdc_intc 11 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi
index 8efd0e227..eb1a93699 100644
--- a/arch/arm64/boot/dts/qcom/sm8150.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi
@@ -3629,7 +3629,7 @@
assigned-clock-rates = <19200000>, <200000000>;
interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 8 IRQ_TYPE_EDGE_BOTH>,
<&pdc 9 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
@@ -3678,7 +3678,7 @@
assigned-clock-rates = <19200000>, <200000000>;
interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
- <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 7 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 10 IRQ_TYPE_EDGE_BOTH>,
<&pdc 11 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi
index bfa358042..61f018644 100644
--- a/arch/arm64/boot/dts/rockchip/px30.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30.dtsi
@@ -607,6 +607,7 @@
clock-names = "spiclk", "apb_pclk";
dmas = <&dmac 12>, <&dmac 13>;
dma-names = "tx", "rx";
+ num-cs = <2>;
pinctrl-names = "default";
pinctrl-0 = <&spi0_clk &spi0_csn &spi0_miso &spi0_mosi>;
#address-cells = <1>;
@@ -622,6 +623,7 @@
clock-names = "spiclk", "apb_pclk";
dmas = <&dmac 14>, <&dmac 15>;
dma-names = "tx", "rx";
+ num-cs = <2>;
pinctrl-names = "default";
pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_csn1 &spi1_miso &spi1_mosi>;
#address-cells = <1>;
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index bac4cabef..467ac2f76 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req)
src += blocks * AES_BLOCK_SIZE;
}
if (nbytes && walk.nbytes == walk.total) {
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *d = dst;
+
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(buf + sizeof(buf) - nbytes,
+ src, nbytes);
+
neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
nbytes, walk.iv);
+
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ memcpy(d, dst, nbytes);
+
nbytes = 0;
}
kernel_neon_end();
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 3622e9f4f..51738c56e 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -229,7 +229,7 @@ alternative_has_feature_likely(unsigned long feature)
compiletime_assert(feature < ARM64_NCAPS,
"feature must be < ARM64_NCAPS");
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops)
:
: [feature] "i" (feature)
@@ -247,7 +247,7 @@ alternative_has_feature_unlikely(unsigned long feature)
compiletime_assert(feature < ARM64_NCAPS,
"feature must be < ARM64_NCAPS");
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE("nop", "b %l[l_yes]", %[feature])
:
: [feature] "i" (feature)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 7dce9c0aa..af3a678a7 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -61,6 +61,7 @@
#define ARM_CPU_IMP_HISI 0x48
#define ARM_CPU_IMP_APPLE 0x61
#define ARM_CPU_IMP_AMPERE 0xC0
+#define ARM_CPU_IMP_MICROSOFT 0x6D
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@@ -128,6 +129,8 @@
#define AMPERE_CPU_PART_AMPERE1 0xAC3
+#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */
+
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -179,6 +182,7 @@
#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
+#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 62c846be2..a75c0772e 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -103,6 +103,7 @@ static inline void free_screen_info(struct screen_info *si)
}
#define EFI_ALLOC_ALIGN SZ_64K
+#define EFI_ALLOC_LIMIT ((1UL << 48) - 1)
/*
* On ARM systems, virtually remapped UEFI runtime services are set up in two
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index d720b6f7e..da1841371 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -343,6 +343,7 @@ extern void sme_alloc(struct task_struct *task, bool flush);
extern unsigned int sme_get_vl(void);
extern int sme_set_current_vl(unsigned long arg);
extern int sme_get_current_vl(void);
+extern void sme_suspend_exit(void);
/*
* Return how many bytes of memory are required to store the full SME
@@ -372,6 +373,7 @@ static inline int sme_max_vl(void) { return 0; }
static inline int sme_max_virtualisable_vl(void) { return 0; }
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
static inline int sme_get_current_vl(void) { return -EINVAL; }
+static inline void sme_suspend_exit(void) { }
static inline size_t za_state_size(struct task_struct const *task)
{
diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h
index 81bbfa3a0..a1020285e 100644
--- a/arch/arm64/include/asm/irq_work.h
+++ b/arch/arm64/include/asm/irq_work.h
@@ -2,8 +2,6 @@
#ifndef __ASM_IRQ_WORK_H
#define __ASM_IRQ_WORK_H
-extern void arch_irq_work_raise(void);
-
static inline bool arch_irq_work_has_interrupt(void)
{
return true;
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index cea441b6a..b5bd3c38a 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -18,7 +18,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: nop \n\t"
" .pushsection __jump_table, \"aw\" \n\t"
" .align 3 \n\t"
@@ -35,7 +35,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key,
bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: b %l[l_yes] \n\t"
" .pushsection __jump_table, \"aw\" \n\t"
" .align 3 \n\t"
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 61f22e9c9..74584597b 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -390,6 +390,7 @@ static const struct midr_range erratum_1463225[] = {
static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2139208
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2119858
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
@@ -403,6 +404,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
static const struct midr_range tsb_flush_fail_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2067961
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2054223
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
@@ -415,6 +417,7 @@ static const struct midr_range tsb_flush_fail_cpus[] = {
static struct midr_range trbe_write_out_of_range_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2253138
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2224489
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 8c226d79a..59b5a16ba 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1347,6 +1347,20 @@ void __init sme_setup(void)
get_sme_default_vl());
}
+void sme_suspend_exit(void)
+{
+ u64 smcr = 0;
+
+ if (!system_supports_sme())
+ return;
+
+ if (system_supports_fa64())
+ smcr |= SMCR_ELx_FA64;
+
+ write_sysreg_s(smcr, SYS_SMCR_EL1);
+ write_sysreg_s(0, SYS_SMPRI_EL1);
+}
+
#endif /* CONFIG_ARM64_SME */
static void sve_init_regs(void)
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 38dbd3828..d2850b49a 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -22,6 +22,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/exception.h>
+#include <asm/numa.h>
#include <asm/vmap_stack.h>
#include <asm/softirq_stack.h>
@@ -46,17 +47,17 @@ static void init_irq_scs(void)
for_each_possible_cpu(cpu)
per_cpu(irq_shadow_call_stack_ptr, cpu) =
- scs_alloc(cpu_to_node(cpu));
+ scs_alloc(early_cpu_to_node(cpu));
}
#ifdef CONFIG_VMAP_STACK
-static void init_irq_stacks(void)
+static void __init init_irq_stacks(void)
{
int cpu;
unsigned long *p;
for_each_possible_cpu(cpu) {
- p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
+ p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, early_cpu_to_node(cpu));
per_cpu(irq_stack_ptr, cpu) = p;
}
}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 7b0643fe2..214b1805e 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -168,7 +168,11 @@ armv8pmu_events_sysfs_show(struct device *dev,
PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config)
static struct attribute *armv8_pmuv3_event_attrs[] = {
- ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
+ /*
+ * Don't expose the sw_incr event in /sys. It's not usable as writes to
+ * PMSWINC_EL0 will trap as PMUSERENR.{SW,EN}=={0,0} and event rotation
+ * means we don't have a fixed event<->counter relationship regardless.
+ */
ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 8b02d3108..064d996cc 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -11,6 +11,7 @@
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/exec.h>
+#include <asm/fpsimd.h>
#include <asm/mte.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
@@ -77,6 +78,8 @@ void notrace __cpu_suspend_exit(void)
*/
spectre_v4_enable_mitigation(NULL);
+ sme_suspend_exit();
+
/* Restore additional feature-specific configuration */
ptrauth_suspend_exit();
}
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 36c8f66ca..d513533cc 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -68,11 +68,9 @@ VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -fno-common \
-Werror-implicit-function-declaration \
-Wno-format-security \
- -Wdeclaration-after-statement \
-std=gnu11
VDSO_CFLAGS += -O2
# Some useful compiler-dependent flags from top-level Makefile
-VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,)
VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign)
VDSO_CFLAGS += -fno-strict-overflow
VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes)
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 3c344e4cd..092327665 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -462,6 +462,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
}
irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
+ if (!irq)
+ continue;
+
raw_spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = pendmask & (1U << bit_nr);
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
@@ -1427,6 +1430,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
for (i = 0; i < irq_count; i++) {
irq = vgic_get_irq(kvm, NULL, intids[i]);
+ if (!irq)
+ continue;
update_affinity(irq, vcpu2);
diff --git a/arch/csky/include/asm/irq_work.h b/arch/csky/include/asm/irq_work.h
index 33aaf39d6..d39fcc1f5 100644
--- a/arch/csky/include/asm/irq_work.h
+++ b/arch/csky/include/asm/irq_work.h
@@ -7,5 +7,5 @@ static inline bool arch_irq_work_has_interrupt(void)
{
return true;
}
-extern void arch_irq_work_raise(void);
+
#endif /* __ASM_CSKY_IRQ_WORK_H */
diff --git a/arch/csky/include/asm/jump_label.h b/arch/csky/include/asm/jump_label.h
index 98a3f4b16..ef2e37a10 100644
--- a/arch/csky/include/asm/jump_label.h
+++ b/arch/csky/include/asm/jump_label.h
@@ -12,7 +12,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: nop32 \n"
" .pushsection __jump_table, \"aw\" \n"
" .align 2 \n"
@@ -29,7 +29,7 @@ label:
static __always_inline bool arch_static_branch_jump(struct static_key *key,
bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: bsr32 %l[label] \n"
" .pushsection __jump_table, \"aw\" \n"
" .align 2 \n"
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e737dc8cd..fa3171f56 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -9,6 +9,7 @@ config LOONGARCH
select ARCH_BINFMT_ELF_STATE
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_SPECIAL
@@ -80,6 +81,7 @@ config LOONGARCH
select GPIOLIB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_MMAP_RND_BITS if MMU
+ select HAVE_ARCH_SECCOMP
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
@@ -461,23 +463,6 @@ config PHYSICAL_START
specified in the "crashkernel=YM@XM" command line boot parameter
passed to the panic-ed kernel).
-config SECCOMP
- bool "Enable seccomp to safely compute untrusted bytecode"
- depends on PROC_FS
- default y
- help
- This kernel feature is useful for number crunching applications
- that may need to compute untrusted bytecode during their
- execution. By using pipes or other transports made available to
- the process as file descriptors supporting the read/write
- syscalls, it's possible to isolate those applications in
- their own address space using seccomp. Once seccomp is
- enabled via /proc/<pid>/seccomp, it cannot be disabled
- and the task is only allowed to execute a few safe syscalls
- defined by each seccomp mode.
-
- If unsure, say Y. Only embedded should say N here.
-
endmenu
config ARCH_SELECT_MEMORY_MODEL
@@ -495,10 +480,6 @@ config ARCH_SPARSEMEM_ENABLE
or have huge holes in the physical address space for other reasons.
See <file:Documentation/mm/numa.rst> for more.
-config ARCH_ENABLE_THP_MIGRATION
- def_bool y
- depends on TRANSPARENT_HUGEPAGE
-
config ARCH_MEMORY_PROBE
def_bool y
depends on MEMORY_HOTPLUG
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 434bfc1cd..18a2b37f4 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -297,6 +297,7 @@ void play_dead(void)
addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
} while (addr == 0);
+ local_irq_disable();
init_fn = (void *)TO_CACHE(addr);
iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
@@ -473,7 +474,6 @@ asmlinkage void start_secondary(void)
sync_counter();
cpu = raw_smp_processor_id();
set_my_cpu_offset(per_cpu_offset(cpu));
- rcu_cpu_starting(cpu);
cpu_probe();
constant_clockevent_init();
diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
index da3681f13..eeb2d815c 100644
--- a/arch/loongarch/mm/tlb.c
+++ b/arch/loongarch/mm/tlb.c
@@ -271,12 +271,16 @@ void setup_tlb_handler(int cpu)
set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE);
set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE);
set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE);
- }
+ } else {
+ int vec_sz __maybe_unused;
+ void *addr __maybe_unused;
+ struct page *page __maybe_unused;
+
+ /* Avoid lockdep warning */
+ rcu_cpu_starting(cpu);
+
#ifdef CONFIG_NUMA
- else {
- void *addr;
- struct page *page;
- const int vec_sz = sizeof(exception_handlers);
+ vec_sz = sizeof(exception_handlers);
if (pcpu_handlers[cpu])
return;
@@ -292,8 +296,8 @@ void setup_tlb_handler(int cpu)
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY);
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY);
csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY);
- }
#endif
+ }
}
void tlb_init(int cpu)
diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h
index 4044eaf98..0921ddda1 100644
--- a/arch/mips/include/asm/checksum.h
+++ b/arch/mips/include/asm/checksum.h
@@ -241,7 +241,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
" .set pop"
: "=&r" (sum), "=&r" (tmp)
: "r" (saddr), "r" (daddr),
- "0" (htonl(len)), "r" (htonl(proto)), "r" (sum));
+ "0" (htonl(len)), "r" (htonl(proto)), "r" (sum)
+ : "memory");
return csum_fold(sum);
}
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index c5c6864e6..405c85173 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -36,7 +36,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\t" B_INSN " 2f\n\t"
+ asm goto("1:\t" B_INSN " 2f\n\t"
"2:\t.insn\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
@@ -50,7 +50,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t"
+ asm goto("1:\t" J_INSN " %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
".popsection\n\t"
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 246c6a6b0..5b778995d 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2007,7 +2007,13 @@ unsigned long vi_handlers[64];
void reserve_exception_space(phys_addr_t addr, unsigned long size)
{
- memblock_reserve(addr, size);
+ /*
+ * reserve exception space on CPUs other than CPU0
+ * is too late, since memblock is unavailable when APs
+ * up
+ */
+ if (smp_processor_id() == 0)
+ memblock_reserve(addr, size);
}
void __init *set_except_vector(int n, void *addr)
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 345d5e021..abf39ecda 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -24,7 +24,6 @@ config PARISC
select RTC_DRV_GENERIC
select INIT_ALL_POSSIBLE
select BUG
- select BUILDTIME_TABLE_SORT
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_KERNEL_BZIP2
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index 74d17d7e7..5937d5eda 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -576,6 +576,7 @@
.section __ex_table,"aw" ! \
.align 4 ! \
.word (fault_addr - .), (except_addr - .) ! \
+ or %r0,%r0,%r0 ! \
.previous
diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h
new file mode 100644
index 000000000..4ea23e3d7
--- /dev/null
+++ b/arch/parisc/include/asm/extable.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PARISC_EXTABLE_H
+#define __PARISC_EXTABLE_H
+
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+
+/*
+ * The exception table consists of three addresses:
+ *
+ * - A relative address to the instruction that is allowed to fault.
+ * - A relative address at which the program should continue (fixup routine)
+ * - An asm statement which specifies which CPU register will
+ * receive -EFAULT when an exception happens if the lowest bit in
+ * the fixup address is set.
+ *
+ * Note: The register specified in the err_opcode instruction will be
+ * modified at runtime if a fault happens. Register %r0 will be ignored.
+ *
+ * Since relative addresses are used, 32bit values are sufficient even on
+ * 64bit kernel.
+ */
+
+struct pt_regs;
+int fixup_exception(struct pt_regs *regs);
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+struct exception_table_entry {
+ int insn; /* relative address of insn that is allowed to fault. */
+ int fixup; /* relative address of fixup routine */
+ int err_opcode; /* sample opcode with register which holds error code */
+};
+
+#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\
+ ".section __ex_table,\"aw\"\n" \
+ ".align 4\n" \
+ ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \
+ opcode "\n" \
+ ".previous\n"
+
+/*
+ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
+ * (with lowest bit set) for which the fault handler in fixup_exception() will
+ * load -EFAULT on fault into the register specified by the err_opcode instruction,
+ * and zeroes the target register in case of a read fault in get_user().
+ */
+#define ASM_EXCEPTIONTABLE_VAR(__err_var) \
+ int __err_var = 0
+#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\
+ ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register)
+
+static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
+ struct exception_table_entry *b,
+ struct exception_table_entry tmp,
+ int delta)
+{
+ a->fixup = b->fixup + delta;
+ b->fixup = tmp.fixup - delta;
+ a->err_opcode = b->err_opcode;
+ b->err_opcode = tmp.err_opcode;
+}
+#define swap_ex_entry_fixup swap_ex_entry_fixup
+
+#endif
diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h
index 94428798b..317ebc5ed 100644
--- a/arch/parisc/include/asm/jump_label.h
+++ b/arch/parisc/include/asm/jump_label.h
@@ -12,7 +12,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".align %1\n\t"
@@ -29,7 +29,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"b,n %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".align %1\n\t"
diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
index c822bd0c0..51f40eaf7 100644
--- a/arch/parisc/include/asm/special_insns.h
+++ b/arch/parisc/include/asm/special_insns.h
@@ -8,7 +8,8 @@
"copy %%r0,%0\n" \
"8:\tlpa %%r0(%1),%0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \
+ "or %%r0,%%r0,%%r0") \
: "=&r" (pa) \
: "r" (va) \
: "memory" \
@@ -22,7 +23,8 @@
"copy %%r0,%0\n" \
"8:\tlpa %%r0(%%sr3,%1),%0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \
+ "or %%r0,%%r0,%%r0") \
: "=&r" (pa) \
: "r" (va) \
: "memory" \
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 416507989..88d0ae576 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -7,6 +7,7 @@
*/
#include <asm/page.h>
#include <asm/cache.h>
+#include <asm/extable.h>
#include <linux/bug.h>
#include <linux/string.h>
@@ -26,37 +27,6 @@
#define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr)
#endif
-/*
- * The exception table contains two values: the first is the relative offset to
- * the address of the instruction that is allowed to fault, and the second is
- * the relative offset to the address of the fixup routine. Since relative
- * addresses are used, 32bit values are sufficient even on 64bit kernel.
- */
-
-#define ARCH_HAS_RELATIVE_EXTABLE
-struct exception_table_entry {
- int insn; /* relative address of insn that is allowed to fault. */
- int fixup; /* relative address of fixup routine */
-};
-
-#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
- ".section __ex_table,\"aw\"\n" \
- ".align 4\n" \
- ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \
- ".previous\n"
-
-/*
- * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
- * (with lowest bit set) for which the fault handler in fixup_exception() will
- * load -EFAULT into %r29 for a read or write fault, and zeroes the target
- * register in case of a read fault in get_user().
- */
-#define ASM_EXCEPTIONTABLE_REG 29
-#define ASM_EXCEPTIONTABLE_VAR(__variable) \
- register long __variable __asm__ ("r29") = 0
-#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
- ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
-
#define __get_user_internal(sr, val, ptr) \
({ \
ASM_EXCEPTIONTABLE_VAR(__gu_err); \
@@ -83,7 +53,7 @@ struct exception_table_entry {
\
__asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \
: "=r"(__gu_val), "+r"(__gu_err) \
: "i"(sr), "r"(ptr)); \
\
@@ -115,8 +85,8 @@ struct exception_table_entry {
"1: ldw 0(%%sr%2,%3),%0\n" \
"2: ldw 4(%%sr%2,%3),%R0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1") \
: "=&r"(__gu_tmp.l), "+r"(__gu_err) \
: "i"(sr), "r"(ptr)); \
\
@@ -174,7 +144,7 @@ struct exception_table_entry {
__asm__ __volatile__ ( \
"1: " stx " %1,0(%%sr%2,%3)\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \
: "+r"(__pu_err) \
: "r"(x), "i"(sr), "r"(ptr))
@@ -186,15 +156,14 @@ struct exception_table_entry {
"1: stw %1,0(%%sr%2,%3)\n" \
"2: stw %R1,4(%%sr%2,%3)\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0") \
: "+r"(__pu_err) \
: "r"(__val), "i"(sr), "r"(ptr)); \
} while (0)
#endif /* !defined(CONFIG_64BIT) */
-
/*
* Complex access routines -- external declarations
*/
@@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src,
#define INLINE_COPY_TO_USER
#define INLINE_COPY_FROM_USER
-struct pt_regs;
-int fixup_exception(struct pt_regs *regs);
-
#endif /* __PARISC_UACCESS_H */
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index 8f12b9f31..a58292873 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -1003,6 +1003,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data)
pr_info("\n");
+ /* Prevent hung task messages when printing on serial console */
+ cond_resched();
+
pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n",
hpa, parisc_hardware_description(&dev->id));
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 1f6c776d8..c67883487 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -171,7 +171,6 @@ static int __init processor_probe(struct parisc_device *dev)
p->cpu_num = cpu_info.cpu_num;
p->cpu_loc = cpu_info.cpu_loc;
- set_cpu_possible(cpuid, true);
store_cpu_topology(cpuid);
#ifdef CONFIG_SMP
@@ -466,13 +465,6 @@ static struct parisc_driver cpu_driver __refdata = {
*/
void __init processor_init(void)
{
- unsigned int cpu;
-
reset_cpu_topology();
-
- /* reset possible mask. We will mark those which are possible. */
- for_each_possible_cpu(cpu)
- set_cpu_possible(cpu, false);
-
register_parisc_driver(&cpu_driver);
}
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index e8a4d77cf..8a8e7d722 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -118,8 +118,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg)
"2: ldbs 1(%%sr1,%3), %0\n"
" depw %2, 23, 24, %0\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
: "+r" (val), "+r" (ret), "=&r" (temp1)
: "r" (saddr), "r" (regs->isr) );
@@ -150,8 +150,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
" mtctl %2,11\n"
" vshd %0,%3,%0\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
: "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2)
: "r" (saddr), "r" (regs->isr) );
@@ -187,8 +187,8 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
" mtsar %%r19\n"
" shrpd %0,%%r20,%%sar,%0\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
: "=r" (val), "+r" (ret)
: "0" (val), "r" (saddr), "r" (regs->isr)
: "r19", "r20" );
@@ -207,9 +207,9 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
" vshd %0,%R0,%0\n"
" vshd %R0,%4,%R0\n"
"4: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1")
: "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
: "r" (regs->isr) );
}
@@ -242,8 +242,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg)
"1: stb %1, 0(%%sr1, %3)\n"
"2: stb %2, 1(%%sr1, %3)\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0")
: "+r" (ret), "=&r" (temp1)
: "r" (val), "r" (regs->ior), "r" (regs->isr) );
@@ -283,8 +283,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
" stw %%r20,0(%%sr1,%2)\n"
" stw %%r21,4(%%sr1,%2)\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0")
: "+r" (ret)
: "r" (val), "r" (regs->ior), "r" (regs->isr)
: "r19", "r20", "r21", "r22", "r1" );
@@ -327,10 +327,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
"3: std %%r20,0(%%sr1,%2)\n"
"4: std %%r21,8(%%sr1,%2)\n"
"5: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0")
: "+r" (ret)
: "r" (val), "r" (regs->ior), "r" (regs->isr)
: "r19", "r20", "r21", "r22", "r1" );
@@ -356,11 +356,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
"4: stw %%r1,4(%%sr1,%3)\n"
"5: stw %2,8(%%sr1,%3)\n"
"6: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0")
: "+r" (ret)
: "r" (valh), "r" (vall), "r" (regs->ior), "r" (regs->isr)
: "r19", "r20", "r21", "r1" );
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index b00aa98b5..fbd9ada5e 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs)
* Fix up get_user() and put_user().
* ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
* bit in the relative address of the fixup routine to indicate
- * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with
- * -EFAULT to report a userspace access error.
+ * that the register encoded in the "or %r0,%r0,register"
+ * opcode should be loaded with -EFAULT to report a userspace
+ * access error.
*/
if (fix->fixup & 1) {
- regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT;
+ int fault_error_reg = fix->err_opcode & 0x1f;
+ if (!WARN_ON(!fault_error_reg))
+ regs->gr[fault_error_reg] = -EFAULT;
+ pr_debug("Unalignment fixup of register %d at %pS\n",
+ fault_error_reg, (void*)regs->iaoq[0]);
/* zero target register for get_user() */
if (parisc_acctyp(0, regs->iir) == VM_READ) {
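
For illustration, the fixup above recovers the destination register from the low five bits of the recorded "or %r0,%r0,reg" opcode, exactly the `fix->err_opcode & 0x1f` mask in the hunk. A minimal user-space sketch of that decode (the opcode value below is hypothetical, not taken from a real trap):

#include <stdio.h>
#include <stdint.h>

/* Same mask as "fix->err_opcode & 0x1f" in fixup_exception() above. */
static unsigned int fault_error_reg(uint32_t err_opcode)
{
	return err_opcode & 0x1f;
}

int main(void)
{
	uint32_t opcode = 0x0802001c;	/* hypothetical encoding, low bits name %r28 */

	printf("-EFAULT would be written to gr[%u]\n", fault_error_reg(opcode));
	return 0;
}
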
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 61a473635..20d5052e2 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -74,7 +74,7 @@
##__VA_ARGS__)
#define WARN_ENTRY(insn, flags, label, ...) \
- asm_volatile_goto( \
+ asm goto( \
"1: " insn "\n" \
EX_TABLE(1b, %l[label]) \
_EMIT_BUG_ENTRY \
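
Several hunks in this series, starting here, replace the old asm_volatile_goto() wrapper with plain asm goto (and asm_goto_output where the asm has output operands). As a rough, compiler-generic sketch of the construct itself, not powerpc code, and with a deliberately empty assembler template, inline assembly can branch straight to a C label:

#include <stdio.h>

/* Sketch only: the empty template never actually branches; it just shows
 * the asm goto syntax (inputs, clobbers, then the list of C labels). */
static int checked_read(int *dst, const int *src)
{
	__asm__ goto("" : /* no outputs */ : "r"(src) : /* no clobbers */ : fault);
	*dst = *src;
	return 0;
fault:
	return -14;	/* -EFAULT */
}

int main(void)
{
	int v, x = 42;

	printf("ret=%d v=%d\n", checked_read(&v, &x), v);
	return 0;
}
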
diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h
index b8b0be8f1..c6d3078bd 100644
--- a/arch/powerpc/include/asm/irq_work.h
+++ b/arch/powerpc/include/asm/irq_work.h
@@ -6,6 +6,5 @@ static inline bool arch_irq_work_has_interrupt(void)
{
return true;
}
-extern void arch_irq_work_raise(void);
#endif /* _ASM_POWERPC_IRQ_WORK_H */
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 93ce3ec25..2f2a86ed2 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -17,7 +17,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"nop # arch_static_branch\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".long 1b - ., %l[l_yes] - .\n\t"
@@ -32,7 +32,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"b %l[l_yes] # arch_static_branch_jump\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".long 1b - ., %l[l_yes] - .\n\t"
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 94b981152..07fabb054 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -417,5 +417,9 @@ extern void *abatron_pteptrs[2];
#include <asm/nohash/mmu.h>
#endif
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
+#define __HAVE_ARCH_RESERVED_KERNEL_PAGES
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_MMU_H_ */
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index 4c6c6dbd1..da827d2d0 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -42,14 +42,6 @@ u64 memory_hotplug_max(void);
#else
#define memory_hotplug_max() memblock_end_of_DRAM()
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_FA_DUMP
-#define __HAVE_ARCH_RESERVED_KERNEL_PAGES
-#endif
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-extern int create_section_mapping(unsigned long start, unsigned long end,
- int nid, pgprot_t prot);
-#endif
#endif /* __KERNEL__ */
#endif /* _ASM_MMZONE_H_ */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index af58f1ed3..c4b798aa6 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -14,7 +14,7 @@
#ifdef __KERNEL__
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN) && CONFIG_THREAD_SHIFT < 15
#define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1)
#else
#define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 3ddc65c63..45d4c9cf3 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -72,7 +72,7 @@ __pu_failed: \
* are no aliasing issues.
*/
#define __put_user_asm_goto(x, addr, label, op) \
- asm_volatile_goto( \
+ asm goto( \
"1: " op "%U1%X1 %0,%1 # put_user\n" \
EX_TABLE(1b, %l2) \
: \
@@ -85,7 +85,7 @@ __pu_failed: \
__put_user_asm_goto(x, ptr, label, "std")
#else /* __powerpc64__ */
#define __put_user_asm2_goto(x, addr, label) \
- asm_volatile_goto( \
+ asm goto( \
"1: stw%X1 %0, %1\n" \
"2: stw%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \
@@ -132,7 +132,7 @@ do { \
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
#define __get_user_asm_goto(x, addr, label, op) \
- asm_volatile_goto( \
+ asm_goto_output( \
"1: "op"%U1%X1 %0, %1 # get_user\n" \
EX_TABLE(1b, %l2) \
: "=r" (x) \
@@ -145,7 +145,7 @@ do { \
__get_user_asm_goto(x, addr, label, "ld")
#else /* __powerpc64__ */
#define __get_user_asm2_goto(x, addr, label) \
- asm_volatile_goto( \
+ asm_goto_output( \
"1: lwz%X1 %0, %1\n" \
"2: lwz%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \
diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h
index ceb06b109..2ae8e9a7b 100644
--- a/arch/powerpc/kernel/cpu_specs_e500mc.h
+++ b/arch/powerpc/kernel/cpu_specs_e500mc.h
@@ -8,7 +8,8 @@
#ifdef CONFIG_PPC64
#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
- PPC_FEATURE_HAS_FPU | PPC_FEATURE_64)
+ PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \
+ PPC_FEATURE_BOOKE)
#else
#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
PPC_FEATURE_BOOKE)
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index a019ed6fc..26c151e2a 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -52,7 +52,8 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
mr r10,r1
ld r1,PACAKSAVE(r13)
std r10,0(r1)
- std r11,_NIP(r1)
+ std r11,_LINK(r1)
+ std r11,_NIP(r1) /* Saved LR is also the next instruction */
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
@@ -70,7 +71,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
std r9,GPR13(r1)
SAVE_NVGPRS(r1)
std r11,_XER(r1)
- std r11,_LINK(r1)
std r11,_CTR(r1)
li r11,\trapnr
diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
index 9dc0ad3c5..5a6e44e4d 100644
--- a/arch/powerpc/kernel/irq_64.c
+++ b/arch/powerpc/kernel/irq_64.c
@@ -230,7 +230,7 @@ again:
* This allows interrupts to be unmasked without hard disabling, and
* also without new hard interrupts coming in ahead of pending ones.
*/
- asm_volatile_goto(
+ asm goto(
"1: \n"
" lbz 9,%0(13) \n"
" cmpwi 9,0 \n"
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 3956f3268..362b71238 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1439,10 +1439,12 @@ static int emulate_instruction(struct pt_regs *regs)
return -EINVAL;
}
+#ifdef CONFIG_GENERIC_BUG
int is_valid_bugaddr(unsigned long addr)
{
return is_kernel_addr(addr);
}
+#endif
#ifdef CONFIG_MATH_EMULATION
static int emulate_math(struct pt_regs *regs)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 398b5694a..ec30af8ea 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -586,6 +586,8 @@ static int do_fp_load(struct instruction_op *op, unsigned long ea,
} u;
nb = GETSIZE(op->type);
+ if (nb > sizeof(u))
+ return -EINVAL;
if (!address_ok(regs, ea, nb))
return -EFAULT;
rn = op->reg;
@@ -636,6 +638,8 @@ static int do_fp_store(struct instruction_op *op, unsigned long ea,
} u;
nb = GETSIZE(op->type);
+ if (nb > sizeof(u))
+ return -EINVAL;
if (!address_ok(regs, ea, nb))
return -EFAULT;
rn = op->reg;
@@ -680,6 +684,9 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
u8 b[sizeof(__vector128)];
} u = {};
+ if (size > sizeof(u))
+ return -EINVAL;
+
if (!address_ok(regs, ea & ~0xfUL, 16))
return -EFAULT;
/* align to multiple of size */
@@ -707,6 +714,9 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
u8 b[sizeof(__vector128)];
} u;
+ if (size > sizeof(u))
+ return -EINVAL;
+
if (!address_ok(regs, ea & ~0xfUL, 16))
return -EFAULT;
/* align to multiple of size */
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index f6151a589..87aa76c73 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -463,6 +463,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
set_pte_at(vma->vm_mm, addr, ptep, pte);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* For hash translation mode, we use the deposited table to store hash slot
* information and they are stored at PTRS_PER_PMD offset from related pmd
@@ -484,6 +485,7 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
return true;
}
+#endif
/*
* Does the CPU support tlbie?
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 119ef491f..d3a7726ec 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -126,7 +126,7 @@ void pgtable_cache_add(unsigned int shift)
* as to leave enough 0 bits in the address to contain it. */
unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
HUGEPD_SHIFT_MASK + 1);
- struct kmem_cache *new;
+ struct kmem_cache *new = NULL;
/* It would be nice if this was a BUILD_BUG_ON(), but at the
* moment, gcc doesn't seem to recognize is_power_of_2 as a
@@ -139,7 +139,8 @@ void pgtable_cache_add(unsigned int shift)
align = max_t(unsigned long, align, minalign);
name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
- new = kmem_cache_create(name, table_size, align, 0, ctor(shift));
+ if (name)
+ new = kmem_cache_create(name, table_size, align, 0, ctor(shift));
if (!new)
panic("Could not allocate pgtable cache for order %d", shift);
diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c
index a70828a6d..aa9aa1192 100644
--- a/arch/powerpc/mm/kasan/init_32.c
+++ b/arch/powerpc/mm/kasan/init_32.c
@@ -64,6 +64,7 @@ int __init __weak kasan_init_region(void *start, size_t size)
if (ret)
return ret;
+ k_start = k_start & PAGE_MASK;
block = memblock_alloc(k_end - k_start, PAGE_SIZE);
if (!block)
return -ENOMEM;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index bd9784f77..71250605b 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -179,3 +179,8 @@ static inline bool debug_pagealloc_enabled_or_kfence(void)
{
return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled();
}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int create_section_mapping(unsigned long start, unsigned long end,
+ int nid, pgprot_t prot);
+#endif
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 97b026130..1e5f083cd 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -569,29 +569,6 @@ static void iommu_table_setparms(struct pci_controller *phb,
struct iommu_table_ops iommu_table_lpar_multi_ops;
-/*
- * iommu_table_setparms_lpar
- *
- * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
- */
-static void iommu_table_setparms_lpar(struct pci_controller *phb,
- struct device_node *dn,
- struct iommu_table *tbl,
- struct iommu_table_group *table_group,
- const __be32 *dma_window)
-{
- unsigned long offset, size, liobn;
-
- of_parse_dma_window(dn, dma_window, &liobn, &offset, &size);
-
- iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL,
- &iommu_table_lpar_multi_ops);
-
-
- table_group->tce32_start = offset;
- table_group->tce32_size = size;
-}
-
struct iommu_table_ops iommu_table_pseries_ops = {
.set = tce_build_pSeries,
.clear = tce_free_pSeries,
@@ -719,26 +696,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = {
* dynamic 64bit DMA window, walking up the device tree.
*/
static struct device_node *pci_dma_find(struct device_node *dn,
- const __be32 **dma_window)
+ struct dynamic_dma_window_prop *prop)
{
- const __be32 *dw = NULL;
+ const __be32 *default_prop = NULL;
+ const __be32 *ddw_prop = NULL;
+ struct device_node *rdn = NULL;
+ bool default_win = false, ddw_win = false;
for ( ; dn && PCI_DN(dn); dn = dn->parent) {
- dw = of_get_property(dn, "ibm,dma-window", NULL);
- if (dw) {
- if (dma_window)
- *dma_window = dw;
- return dn;
+ default_prop = of_get_property(dn, "ibm,dma-window", NULL);
+ if (default_prop) {
+ rdn = dn;
+ default_win = true;
+ }
+ ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL);
+ if (ddw_prop) {
+ rdn = dn;
+ ddw_win = true;
+ break;
+ }
+ ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL);
+ if (ddw_prop) {
+ rdn = dn;
+ ddw_win = true;
+ break;
}
- dw = of_get_property(dn, DIRECT64_PROPNAME, NULL);
- if (dw)
- return dn;
- dw = of_get_property(dn, DMA64_PROPNAME, NULL);
- if (dw)
- return dn;
+
+ /* At least found default window, which is the case for normal boot */
+ if (default_win)
+ break;
}
- return NULL;
+ /* For PCI devices there will always be a DMA window, either on the device
+ * or parent bus
+ */
+ WARN_ON(!(default_win | ddw_win));
+
+ /* caller doesn't want to get DMA window property */
+ if (!prop)
+ return rdn;
+
+ /* Parse the DMA window property. During normal system boot, only the
+ * default DMA window is passed in OF. But for kdump, a dedicated adapter
+ * might have both the default window and a DDW in the FDT. In this
+ * scenario, the DDW takes precedence over the default window.
+ */
+ if (ddw_win) {
+ struct dynamic_dma_window_prop *p;
+
+ p = (struct dynamic_dma_window_prop *)ddw_prop;
+ prop->liobn = p->liobn;
+ prop->dma_base = p->dma_base;
+ prop->tce_shift = p->tce_shift;
+ prop->window_shift = p->window_shift;
+ } else if (default_win) {
+ unsigned long offset, size, liobn;
+
+ of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size);
+
+ prop->liobn = cpu_to_be32((u32)liobn);
+ prop->dma_base = cpu_to_be64(offset);
+ prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K);
+ prop->window_shift = cpu_to_be32(order_base_2(size));
+ }
+
+ return rdn;
}
static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
@@ -746,17 +768,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
struct iommu_table *tbl;
struct device_node *dn, *pdn;
struct pci_dn *ppci;
- const __be32 *dma_window = NULL;
+ struct dynamic_dma_window_prop prop;
dn = pci_bus_to_OF_node(bus);
pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
dn);
- pdn = pci_dma_find(dn, &dma_window);
+ pdn = pci_dma_find(dn, &prop);
- if (dma_window == NULL)
- pr_debug(" no ibm,dma-window property !\n");
+ /* On PPC there will always be a DMA window on the bus or on one of the
+ * parent buses. During reboot, the ibm,dma-window property will define
+ * the DMA window. For kdump, there will be at least a default window, a
+ * DDW, or both.
+ */
ppci = PCI_DN(pdn);
@@ -766,13 +791,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
if (!ppci->table_group) {
ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
tbl = ppci->table_group->tables[0];
- if (dma_window) {
- iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
- ppci->table_group, dma_window);
- if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
- panic("Failed to initialize iommu table");
- }
+ iommu_table_setparms_common(tbl, ppci->phb->bus->number,
+ be32_to_cpu(prop.liobn),
+ be64_to_cpu(prop.dma_base),
+ 1ULL << be32_to_cpu(prop.window_shift),
+ be32_to_cpu(prop.tce_shift), NULL,
+ &iommu_table_lpar_multi_ops);
+
+ /* Only meaningful for a normal boot with the default window. It does not
+ * matter if these are set from a 64-bit DDW during kdump, since they are
+ * not used in that case.
+ */
+ ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
+ ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);
+
+ if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
+ panic("Failed to initialize iommu table");
+
iommu_register_group(ppci->table_group,
pci_domain_nr(bus), 0);
pr_debug(" created table: %p\n", ppci->table_group);
@@ -960,6 +996,12 @@ static void find_existing_ddw_windows_named(const char *name)
continue;
}
+ /* If there are DDWs in OF at the time of system initialization, it means
+ * this is during kexec. A DDW could be direct or dynamic. We just mark
+ * DDWs as "dynamic" since this is the kdump path and there is no need to
+ * worry about performance. ddw_list_new_entry() will
+ * set window->direct = false.
+ */
window = ddw_list_new_entry(pdn, dma64);
if (!window) {
of_node_put(pdn);
@@ -1525,8 +1567,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
{
struct device_node *pdn, *dn;
struct iommu_table *tbl;
- const __be32 *dma_window = NULL;
struct pci_dn *pci;
+ struct dynamic_dma_window_prop prop;
pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
@@ -1539,7 +1581,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
dn = pci_device_to_OF_node(dev);
pr_debug(" node is %pOF\n", dn);
- pdn = pci_dma_find(dn, &dma_window);
+ pdn = pci_dma_find(dn, &prop);
if (!pdn || !PCI_DN(pdn)) {
printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
"no DMA window found for pci dev=%s dn=%pOF\n",
@@ -1552,8 +1594,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
if (!pci->table_group) {
pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
tbl = pci->table_group->tables[0];
- iommu_table_setparms_lpar(pci->phb, pdn, tbl,
- pci->table_group, dma_window);
+
+ iommu_table_setparms_common(tbl, pci->phb->bus->number,
+ be32_to_cpu(prop.liobn),
+ be64_to_cpu(prop.dma_base),
+ 1ULL << be32_to_cpu(prop.window_shift),
+ be32_to_cpu(prop.tce_shift), NULL,
+ &iommu_table_lpar_multi_ops);
+
+ /* Only meaningful for a normal boot with the default window. It does not
+ * matter if these are set from a 64-bit DDW during kdump, since they are
+ * not used in that case.
+ */
+ pci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
+ pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);
iommu_init_table(tbl, pci->phb->node, 0, 0);
iommu_register_group(pci->table_group,
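
The comments above describe the window-selection policy: when both a default "ibm,dma-window" and a DDW property are present (possible in a kdump device tree), the DDW wins. A trimmed-down sketch of that decision, with a hypothetical struct standing in for the device-tree lookups that pci_dma_find() performs:

#include <stdio.h>

/* Hypothetical stand-in for the properties pci_dma_find() looks up. */
struct window_props {
	int has_default;	/* "ibm,dma-window" present */
	int has_ddw;		/* DIRECT64/DMA64 property present */
};

static const char *pick_dma_window(const struct window_props *p)
{
	if (p->has_ddw)
		return "ddw";			/* DDW takes precedence */
	if (p->has_default)
		return "ibm,dma-window";	/* normal boot case */
	return "none";				/* not expected for PCI devices */
}

int main(void)
{
	struct window_props kdump = { .has_default = 1, .has_ddw = 1 };

	printf("selected: %s\n", pick_dma_window(&kdump));
	return 0;
}
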
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 541199c6a..5186d65d7 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -660,8 +660,12 @@ u64 pseries_paravirt_steal_clock(int cpu)
{
struct lppaca *lppaca = &lppaca_of(cpu);
- return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
- be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb));
+ /*
+ * VPA steal time counters are reported at TB frequency. Hence do a
+ * conversion to ns before returning
+ */
+ return tb_to_ns(be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
+ be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)));
}
#endif
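
The fix above converts the VPA steal-time counters from timebase ticks to nanoseconds before returning. A stand-alone sketch of that conversion, with an assumed 512 MHz timebase and made-up counter values (the kernel uses its own tb_to_ns() helper rather than this arithmetic):

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/* Split the multiply to avoid 64-bit overflow in this simple sketch. */
static uint64_t ticks_to_ns(uint64_t ticks, uint64_t tb_freq_hz)
{
	return (ticks / tb_freq_hz) * NSEC_PER_SEC +
	       (ticks % tb_freq_hz) * NSEC_PER_SEC / tb_freq_hz;
}

int main(void)
{
	uint64_t tb_freq = 512000000ULL;	/* assumed timebase frequency */
	uint64_t steal_tb = 1280000000ULL;	/* hypothetical counter sum */

	printf("steal time: %llu ns\n",
	       (unsigned long long)ticks_to_ns(steal_tb, tb_freq));
	return 0;
}
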
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index d47d87c2d..dcf1bc9de 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -25,6 +25,11 @@
#define ARCH_SUPPORTS_FTRACE_OPS 1
#ifndef __ASSEMBLY__
+
+extern void *return_address(unsigned int level);
+
+#define ftrace_return_address(n) return_address(n)
+
void MCOUNT_NAME(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
diff --git a/arch/riscv/include/asm/irq_work.h b/arch/riscv/include/asm/irq_work.h
index b53891964..b27a4d64f 100644
--- a/arch/riscv/include/asm/irq_work.h
+++ b/arch/riscv/include/asm/irq_work.h
@@ -6,5 +6,5 @@ static inline bool arch_irq_work_has_interrupt(void)
{
return IS_ENABLED(CONFIG_SMP);
}
-extern void arch_irq_work_raise(void);
+
#endif /* _ASM_RISCV_IRQ_WORK_H */
diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h
index 14a5ea8d8..4a35d787c 100644
--- a/arch/riscv/include/asm/jump_label.h
+++ b/arch/riscv/include/asm/jump_label.h
@@ -17,7 +17,7 @@
static __always_inline bool arch_static_branch(struct static_key * const key,
const bool branch)
{
- asm_volatile_goto(
+ asm goto(
" .align 2 \n\t"
" .option push \n\t"
" .option norelax \n\t"
@@ -39,7 +39,7 @@ label:
static __always_inline bool arch_static_branch_jump(struct static_key * const key,
const bool branch)
{
- asm_volatile_goto(
+ asm goto(
" .align 2 \n\t"
" .option push \n\t"
" .option norelax \n\t"
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 59bb53da4..63055c6ad 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -79,7 +79,7 @@
* Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
* is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
*/
-#define vmemmap ((struct page *)VMEMMAP_START)
+#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
#define PCI_IO_SIZE SZ_16M
#define PCI_IO_END VMEMMAP_START
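
The vmemmap change above biases the struct page array by the base PFN so that pfn_to_page(pfn) = vmemmap + pfn still lands at VMEMMAP_START for the first page of RAM when RAM does not start at physical address 0. A small host-side sketch with assumed constants (not the real RISC-V layout; requires a 64-bit host):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define VMEMMAP_START	0x100000000ULL		/* assumed, not RISC-V's value */

struct page { char pad[64]; };			/* exact size is irrelevant here */

int main(void)
{
	uint64_t phys_ram_base = 0x80000000ULL;	/* assumed RAM base */
	uint64_t base_pfn = phys_ram_base >> PAGE_SHIFT;

	/* The biased definition from the patch above. */
	struct page *vmemmap = (struct page *)(uintptr_t)VMEMMAP_START - base_pfn;

	/* pfn_to_page() of the first RAM page is exactly VMEMMAP_START. */
	printf("first RAM page -> %p (expected %p)\n",
	       (void *)(vmemmap + base_pfn), (void *)(uintptr_t)VMEMMAP_START);
	return 0;
}
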
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index ab333cb79..4c0805d26 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE
CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
endif
CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,)
CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,)
@@ -41,6 +42,7 @@ obj-y += irq.o
obj-y += process.o
obj-y += ptrace.o
obj-y += reset.o
+obj-y += return_address.o
obj-y += setup.o
obj-y += signal.o
obj-y += syscall_table.o
diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c
new file mode 100644
index 000000000..c8115ec8f
--- /dev/null
+++ b/arch/riscv/kernel/return_address.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code comes from arch/arm64/kernel/return_address.c
+ *
+ * Copyright (C) 2023 SiFive.
+ */
+
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/stacktrace.h>
+
+struct return_address_data {
+ unsigned int level;
+ void *addr;
+};
+
+static bool save_return_addr(void *d, unsigned long pc)
+{
+ struct return_address_data *data = d;
+
+ if (!data->level) {
+ data->addr = (void *)pc;
+ return false;
+ }
+
+ --data->level;
+
+ return true;
+}
+NOKPROBE_SYMBOL(save_return_addr);
+
+noinline void *return_address(unsigned int level)
+{
+ struct return_address_data data;
+
+ data.level = level + 3;
+ data.addr = NULL;
+
+ arch_stack_walk(save_return_addr, &data, current, NULL);
+
+ if (!data.level)
+ return data.addr;
+ else
+ return NULL;
+
+}
+EXPORT_SYMBOL_GPL(return_address);
+NOKPROBE_SYMBOL(return_address);
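
The new return_address() above adds 3 to the requested level so that its own stack-walking frames are skipped and level 0 means "my caller". A rough user-space analogue of the same idea using the compiler builtin (not the kernel API):

#include <stdio.h>

/* __builtin_return_address(0) is the immediate caller, similar in spirit
 * to what return_address()/ftrace_return_address() report for level 0. */
static __attribute__((noinline)) void *who_called_me(void)
{
	return __builtin_return_address(0);
}

int main(void)
{
	printf("called from %p\n", who_called_me());
	return 0;
}
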
diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h
index 603783766..f00c9f610 100644
--- a/arch/s390/include/asm/irq_work.h
+++ b/arch/s390/include/asm/irq_work.h
@@ -7,6 +7,4 @@ static inline bool arch_irq_work_has_interrupt(void)
return true;
}
-void arch_irq_work_raise(void);
-
#endif /* _ASM_S390_IRQ_WORK_H */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 895f774bb..bf78cf381 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -25,7 +25,7 @@
*/
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 0,%l[label]\n"
+ asm goto("0: brcl 0,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
@@ -39,7 +39,7 @@ label:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 15,%l[label]\n"
+ asm goto("0: brcl 15,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index b1e98a9ed..09abf0003 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -777,6 +777,13 @@ struct kvm_vm_stat {
u64 inject_service_signal;
u64 inject_virtio;
u64 aen_forward;
+ u64 gmap_shadow_create;
+ u64 gmap_shadow_reuse;
+ u64 gmap_shadow_r1_entry;
+ u64 gmap_shadow_r2_entry;
+ u64 gmap_shadow_r3_entry;
+ u64 gmap_shadow_sg_entry;
+ u64 gmap_shadow_pg_entry;
};
struct kvm_arch_memory_slot {
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 092b16b4d..6b442edb3 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -385,6 +385,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
/*
* floating point control reg. is in the thread structure
*/
+ save_fpu_regs();
if ((unsigned int) data != 0 ||
test_fp_ctl(data >> (BITS_PER_LONG - 32)))
return -EINVAL;
@@ -741,6 +742,7 @@ static int __poke_user_compat(struct task_struct *child,
/*
* floating point control reg. is in the thread structure
*/
+ save_fpu_regs();
if (test_fp_ctl(tmp))
return -EINVAL;
child->thread.fpu.fpc = data;
@@ -904,9 +906,7 @@ static int s390_fpregs_set(struct task_struct *target,
int rc = 0;
freg_t fprs[__NUM_FPRS];
- if (target == current)
- save_fpu_regs();
-
+ save_fpu_regs();
if (MACHINE_HAS_VX)
convert_vx_to_fp(fprs, target->thread.fpu.vxrs);
else
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0243b6e38..3beceff5f 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1273,6 +1273,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
unsigned long *pgt, int *dat_protection,
int *fake)
{
+ struct kvm *kvm;
struct gmap *parent;
union asce asce;
union vaddress vaddr;
@@ -1281,6 +1282,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
*fake = 0;
*dat_protection = 0;
+ kvm = sg->private;
parent = sg->parent;
vaddr.addr = saddr;
asce.val = sg->orig_asce;
@@ -1341,6 +1343,7 @@ shadow_r2t:
rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_r1_entry++;
}
fallthrough;
case ASCE_TYPE_REGION2: {
@@ -1369,6 +1372,7 @@ shadow_r3t:
rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_r2_entry++;
}
fallthrough;
case ASCE_TYPE_REGION3: {
@@ -1406,6 +1410,7 @@ shadow_sgt:
rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_r3_entry++;
}
fallthrough;
case ASCE_TYPE_SEGMENT: {
@@ -1439,6 +1444,7 @@ shadow_pgt:
rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_sg_entry++;
}
}
/* Return the parent address of the page table */
@@ -1509,6 +1515,7 @@ shadow_page:
pte.p |= dat_protection;
if (!rc)
rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
+ vcpu->kvm->stat.gmap_shadow_pg_entry++;
ipte_unlock(vcpu->kvm);
mmap_read_unlock(sg->mm);
return rc;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 377536347..348d49268 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -66,7 +66,14 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
STATS_DESC_COUNTER(VM, inject_pfault_done),
STATS_DESC_COUNTER(VM, inject_service_signal),
STATS_DESC_COUNTER(VM, inject_virtio),
- STATS_DESC_COUNTER(VM, aen_forward)
+ STATS_DESC_COUNTER(VM, aen_forward),
+ STATS_DESC_COUNTER(VM, gmap_shadow_reuse),
+ STATS_DESC_COUNTER(VM, gmap_shadow_create),
+ STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry),
+ STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry),
+ STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry),
+ STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry),
+ STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry),
};
const struct kvm_stats_header kvm_vm_stats_header = {
@@ -4138,10 +4145,6 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
vcpu_load(vcpu);
- if (test_fp_ctl(fpu->fpc)) {
- ret = -EINVAL;
- goto out;
- }
vcpu->run->s.regs.fpc = fpu->fpc;
if (MACHINE_HAS_VX)
convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
@@ -4149,7 +4152,6 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
else
memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
-out:
vcpu_put(vcpu);
return ret;
}
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 740f8b56e..d90c818a9 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1206,15 +1206,17 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
* we're holding has been unshadowed. If the gmap is still valid,
* we can safely reuse it.
*/
- if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
+ if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) {
+ vcpu->kvm->stat.gmap_shadow_reuse++;
return 0;
+ }
/* release the old shadow - if any, and mark the prefix as unmapped */
release_gmap_shadow(vsie_page);
gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
if (IS_ERR(gmap))
return PTR_ERR(gmap);
- gmap->private = vcpu->kvm;
+ vcpu->kvm->stat.gmap_shadow_create++;
WRITE_ONCE(vsie_page->gmap, gmap);
return 0;
}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 243f673fa..662cf23a1 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -1675,6 +1675,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
return ERR_PTR(-ENOMEM);
new->mm = parent->mm;
new->parent = gmap_get(parent);
+ new->private = parent->private;
new->orig_asce = asce;
new->edat_level = edat_level;
new->initialized = false;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 2c99f9552..394c69fda 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -241,7 +241,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
/* combine single writes by using store-block insn */
void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
{
- zpci_memcpy_toio(to, from, count);
+ zpci_memcpy_toio(to, from, count * 8);
}
static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot)
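
The fix above accounts for __iowrite64_copy()'s count being in 64-bit words while zpci_memcpy_toio() takes a byte length, hence the multiplication by 8. A stripped-down sketch of the unit conversion (fake_memcpy_toio() is a placeholder for illustration, not the s390 helper):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Placeholder for a byte-oriented copy-to-I/O helper. */
static void fake_memcpy_toio(volatile void *to, const void *from, size_t bytes)
{
	memcpy((void *)to, from, bytes);
}

/* count is the number of 64-bit words, as in __iowrite64_copy(). */
static void iowrite64_copy_sketch(volatile void *to, const void *from,
				  size_t count)
{
	fake_memcpy_toio(to, from, count * 8);	/* convert words to bytes */
}

int main(void)
{
	uint64_t src[4] = { 1, 2, 3, 4 }, dst[4] = { 0 };

	iowrite64_copy_sketch(dst, src, 4);	/* copies 32 bytes, not 4 */
	return dst[3] == 4 ? 0 : 1;
}
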
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 94eb529dc..2718cbea8 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -10,7 +10,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"nop\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
@@ -26,7 +26,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"b %l[l_yes]\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 3dbd0e3b6..778c50f27 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -118,7 +118,9 @@ archprepare:
$(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h
LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
-LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie)
+ifdef CONFIG_LD_SCRIPT_DYN
+LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie
+endif
LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib
CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 3d7836c46..cabcc501b 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -204,7 +204,7 @@ static int uml_net_close(struct net_device *dev)
return 0;
}
-static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct uml_net_private *lp = netdev_priv(dev);
unsigned long flags;
diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h
index 4b6d1b526..66fe06db8 100644
--- a/arch/um/include/asm/cpufeature.h
+++ b/arch/um/include/asm/cpufeature.h
@@ -75,7 +75,7 @@ extern void setup_clear_cpu_cap(unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
+ asm goto("1: jmp 6f\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index d8b8b4f07..444bae755 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -50,7 +50,7 @@ extern void do_uml_exitcalls(void);
* Are we disallowed to sleep? Used to choose between GFP_KERNEL and
* GFP_ATOMIC.
*/
-extern int __cant_sleep(void);
+extern int __uml_cant_sleep(void);
extern int get_current_pid(void);
extern int copy_from_user_proc(void *to, void *from, int size);
extern char *uml_strdup(const char *string);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 010bc422a..a351c87db 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -220,7 +220,7 @@ void arch_cpu_idle(void)
raw_local_irq_enable();
}
-int __cant_sleep(void) {
+int __uml_cant_sleep(void) {
return in_atomic() || irqs_disabled() || in_interrupt();
/* Is in_interrupt() really needed? */
}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index fddd1dec2..3e270da6b 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -432,9 +432,29 @@ static void time_travel_update_time(unsigned long long next, bool idle)
time_travel_del_event(&ne);
}
+static void time_travel_update_time_rel(unsigned long long offs)
+{
+ unsigned long flags;
+
+ /*
+ * Disable interrupts before calculating the new time so
+ * that a real timer interrupt (signal) can't happen at
+ * a bad time e.g. after we read time_travel_time but
+ * before we've completed updating the time.
+ */
+ local_irq_save(flags);
+ time_travel_update_time(time_travel_time + offs, false);
+ local_irq_restore(flags);
+}
+
void time_travel_ndelay(unsigned long nsec)
{
- time_travel_update_time(time_travel_time + nsec, false);
+ /*
+ * Not strictly needed to use _rel() version since this is
+ * only used in INFCPU/EXT modes, but it doesn't hurt and
+ * is more readable too.
+ */
+ time_travel_update_time_rel(nsec);
}
EXPORT_SYMBOL(time_travel_ndelay);
@@ -568,7 +588,11 @@ static void time_travel_set_start(void)
#define time_travel_time 0
#define time_travel_ext_waiting 0
-static inline void time_travel_update_time(unsigned long long ns, bool retearly)
+static inline void time_travel_update_time(unsigned long long ns, bool idle)
+{
+}
+
+static inline void time_travel_update_time_rel(unsigned long long offs)
{
}
@@ -720,9 +744,7 @@ static u64 timer_read(struct clocksource *cs)
*/
if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
!time_travel_ext_waiting)
- time_travel_update_time(time_travel_time +
- TIMER_MULTIPLIER,
- false);
+ time_travel_update_time_rel(TIMER_MULTIPLIER);
return time_travel_time / TIMER_MULTIPLIER;
}
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index b459745f5..3cb8ac63b 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -46,7 +46,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
unsigned long stack, sp;
int pid, fds[2], ret, n;
- stack = alloc_stack(0, __cant_sleep());
+ stack = alloc_stack(0, __uml_cant_sleep());
if (stack == 0)
return -ENOMEM;
@@ -70,7 +70,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
data.pre_data = pre_data;
data.argv = argv;
data.fd = fds[1];
- data.buf = __cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) :
+ data.buf = __uml_cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) :
uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
pid = clone(helper_child, (void *) sp, CLONE_VM, &data);
if (pid < 0) {
@@ -121,7 +121,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
unsigned long stack, sp;
int pid, status, err;
- stack = alloc_stack(0, __cant_sleep());
+ stack = alloc_stack(0, __uml_cant_sleep());
if (stack == 0)
return -ENOMEM;
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index fc0f2a9de..1dca4ffbd 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -173,23 +173,38 @@ __uml_setup("quiet", quiet_cmd_param,
"quiet\n"
" Turns off information messages during boot.\n\n");
+/*
+ * The os_info/os_warn functions will be called by helper threads. These
+ * have a very limited stack size and using the libc formatting functions
+ * may overflow the stack.
+ * So pull in the kernel vscnprintf and use that instead with a fixed
+ * on-stack buffer.
+ */
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
void os_info(const char *fmt, ...)
{
+ char buf[256];
va_list list;
+ int len;
if (quiet_info)
return;
va_start(list, fmt);
- vfprintf(stderr, fmt, list);
+ len = vscnprintf(buf, sizeof(buf), fmt, list);
+ fwrite(buf, len, 1, stderr);
va_end(list);
}
void os_warn(const char *fmt, ...)
{
+ char buf[256];
va_list list;
+ int len;
va_start(list, fmt);
- vfprintf(stderr, fmt, list);
+ len = vscnprintf(buf, sizeof(buf), fmt, list);
+ fwrite(buf, len, 1, stderr);
va_end(list);
}
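
The rework above formats into a small fixed on-stack buffer and writes it out with fwrite() instead of calling the stdio formatter directly, because helper threads run on tiny stacks. A minimal user-space sketch of the same shape (using libc vsnprintf here purely for illustration; the patch deliberately pulls in the kernel's vscnprintf instead):

#include <stdarg.h>
#include <stdio.h>

static void small_stack_warn(const char *fmt, ...)
{
	char buf[256];
	va_list ap;
	int len;

	va_start(ap, fmt);
	len = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	if (len < 0)
		return;
	if (len > (int)sizeof(buf) - 1)
		len = sizeof(buf) - 1;	/* vsnprintf returns the untruncated length */
	fwrite(buf, len, 1, stderr);
}

int main(void)
{
	small_stack_warn("disk %s: %d retries\n", "ubda", 3);
	return 0;
}
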
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4c9bfc4be..5caa023e9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1982,6 +1982,23 @@ config EFI_STUB
See Documentation/admin-guide/efi-stub.rst for more information.
+config EFI_HANDOVER_PROTOCOL
+ bool "EFI handover protocol (DEPRECATED)"
+ depends on EFI_STUB
+ default y
+ help
+ Select this in order to include support for the deprecated EFI
+ handover protocol, which defines alternative entry points into the
+ EFI stub. This is a practice that has no basis in the UEFI
+ specification, and requires a priori knowledge on the part of the
+ bootloader about Linux/x86 specific ways of passing the command line
+ and initrd, and where in memory those assets may be loaded.
+
+ If in doubt, say Y. Even though the corresponding support is not
+ present in upstream GRUB or other bootloaders, most distros build
+ GRUB with numerous downstream patches applied, and may rely on the
+ handover protocol as a result.
+
config EFI_MIXED
bool "EFI mixed-mode support"
depends on EFI_STUB && X86_64
@@ -2548,6 +2565,17 @@ config GDS_FORCE_MITIGATION
If in doubt, say N.
+config MITIGATION_RFDS
+ bool "RFDS Mitigation"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Enable mitigation for Register File Data Sampling (RFDS) by default.
+ RFDS is a hardware vulnerability which affects Intel Atom CPUs. It
+ allows unprivileged speculative access to stale data previously
+ stored in floating point, vector and integer registers.
+ See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
+
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 542377cd4..ce5ed2c2d 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -375,7 +375,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
default "5" if X86_32 && X86_CMPXCHG64
default "4"
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 15b7b403a..3965b2c9e 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack
ifeq ($(CONFIG_LD_IS_BFD),y)
LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments)
endif
+ifeq ($(CONFIG_EFI_STUB),y)
+# ensure that the static EFI stub library will be pulled in, even if it is
+# never referenced explicitly from the startup code
+LDFLAGS_vmlinux += -u efi_pe_entry
+endif
LDFLAGS_vmlinux += -T
hostprogs := mkpiggy
@@ -100,7 +105,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
vmlinux-objs-y += $(obj)/ident_map_64.o
vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
- vmlinux-objs-y += $(obj)/mem_encrypt.o
+ vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o
vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
endif
@@ -108,11 +113,11 @@ endif
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
-vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
-efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o
+vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
-$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE
+$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
$(call if_changed,ld)
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 9caf89063..55c98fdd6 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -30,13 +30,13 @@ __efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len)
* Search EFI system tables for RSDP. Preferred is ACPI_20_TABLE_GUID to
* ACPI_TABLE_GUID because it has more features.
*/
- rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+ rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len,
ACPI_20_TABLE_GUID);
if (rsdp_addr)
return (acpi_physical_address)rsdp_addr;
/* No ACPI_20_TABLE_GUID found, fallback to ACPI_TABLE_GUID. */
- rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+ rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len,
ACPI_TABLE_GUID);
if (rsdp_addr)
return (acpi_physical_address)rsdp_addr;
@@ -56,15 +56,15 @@ static acpi_physical_address efi_get_rsdp_addr(void)
enum efi_type et;
int ret;
- et = efi_get_type(boot_params);
+ et = efi_get_type(boot_params_ptr);
if (et == EFI_TYPE_NONE)
return 0;
- systab_pa = efi_get_system_table(boot_params);
+ systab_pa = efi_get_system_table(boot_params_ptr);
if (!systab_pa)
error("EFI support advertised, but unable to locate system table.");
- ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len);
+ ret = efi_get_conf_table(boot_params_ptr, &cfg_tbl_pa, &cfg_tbl_len);
if (ret || !cfg_tbl_pa)
error("EFI config table not found.");
@@ -156,7 +156,7 @@ acpi_physical_address get_rsdp_addr(void)
{
acpi_physical_address pa;
- pa = boot_params->acpi_rsdp_addr;
+ pa = boot_params_ptr->acpi_rsdp_addr;
if (!pa)
pa = efi_get_rsdp_addr();
@@ -210,7 +210,7 @@ static unsigned long get_acpi_srat_table(void)
rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp();
if (!rsdp)
rsdp = (struct acpi_table_rsdp *)(long)
- boot_params->acpi_rsdp_addr;
+ boot_params_ptr->acpi_rsdp_addr;
if (!rsdp)
return 0;
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index f1add5d85..c1bb18097 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -14,9 +14,9 @@ static inline char rdfs8(addr_t addr)
#include "../cmdline.c"
unsigned long get_cmd_line_ptr(void)
{
- unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr;
+ unsigned long cmd_line_ptr = boot_params_ptr->hdr.cmd_line_ptr;
- cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32;
+ cmd_line_ptr |= (u64)boot_params_ptr->ext_cmd_line_ptr << 32;
return cmd_line_ptr;
}
diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
new file mode 100644
index 000000000..8232c5b2a
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_mixed.S
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
+ *
+ * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+ *
+ * Because this thunking occurs before ExitBootServices() we have to
+ * restore the firmware's 32-bit GDT and IDT before we make EFI service
+ * calls.
+ *
+ * On the plus side, we don't have to worry about mangling 64-bit
+ * addresses into 32-bits because we're executing with an identity
+ * mapped pagetable and haven't transitioned to 64-bit virtual addresses
+ * yet.
+ */
+
+#include <linux/linkage.h>
+#include <asm/msr.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+ .code64
+ .text
+/*
+ * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode()
+ * is the first thing that runs after switching to long mode. Depending on
+ * whether the EFI handover protocol or the compat entry point was used to
+ * enter the kernel, it will either branch to the common 64-bit EFI stub
+ * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF
+ * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a
+ * struct bootparams pointer as the third argument, so the presence of such a
+ * pointer is used to disambiguate.
+ *
+ * +--------------+
+ * +------------------+ +------------+ +------>| efi_pe_entry |
+ * | efi32_pe_entry |---->| | | +-----------+--+
+ * +------------------+ | | +------+----------------+ |
+ * | startup_32 |---->| startup_64_mixed_mode | |
+ * +------------------+ | | +------+----------------+ |
+ * | efi32_stub_entry |---->| | | |
+ * +------------------+ +------------+ | |
+ * V |
+ * +------------+ +----------------+ |
+ * | startup_64 |<----| efi_stub_entry |<--------+
+ * +------------+ +----------------+
+ */
+SYM_FUNC_START(startup_64_mixed_mode)
+ lea efi32_boot_args(%rip), %rdx
+ mov 0(%rdx), %edi
+ mov 4(%rdx), %esi
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+ mov 8(%rdx), %edx // saved bootparams pointer
+ test %edx, %edx
+ jnz efi_stub_entry
+#endif
+ /*
+ * efi_pe_entry uses MS calling convention, which requires 32 bytes of
+ * shadow space on the stack even if all arguments are passed in
+ * registers. We also need an additional 8 bytes for the space that
+ * would be occupied by the return address, and this also results in
+ * the correct stack alignment for entry.
+ */
+ sub $40, %rsp
+ mov %rdi, %rcx // MS calling convention
+ mov %rsi, %rdx
+ jmp efi_pe_entry
+SYM_FUNC_END(startup_64_mixed_mode)
+
+SYM_FUNC_START(__efi64_thunk)
+ push %rbp
+ push %rbx
+
+ movl %ds, %eax
+ push %rax
+ movl %es, %eax
+ push %rax
+ movl %ss, %eax
+ push %rax
+
+ /* Copy args passed on stack */
+ movq 0x30(%rsp), %rbp
+ movq 0x38(%rsp), %rbx
+ movq 0x40(%rsp), %rax
+
+ /*
+ * Convert x86-64 ABI params to i386 ABI
+ */
+ subq $64, %rsp
+ movl %esi, 0x0(%rsp)
+ movl %edx, 0x4(%rsp)
+ movl %ecx, 0x8(%rsp)
+ movl %r8d, 0xc(%rsp)
+ movl %r9d, 0x10(%rsp)
+ movl %ebp, 0x14(%rsp)
+ movl %ebx, 0x18(%rsp)
+ movl %eax, 0x1c(%rsp)
+
+ leaq 0x20(%rsp), %rbx
+ sgdt (%rbx)
+ sidt 16(%rbx)
+
+ leaq 1f(%rip), %rbp
+
+ /*
+ * Switch to IDT and GDT with 32-bit segments. These are the firmware
+ * GDT and IDT that were installed when the kernel started executing.
+ * The pointers were saved by the efi32_entry() routine below.
+ *
+ * Pass the saved DS selector to the 32-bit code, and use far return to
+ * restore the saved CS selector.
+ */
+ lidt efi32_boot_idt(%rip)
+ lgdt efi32_boot_gdt(%rip)
+
+ movzwl efi32_boot_ds(%rip), %edx
+ movzwq efi32_boot_cs(%rip), %rax
+ pushq %rax
+ leaq efi_enter32(%rip), %rax
+ pushq %rax
+ lretq
+
+1: addq $64, %rsp
+ movq %rdi, %rax
+
+ pop %rbx
+ movl %ebx, %ss
+ pop %rbx
+ movl %ebx, %es
+ pop %rbx
+ movl %ebx, %ds
+ /* Clear out 32-bit selector from FS and GS */
+ xorl %ebx, %ebx
+ movl %ebx, %fs
+ movl %ebx, %gs
+
+ /*
+ * Convert 32-bit status code into 64-bit.
+ */
+ roll $1, %eax
+ rorq $1, %rax
+
+ pop %rbx
+ pop %rbp
+ RET
+SYM_FUNC_END(__efi64_thunk)
+
+ .code32
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+SYM_FUNC_START(efi32_stub_entry)
+ call 1f
+1: popl %ecx
+
+ /* Clear BSS */
+ xorl %eax, %eax
+ leal (_bss - 1b)(%ecx), %edi
+ leal (_ebss - 1b)(%ecx), %ecx
+ subl %edi, %ecx
+ shrl $2, %ecx
+ cld
+ rep stosl
+
+ add $0x4, %esp /* Discard return address */
+ popl %ecx
+ popl %edx
+ popl %esi
+ jmp efi32_entry
+SYM_FUNC_END(efi32_stub_entry)
+#endif
+
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+SYM_FUNC_START_LOCAL(efi_enter32)
+ /* Load firmware selector into data and stack segment registers */
+ movl %edx, %ds
+ movl %edx, %es
+ movl %edx, %fs
+ movl %edx, %gs
+ movl %edx, %ss
+
+ /* Reload pgtables */
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /* Disable long mode via EFER */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btrl $_EFER_LME, %eax
+ wrmsr
+
+ call *%edi
+
+ /* We must preserve return value */
+ movl %eax, %edi
+
+ /*
+ * Some firmware will return with interrupts enabled. Be sure to
+ * disable them before we switch GDTs and IDTs.
+ */
+ cli
+
+ lidtl 16(%ebx)
+ lgdtl (%ebx)
+
+ movl %cr4, %eax
+ btsl $(X86_CR4_PAE_BIT), %eax
+ movl %eax, %cr4
+
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ xorl %eax, %eax
+ lldt %ax
+
+ pushl $__KERNEL_CS
+ pushl %ebp
+
+ /* Enable paging */
+ movl %cr0, %eax
+ btsl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+ lret
+SYM_FUNC_END(efi_enter32)
+
+/*
+ * This is the common EFI stub entry point for mixed mode.
+ *
+ * Arguments: %ecx image handle
+ * %edx EFI system table pointer
+ * %esi struct bootparams pointer (or NULL when not using
+ * the EFI handover protocol)
+ *
+ * Since this is the point of no return for ordinary execution, no registers
+ * are considered live except for the function parameters. [Note that the EFI
+ * stub may still exit and return to the firmware using the Exit() EFI boot
+ * service.]
+ */
+SYM_FUNC_START_LOCAL(efi32_entry)
+ call 1f
+1: pop %ebx
+
+ /* Save firmware GDTR and code/data selectors */
+ sgdtl (efi32_boot_gdt - 1b)(%ebx)
+ movw %cs, (efi32_boot_cs - 1b)(%ebx)
+ movw %ds, (efi32_boot_ds - 1b)(%ebx)
+
+ /* Store firmware IDT descriptor */
+ sidtl (efi32_boot_idt - 1b)(%ebx)
+
+ /* Store boot arguments */
+ leal (efi32_boot_args - 1b)(%ebx), %ebx
+ movl %ecx, 0(%ebx)
+ movl %edx, 4(%ebx)
+ movl %esi, 8(%ebx)
+ movb $0x0, 12(%ebx) // efi_is64
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ jmp startup_32
+SYM_FUNC_END(efi32_entry)
+
+/*
+ * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
+ * efi_system_table_32_t *sys_table)
+ */
+SYM_FUNC_START(efi32_pe_entry)
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %ebx // save callee-save registers
+ pushl %edi
+
+ call verify_cpu // check for long mode support
+ testl %eax, %eax
+ movl $0x80000003, %eax // EFI_UNSUPPORTED
+ jnz 2f
+
+ movl 8(%ebp), %ecx // image_handle
+ movl 12(%ebp), %edx // sys_table
+ xorl %esi, %esi
+ jmp efi32_entry // pass %ecx, %edx, %esi
+ // no other registers remain live
+
+2: popl %edi // restore callee-save registers
+ popl %ebx
+ leave
+ RET
+SYM_FUNC_END(efi32_pe_entry)
+
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+ .org efi32_stub_entry + 0x200
+ .code64
+SYM_FUNC_START_NOALIGN(efi64_stub_entry)
+ jmp efi_handover_entry
+SYM_FUNC_END(efi64_stub_entry)
+#endif
+
+ .data
+ .balign 8
+SYM_DATA_START_LOCAL(efi32_boot_gdt)
+ .word 0
+ .quad 0
+SYM_DATA_END(efi32_boot_gdt)
+
+SYM_DATA_START_LOCAL(efi32_boot_idt)
+ .word 0
+ .quad 0
+SYM_DATA_END(efi32_boot_idt)
+
+SYM_DATA_LOCAL(efi32_boot_cs, .word 0)
+SYM_DATA_LOCAL(efi32_boot_ds, .word 0)
+SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
+SYM_DATA(efi_is64, .byte 1)
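
The flow diagram and startup_64_mixed_mode() above boil down to one decision: if the bootloader stashed a boot_params pointer, the deprecated handover protocol was used and execution continues at efi_stub_entry(); otherwise it goes through the PE/COFF entry point efi_pe_entry(). A C-level sketch of that dispatch, with simplified placeholder types and stubs rather than the kernel's real signatures:

#include <stdio.h>

struct boot_params { int dummy; };	/* opaque stand-in */

/* Placeholder stubs; the real entry points take EFI handle/table types
 * and never return. */
static void efi_stub_entry_sketch(void *handle, void *systab,
				  struct boot_params *bp)
{
	printf("handover path: efi_stub_entry(%p, %p, %p)\n",
	       handle, systab, (void *)bp);
}

static void efi_pe_entry_sketch(void *handle, void *systab)
{
	printf("PE/COFF path: efi_pe_entry(%p, %p)\n", handle, systab);
}

static void startup_64_mixed_mode_sketch(void *handle, void *systab,
					  struct boot_params *saved_bp)
{
	if (saved_bp)			/* bootloader provided boot_params */
		efi_stub_entry_sketch(handle, systab, saved_bp);
	else				/* compat 32-bit PE entry was used */
		efi_pe_entry_sketch(handle, systab);
}

int main(void)
{
	struct boot_params bp = { 0 };

	startup_64_mixed_mode_sketch((void *)0x1, (void *)0x2, &bp);
	startup_64_mixed_mode_sketch((void *)0x1, (void *)0x2, NULL);
	return 0;
}
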
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
deleted file mode 100644
index 67e7edcdf..000000000
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ /dev/null
@@ -1,195 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
- *
- * Early support for invoking 32-bit EFI services from a 64-bit kernel.
- *
- * Because this thunking occurs before ExitBootServices() we have to
- * restore the firmware's 32-bit GDT and IDT before we make EFI service
- * calls.
- *
- * On the plus side, we don't have to worry about mangling 64-bit
- * addresses into 32-bits because we're executing with an identity
- * mapped pagetable and haven't transitioned to 64-bit virtual addresses
- * yet.
- */
-
-#include <linux/linkage.h>
-#include <asm/msr.h>
-#include <asm/page_types.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-
- .code64
- .text
-SYM_FUNC_START(__efi64_thunk)
- push %rbp
- push %rbx
-
- movl %ds, %eax
- push %rax
- movl %es, %eax
- push %rax
- movl %ss, %eax
- push %rax
-
- /* Copy args passed on stack */
- movq 0x30(%rsp), %rbp
- movq 0x38(%rsp), %rbx
- movq 0x40(%rsp), %rax
-
- /*
- * Convert x86-64 ABI params to i386 ABI
- */
- subq $64, %rsp
- movl %esi, 0x0(%rsp)
- movl %edx, 0x4(%rsp)
- movl %ecx, 0x8(%rsp)
- movl %r8d, 0xc(%rsp)
- movl %r9d, 0x10(%rsp)
- movl %ebp, 0x14(%rsp)
- movl %ebx, 0x18(%rsp)
- movl %eax, 0x1c(%rsp)
-
- leaq 0x20(%rsp), %rbx
- sgdt (%rbx)
-
- addq $16, %rbx
- sidt (%rbx)
-
- leaq 1f(%rip), %rbp
-
- /*
- * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT
- * and IDT that was installed when the kernel started executing. The
- * pointers were saved at the EFI stub entry point in head_64.S.
- *
- * Pass the saved DS selector to the 32-bit code, and use far return to
- * restore the saved CS selector.
- */
- leaq efi32_boot_idt(%rip), %rax
- lidt (%rax)
- leaq efi32_boot_gdt(%rip), %rax
- lgdt (%rax)
-
- movzwl efi32_boot_ds(%rip), %edx
- movzwq efi32_boot_cs(%rip), %rax
- pushq %rax
- leaq efi_enter32(%rip), %rax
- pushq %rax
- lretq
-
-1: addq $64, %rsp
- movq %rdi, %rax
-
- pop %rbx
- movl %ebx, %ss
- pop %rbx
- movl %ebx, %es
- pop %rbx
- movl %ebx, %ds
- /* Clear out 32-bit selector from FS and GS */
- xorl %ebx, %ebx
- movl %ebx, %fs
- movl %ebx, %gs
-
- /*
- * Convert 32-bit status code into 64-bit.
- */
- roll $1, %eax
- rorq $1, %rax
-
- pop %rbx
- pop %rbp
- RET
-SYM_FUNC_END(__efi64_thunk)
-
- .code32
-/*
- * EFI service pointer must be in %edi.
- *
- * The stack should represent the 32-bit calling convention.
- */
-SYM_FUNC_START_LOCAL(efi_enter32)
- /* Load firmware selector into data and stack segment registers */
- movl %edx, %ds
- movl %edx, %es
- movl %edx, %fs
- movl %edx, %gs
- movl %edx, %ss
-
- /* Reload pgtables */
- movl %cr3, %eax
- movl %eax, %cr3
-
- /* Disable paging */
- movl %cr0, %eax
- btrl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
-
- /* Disable long mode via EFER */
- movl $MSR_EFER, %ecx
- rdmsr
- btrl $_EFER_LME, %eax
- wrmsr
-
- call *%edi
-
- /* We must preserve return value */
- movl %eax, %edi
-
- /*
- * Some firmware will return with interrupts enabled. Be sure to
- * disable them before we switch GDTs and IDTs.
- */
- cli
-
- lidtl (%ebx)
- subl $16, %ebx
-
- lgdtl (%ebx)
-
- movl %cr4, %eax
- btsl $(X86_CR4_PAE_BIT), %eax
- movl %eax, %cr4
-
- movl %cr3, %eax
- movl %eax, %cr3
-
- movl $MSR_EFER, %ecx
- rdmsr
- btsl $_EFER_LME, %eax
- wrmsr
-
- xorl %eax, %eax
- lldt %ax
-
- pushl $__KERNEL_CS
- pushl %ebp
-
- /* Enable paging */
- movl %cr0, %eax
- btsl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
- lret
-SYM_FUNC_END(efi_enter32)
-
- .data
- .balign 8
-SYM_DATA_START(efi32_boot_gdt)
- .word 0
- .quad 0
-SYM_DATA_END(efi32_boot_gdt)
-
-SYM_DATA_START(efi32_boot_idt)
- .word 0
- .quad 0
-SYM_DATA_END(efi32_boot_idt)
-
-SYM_DATA_START(efi32_boot_cs)
- .word 0
-SYM_DATA_END(efi32_boot_cs)
-
-SYM_DATA_START(efi32_boot_ds)
- .word 0
-SYM_DATA_END(efi32_boot_ds)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 3b354eb95..1cfe9802a 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32)
#ifdef CONFIG_RELOCATABLE
leal startup_32@GOTOFF(%edx), %ebx
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32() will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry() will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- * image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
- subl image_offset@GOTOFF(%edx), %ebx
-#endif
-
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
@@ -150,17 +137,6 @@ SYM_FUNC_START(startup_32)
jmp *%eax
SYM_FUNC_END(startup_32)
-#ifdef CONFIG_EFI_STUB
-SYM_FUNC_START(efi32_stub_entry)
- add $0x4, %esp
- movl 8(%esp), %esi /* save boot_params pointer */
- call efi_main
- /* efi_main returns the possibly relocated address of startup_32 */
- jmp *%eax
-SYM_FUNC_END(efi32_stub_entry)
-SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
-#endif
-
.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
@@ -179,15 +155,9 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
*/
/* push arguments for extract_kernel: */
- pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */
pushl %ebp /* output address */
- pushl input_len@GOTOFF(%ebx) /* input_len */
- leal input_data@GOTOFF(%ebx), %eax
- pushl %eax /* input_data */
- leal boot_heap@GOTOFF(%ebx), %eax
- pushl %eax /* heap area */
pushl %esi /* real mode pointer */
- call extract_kernel /* returns kernel location in %eax */
+ call extract_kernel /* returns kernel entry point in %eax */
addl $24, %esp
/*
@@ -208,17 +178,11 @@ SYM_DATA_START_LOCAL(gdt)
.quad 0x00cf92000000ffff /* __KERNEL_DS */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
-#ifdef CONFIG_EFI_STUB
-SYM_DATA(image_offset, .long 0)
-#endif
-
/*
* Stack and heap for uncompression
*/
.bss
.balign 4
-boot_heap:
- .fill BOOT_HEAP_SIZE, 1, 0
boot_stack:
.fill BOOT_STACK_SIZE, 1, 0
boot_stack_end:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index b4bd6df29..0d7aef10b 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -118,7 +118,9 @@ SYM_FUNC_START(startup_32)
1:
/* Setup Exception handling for SEV-ES */
+#ifdef CONFIG_AMD_MEM_ENCRYPT
call startup32_load_idt
+#endif
/* Make sure cpu supports long mode. */
call verify_cpu
@@ -136,19 +138,6 @@ SYM_FUNC_START(startup_32)
#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32 will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- * image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
- subl rva(image_offset)(%ebp), %ebx
-#endif
-
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
@@ -178,12 +167,13 @@ SYM_FUNC_START(startup_32)
*/
/*
* If SEV is active then set the encryption mask in the page tables.
- * This will insure that when the kernel is copied and decompressed
+ * This will ensure that when the kernel is copied and decompressed
* it will be done so encrypted.
*/
- call get_sev_encryption_bit
xorl %edx, %edx
#ifdef CONFIG_AMD_MEM_ENCRYPT
+ call get_sev_encryption_bit
+ xorl %edx, %edx
testl %eax, %eax
jz 1f
subl $32, %eax /* Encryption bit is always above bit 31 */
@@ -249,6 +239,11 @@ SYM_FUNC_START(startup_32)
movl $__BOOT_TSS, %eax
ltr %ax
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /* Check if the C-bit position is correct when SEV is active */
+ call startup32_check_sev_cbit
+#endif
+
/*
* Setup for the jump to 64bit mode
*
@@ -261,29 +256,11 @@ SYM_FUNC_START(startup_32)
*/
leal rva(startup_64)(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
- movl rva(efi32_boot_args)(%ebp), %edi
- testl %edi, %edi
- jz 1f
- leal rva(efi64_stub_entry)(%ebp), %eax
- movl rva(efi32_boot_args+4)(%ebp), %esi
- movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer
- testl %edx, %edx
- jnz 1f
- /*
- * efi_pe_entry uses MS calling convention, which requires 32 bytes of
- * shadow space on the stack even if all arguments are passed in
- * registers. We also need an additional 8 bytes for the space that
- * would be occupied by the return address, and this also results in
- * the correct stack alignment for entry.
- */
- subl $40, %esp
- leal rva(efi_pe_entry)(%ebp), %eax
- movl %edi, %ecx // MS calling convention
- movl %esi, %edx
+ cmpb $1, rva(efi_is64)(%ebp)
+ je 1f
+ leal rva(startup_64_mixed_mode)(%ebp), %eax
1:
#endif
- /* Check if the C-bit position is correct when SEV is active */
- call startup32_check_sev_cbit
pushl $__KERNEL_CS
pushl %eax
@@ -296,41 +273,6 @@ SYM_FUNC_START(startup_32)
lret
SYM_FUNC_END(startup_32)
-#ifdef CONFIG_EFI_MIXED
- .org 0x190
-SYM_FUNC_START(efi32_stub_entry)
- add $0x4, %esp /* Discard return address */
- popl %ecx
- popl %edx
- popl %esi
-
- call 1f
-1: pop %ebp
- subl $ rva(1b), %ebp
-
- movl %esi, rva(efi32_boot_args+8)(%ebp)
-SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL)
- movl %ecx, rva(efi32_boot_args)(%ebp)
- movl %edx, rva(efi32_boot_args+4)(%ebp)
- movb $0, rva(efi_is64)(%ebp)
-
- /* Save firmware GDTR and code/data selectors */
- sgdtl rva(efi32_boot_gdt)(%ebp)
- movw %cs, rva(efi32_boot_cs)(%ebp)
- movw %ds, rva(efi32_boot_ds)(%ebp)
-
- /* Store firmware IDT descriptor */
- sidtl rva(efi32_boot_idt)(%ebp)
-
- /* Disable paging */
- movl %cr0, %eax
- btrl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
-
- jmp startup_32
-SYM_FUNC_END(efi32_stub_entry)
-#endif
-
.code64
.org 0x200
SYM_CODE_START(startup_64)
@@ -372,20 +314,6 @@ SYM_CODE_START(startup_64)
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
leaq startup_32(%rip) /* - $startup_32 */, %rbp
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32 will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- * image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
- movl image_offset(%rip), %eax
- subq %rax, %rbp
-#endif
-
movl BP_kernel_alignment(%rsi), %eax
decl %eax
addq %rax, %rbp
@@ -424,10 +352,6 @@ SYM_CODE_START(startup_64)
* For the trampoline, we need the top page table to reside in lower
* memory as we don't have a way to load 64-bit values into CR3 in
* 32-bit mode.
- *
- * We go though the trampoline even if we don't have to: if we're
- * already in a desired paging mode. This way the trampoline code gets
- * tested on every boot.
*/
/* Make sure we have GDT with 32-bit code segment */
@@ -442,10 +366,14 @@ SYM_CODE_START(startup_64)
lretq
.Lon_kernel_cs:
+ /*
+ * RSI holds a pointer to a boot_params structure provided by the
+ * loader, and this needs to be preserved across C function calls. So
+ * move it into a callee saved register.
+ */
+ movq %rsi, %r15
- pushq %rsi
call load_stage1_idt
- popq %rsi
#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
@@ -456,82 +384,24 @@ SYM_CODE_START(startup_64)
* CPUID instructions being issued, so go ahead and do that now via
* sev_enable(), which will also handle the rest of the SEV-related
* detection/setup to ensure that has been done in advance of any dependent
- * code.
+ * code. Pass the boot_params pointer as the first argument.
*/
- pushq %rsi
- movq %rsi, %rdi /* real mode address */
+ movq %r15, %rdi
call sev_enable
- popq %rsi
#endif
/*
- * paging_prepare() sets up the trampoline and checks if we need to
- * enable 5-level paging.
- *
- * paging_prepare() returns a two-quadword structure which lands
- * into RDX:RAX:
- * - Address of the trampoline is returned in RAX.
- * - Non zero RDX means trampoline needs to enable 5-level
- * paging.
- *
- * RSI holds real mode data and needs to be preserved across
- * this function call.
- */
- pushq %rsi
- movq %rsi, %rdi /* real mode address */
- call paging_prepare
- popq %rsi
-
- /* Save the trampoline address in RCX */
- movq %rax, %rcx
-
- /* Set up 32-bit addressable stack */
- leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp
-
- /*
- * Preserve live 64-bit registers on the stack: this is necessary
- * because the architecture does not guarantee that GPRs will retain
- * their full 64-bit values across a 32-bit mode switch.
- */
- pushq %rbp
- pushq %rbx
- pushq %rsi
-
- /*
- * Push the 64-bit address of trampoline_return() onto the new stack.
- * It will be used by the trampoline to return to the main code. Due to
- * the 32-bit mode switch, it cannot be kept it in a register either.
- */
- leaq trampoline_return(%rip), %rdi
- pushq %rdi
-
- /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
- pushq $__KERNEL32_CS
- leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
- pushq %rax
- lretq
-trampoline_return:
- /* Restore live 64-bit registers */
- popq %rsi
- popq %rbx
- popq %rbp
-
- /* Restore the stack, the 32-bit trampoline uses its own stack */
- leaq rva(boot_stack_end)(%rbx), %rsp
-
- /*
- * cleanup_trampoline() would restore trampoline memory.
- *
- * RDI is address of the page table to use instead of page table
- * in trampoline memory (if required).
+ * configure_5level_paging() updates the number of paging levels using
+ * a trampoline in 32-bit addressable memory if the current number does
+ * not match the desired number.
*
- * RSI holds real mode data and needs to be preserved across
- * this function call.
+ * Pass the boot_params pointer as the first argument. The second
+ * argument is the relocated address of the page table to use instead
+ * of the page table in trampoline memory (if required).
*/
- pushq %rsi
- leaq rva(top_pgtable)(%rbx), %rdi
- call cleanup_trampoline
- popq %rsi
+ movq %r15, %rdi
+ leaq rva(top_pgtable)(%rbx), %rsi
+ call configure_5level_paging
/* Zero EFLAGS */
pushq $0
@@ -541,7 +411,6 @@ trampoline_return:
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*/
- pushq %rsi
leaq (_bss-8)(%rip), %rsi
leaq rva(_bss-8)(%rbx), %rdi
movl $(_bss - startup_32), %ecx
@@ -549,7 +418,6 @@ trampoline_return:
std
rep movsq
cld
- popq %rsi
/*
* The GDT may get overwritten either during the copy we just did or
@@ -568,19 +436,6 @@ trampoline_return:
jmp *%rax
SYM_CODE_END(startup_64)
-#ifdef CONFIG_EFI_STUB
- .org 0x390
-SYM_FUNC_START(efi64_stub_entry)
- and $~0xf, %rsp /* realign the stack */
- movq %rdx, %rbx /* save boot_params pointer */
- call efi_main
- movq %rbx,%rsi
- leaq rva(startup_64)(%rax), %rax
- jmp *%rax
-SYM_FUNC_END(efi64_stub_entry)
-SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
-#endif
-
.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
@@ -594,125 +449,122 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
shrq $3, %rcx
rep stosq
- pushq %rsi
call load_stage2_idt
/* Pass boot_params to initialize_identity_maps() */
- movq (%rsp), %rdi
+ movq %r15, %rdi
call initialize_identity_maps
- popq %rsi
/*
* Do the extraction, and jump to the new kernel..
*/
- pushq %rsi /* Save the real mode argument */
- movq %rsi, %rdi /* real mode address */
- leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
- leaq input_data(%rip), %rdx /* input_data */
- movl input_len(%rip), %ecx /* input_len */
- movq %rbp, %r8 /* output target address */
- movl output_len(%rip), %r9d /* decompressed length, end of relocs */
- call extract_kernel /* returns kernel location in %rax */
- popq %rsi
+ /* pass struct boot_params pointer and output target address */
+ movq %r15, %rdi
+ movq %rbp, %rsi
+ call extract_kernel /* returns kernel entry point in %rax */
/*
* Jump to the decompressed kernel.
*/
+ movq %r15, %rsi
jmp *%rax
SYM_FUNC_END(.Lrelocated)
- .code32
/*
- * This is the 32-bit trampoline that will be copied over to low memory.
+ * This is the 32-bit trampoline that will be copied over to low memory. It
+ * will be called using the ordinary 64-bit calling convention from code
+ * running in 64-bit mode.
*
* Return address is at the top of the stack (might be above 4G).
- * ECX contains the base address of the trampoline memory.
- * Non zero RDX means trampoline needs to enable 5-level paging.
+ * The first argument (EDI) contains the address of the temporary PGD level
+ * page table in 32-bit addressable memory which will be programmed into
+ * register CR3.
*/
+ .section ".rodata", "a", @progbits
SYM_CODE_START(trampoline_32bit_src)
- /* Set up data and stack segments */
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
- movl %eax, %ss
+ /*
+ * Preserve callee save 64-bit registers on the stack: this is
+ * necessary because the architecture does not guarantee that GPRs will
+ * retain their full 64-bit values across a 32-bit mode switch.
+ */
+ pushq %r15
+ pushq %r14
+ pushq %r13
+ pushq %r12
+ pushq %rbp
+ pushq %rbx
+
+ /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
+ movq %rsp, %rbx
+ shrq $32, %rbx
+ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+ pushq $__KERNEL32_CS
+ leaq 0f(%rip), %rax
+ pushq %rax
+ lretq
+
+ /*
+ * The 32-bit code below will do a far jump back to long mode and end
+ * up here after reconfiguring the number of paging levels. First, the
+ * stack pointer needs to be restored to its full 64-bit value before
+ * the callee save register contents can be popped from the stack.
+ */
+.Lret:
+ shlq $32, %rbx
+ orq %rbx, %rsp
+
+ /* Restore the preserved 64-bit registers */
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ retq
+
+ .code32
+0:
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
- /* Check what paging mode we want to be in after the trampoline */
- testl %edx, %edx
- jz 1f
-
- /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
- movl %cr4, %eax
- testl $X86_CR4_LA57, %eax
- jnz 3f
- jmp 2f
-1:
- /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
- movl %cr4, %eax
- testl $X86_CR4_LA57, %eax
- jz 3f
-2:
/* Point CR3 to the trampoline's new top level page table */
- leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
- movl %eax, %cr3
-3:
+ movl %edi, %cr3
+
 /* Set EFER.LME=1 as a precaution in case hypervisor pulls the rug */
- pushl %ecx
- pushl %edx
movl $MSR_EFER, %ecx
rdmsr
btsl $_EFER_LME, %eax
/* Avoid writing EFER if no change was made (for TDX guest) */
jc 1f
wrmsr
-1: popl %edx
- popl %ecx
-
-#ifdef CONFIG_X86_MCE
- /*
- * Preserve CR4.MCE if the kernel will enable #MC support.
- * Clearing MCE may fault in some environments (that also force #MC
- * support). Any machine check that occurs before #MC support is fully
- * configured will crash the system regardless of the CR4.MCE value set
- * here.
- */
- movl %cr4, %eax
- andl $X86_CR4_MCE, %eax
-#else
- movl $0, %eax
-#endif
-
- /* Enable PAE and LA57 (if required) paging modes */
- orl $X86_CR4_PAE, %eax
- testl %edx, %edx
- jz 1f
- orl $X86_CR4_LA57, %eax
1:
+ /* Toggle CR4.LA57 */
+ movl %cr4, %eax
+ btcl $X86_CR4_LA57_BIT, %eax
movl %eax, %cr4
- /* Calculate address of paging_enabled() once we are executing in the trampoline */
- leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
-
- /* Prepare the stack for far return to Long Mode */
- pushl $__KERNEL_CS
- pushl %eax
-
/* Enable paging again. */
movl %cr0, %eax
btsl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
- lret
+ /*
+ * Return to the 64-bit calling code using LJMP rather than LRET, to
+ * avoid the need for a 32-bit addressable stack. The destination
+ * address will be adjusted after the template code is copied into a
+ * 32-bit addressable buffer.
+ */
+.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src)
SYM_CODE_END(trampoline_32bit_src)
- .code64
-SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
- /* Return from the trampoline */
- retq
-SYM_FUNC_END(.Lpaging_enabled)
+/*
+ * This symbol is placed right after trampoline_32bit_src() so its address can
+ * be used to infer the size of the trampoline code.
+ */
+SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src)
/*
* The trampoline code has a size limit.
@@ -721,7 +573,7 @@ SYM_FUNC_END(.Lpaging_enabled)
*/
.org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
- .code32
+ .text
SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
@@ -729,6 +581,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
jmp 1b
SYM_FUNC_END(.Lno_longmode)
+ .globl verify_cpu
#include "../../kernel/verify_cpu.S"
.data
@@ -760,249 +613,11 @@ SYM_DATA_START(boot_idt)
.endr
SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-SYM_DATA_START(boot32_idt_desc)
- .word boot32_idt_end - boot32_idt - 1
- .long 0
-SYM_DATA_END(boot32_idt_desc)
- .balign 8
-SYM_DATA_START(boot32_idt)
- .rept 32
- .quad 0
- .endr
-SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end)
-#endif
-
-#ifdef CONFIG_EFI_STUB
-SYM_DATA(image_offset, .long 0)
-#endif
-#ifdef CONFIG_EFI_MIXED
-SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
-SYM_DATA(efi_is64, .byte 1)
-
-#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime)
-#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol)
-#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base)
-
- __HEAD
- .code32
-SYM_FUNC_START(efi32_pe_entry)
-/*
- * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
- * efi_system_table_32_t *sys_table)
- */
-
- pushl %ebp
- movl %esp, %ebp
- pushl %eax // dummy push to allocate loaded_image
-
- pushl %ebx // save callee-save registers
- pushl %edi
-
- call verify_cpu // check for long mode support
- testl %eax, %eax
- movl $0x80000003, %eax // EFI_UNSUPPORTED
- jnz 2f
-
- call 1f
-1: pop %ebx
- subl $ rva(1b), %ebx
-
- /* Get the loaded image protocol pointer from the image handle */
- leal -4(%ebp), %eax
- pushl %eax // &loaded_image
- leal rva(loaded_image_proto)(%ebx), %eax
- pushl %eax // pass the GUID address
- pushl 8(%ebp) // pass the image handle
-
- /*
- * Note the alignment of the stack frame.
- * sys_table
- * handle <-- 16-byte aligned on entry by ABI
- * return address
- * frame pointer
- * loaded_image <-- local variable
- * saved %ebx <-- 16-byte aligned here
- * saved %edi
- * &loaded_image
- * &loaded_image_proto
- * handle <-- 16-byte aligned for call to handle_protocol
- */
-
- movl 12(%ebp), %eax // sys_table
- movl ST32_boottime(%eax), %eax // sys_table->boottime
- call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol
- addl $12, %esp // restore argument space
- testl %eax, %eax
- jnz 2f
-
- movl 8(%ebp), %ecx // image_handle
- movl 12(%ebp), %edx // sys_table
- movl -4(%ebp), %esi // loaded_image
- movl LI32_image_base(%esi), %esi // loaded_image->image_base
- movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry
- /*
- * We need to set the image_offset variable here since startup_32() will
- * use it before we get to the 64-bit efi_pe_entry() in C code.
- */
- subl %esi, %ebx
- movl %ebx, rva(image_offset)(%ebp) // save image_offset
- jmp efi32_pe_stub_entry
-
-2: popl %edi // restore callee-save registers
- popl %ebx
- leave
- RET
-SYM_FUNC_END(efi32_pe_entry)
-
- .section ".rodata"
- /* EFI loaded image protocol GUID */
- .balign 4
-SYM_DATA_START_LOCAL(loaded_image_proto)
- .long 0x5b1b31a1
- .word 0x9562, 0x11d2
- .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
-SYM_DATA_END(loaded_image_proto)
-#endif
-
-#ifdef CONFIG_AMD_MEM_ENCRYPT
- __HEAD
- .code32
-/*
- * Write an IDT entry into boot32_idt
- *
- * Parameters:
- *
- * %eax: Handler address
- * %edx: Vector number
- *
- * Physical offset is expected in %ebp
- */
-SYM_FUNC_START(startup32_set_idt_entry)
- push %ebx
- push %ecx
-
- /* IDT entry address to %ebx */
- leal rva(boot32_idt)(%ebp), %ebx
- shl $3, %edx
- addl %edx, %ebx
-
- /* Build IDT entry, lower 4 bytes */
- movl %eax, %edx
- andl $0x0000ffff, %edx # Target code segment offset [15:0]
- movl $__KERNEL32_CS, %ecx # Target code segment selector
- shl $16, %ecx
- orl %ecx, %edx
-
- /* Store lower 4 bytes to IDT */
- movl %edx, (%ebx)
-
- /* Build IDT entry, upper 4 bytes */
- movl %eax, %edx
- andl $0xffff0000, %edx # Target code segment offset [31:16]
- orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate
-
- /* Store upper 4 bytes to IDT */
- movl %edx, 4(%ebx)
-
- pop %ecx
- pop %ebx
- RET
-SYM_FUNC_END(startup32_set_idt_entry)
-#endif
-
-SYM_FUNC_START(startup32_load_idt)
-#ifdef CONFIG_AMD_MEM_ENCRYPT
- /* #VC handler */
- leal rva(startup32_vc_handler)(%ebp), %eax
- movl $X86_TRAP_VC, %edx
- call startup32_set_idt_entry
-
- /* Load IDT */
- leal rva(boot32_idt)(%ebp), %eax
- movl %eax, rva(boot32_idt_desc+2)(%ebp)
- lidt rva(boot32_idt_desc)(%ebp)
-#endif
- RET
-SYM_FUNC_END(startup32_load_idt)
-
-/*
- * Check for the correct C-bit position when the startup_32 boot-path is used.
- *
- * The check makes use of the fact that all memory is encrypted when paging is
- * disabled. The function creates 64 bits of random data using the RDRAND
- * instruction. RDRAND is mandatory for SEV guests, so always available. If the
- * hypervisor violates that the kernel will crash right here.
- *
- * The 64 bits of random data are stored to a memory location and at the same
- * time kept in the %eax and %ebx registers. Since encryption is always active
- * when paging is off the random data will be stored encrypted in main memory.
- *
- * Then paging is enabled. When the C-bit position is correct all memory is
- * still mapped encrypted and comparing the register values with memory will
- * succeed. An incorrect C-bit position will map all memory unencrypted, so that
- * the compare will use the encrypted random data and fail.
- */
-SYM_FUNC_START(startup32_check_sev_cbit)
-#ifdef CONFIG_AMD_MEM_ENCRYPT
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
-
- /* Check for non-zero sev_status */
- movl rva(sev_status)(%ebp), %eax
- testl %eax, %eax
- jz 4f
-
- /*
- * Get two 32-bit random values - Don't bail out if RDRAND fails
- * because it is better to prevent forward progress if no random value
- * can be gathered.
- */
-1: rdrand %eax
- jnc 1b
-2: rdrand %ebx
- jnc 2b
-
- /* Store to memory and keep it in the registers */
- movl %eax, rva(sev_check_data)(%ebp)
- movl %ebx, rva(sev_check_data+4)(%ebp)
-
- /* Enable paging to see if encryption is active */
- movl %cr0, %edx /* Backup %cr0 in %edx */
- movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
- movl %ecx, %cr0
-
- cmpl %eax, rva(sev_check_data)(%ebp)
- jne 3f
- cmpl %ebx, rva(sev_check_data+4)(%ebp)
- jne 3f
-
- movl %edx, %cr0 /* Restore previous %cr0 */
-
- jmp 4f
-
-3: /* Check failed - hlt the machine */
- hlt
- jmp 3b
-
-4:
- popl %edx
- popl %ecx
- popl %ebx
- popl %eax
-#endif
- RET
-SYM_FUNC_END(startup32_check_sev_cbit)
-
/*
* Stack and heap for uncompression
*/
.bss
.balign 4
-SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0)
-
SYM_DATA_START_LOCAL(boot_stack)
.fill BOOT_STACK_SIZE, 1, 0
.balign 16
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
index b4155273d..b8c42339b 100644
--- a/arch/x86/boot/compressed/ident_map_64.c
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -167,8 +167,9 @@ void initialize_identity_maps(void *rmode)
* or does not touch all the pages covering them.
*/
kernel_add_identity_map((unsigned long)_head, (unsigned long)_end);
- boot_params = rmode;
- kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
+ boot_params_ptr = rmode;
+ kernel_add_identity_map((unsigned long)boot_params_ptr,
+ (unsigned long)(boot_params_ptr + 1));
cmdline = get_cmd_line_ptr();
kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
@@ -176,7 +177,7 @@ void initialize_identity_maps(void *rmode)
* Also map the setup_data entries passed via boot_params in case they
* need to be accessed by uncompressed kernel via the identity mapping.
*/
- sd = (struct setup_data *)boot_params->hdr.setup_data;
+ sd = (struct setup_data *)boot_params_ptr->hdr.setup_data;
while (sd) {
unsigned long sd_addr = (unsigned long)sd;
@@ -393,3 +394,8 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
*/
kernel_add_identity_map(address, end);
}
+
+void do_boot_nmi_trap(struct pt_regs *regs, unsigned long error_code)
+{
+ /* Empty handler to ignore NMI during early boot */
+}
diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c
index 3cdf94b41..d100284bb 100644
--- a/arch/x86/boot/compressed/idt_64.c
+++ b/arch/x86/boot/compressed/idt_64.c
@@ -61,6 +61,7 @@ void load_stage2_idt(void)
boot_idt_desc.address = (unsigned long)boot_idt;
set_idt_entry(X86_TRAP_PF, boot_page_fault);
+ set_idt_entry(X86_TRAP_NMI, boot_nmi_trap);
#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S
index 22890e199..4d03c8562 100644
--- a/arch/x86/boot/compressed/idt_handlers_64.S
+++ b/arch/x86/boot/compressed/idt_handlers_64.S
@@ -70,6 +70,7 @@ SYM_FUNC_END(\name)
.code64
EXCEPTION_HANDLER boot_page_fault do_boot_page_fault error_code=1
+EXCEPTION_HANDLER boot_nmi_trap do_boot_nmi_trap error_code=0
#ifdef CONFIG_AMD_MEM_ENCRYPT
EXCEPTION_HANDLER boot_stage1_vc do_vc_no_ghcb error_code=1
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index e476bcbd9..9794d9174 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -63,7 +63,7 @@ static unsigned long get_boot_seed(void)
unsigned long hash = 0;
hash = rotate_xor(hash, build_str, sizeof(build_str));
- hash = rotate_xor(hash, boot_params, sizeof(*boot_params));
+ hash = rotate_xor(hash, boot_params_ptr, sizeof(*boot_params_ptr));
return hash;
}
@@ -383,7 +383,7 @@ static void handle_mem_options(void)
static void mem_avoid_init(unsigned long input, unsigned long input_size,
unsigned long output)
{
- unsigned long init_size = boot_params->hdr.init_size;
+ unsigned long init_size = boot_params_ptr->hdr.init_size;
u64 initrd_start, initrd_size;
unsigned long cmd_line, cmd_line_size;
@@ -395,10 +395,10 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
/* Avoid initrd. */
- initrd_start = (u64)boot_params->ext_ramdisk_image << 32;
- initrd_start |= boot_params->hdr.ramdisk_image;
- initrd_size = (u64)boot_params->ext_ramdisk_size << 32;
- initrd_size |= boot_params->hdr.ramdisk_size;
+ initrd_start = (u64)boot_params_ptr->ext_ramdisk_image << 32;
+ initrd_start |= boot_params_ptr->hdr.ramdisk_image;
+ initrd_size = (u64)boot_params_ptr->ext_ramdisk_size << 32;
+ initrd_size |= boot_params_ptr->hdr.ramdisk_size;
mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
/* No need to set mapping for initrd, it will be handled in VO. */
@@ -413,8 +413,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
}
/* Avoid boot parameters. */
- mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
- mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
+ mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params_ptr;
+ mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params_ptr);
/* We don't need to set a mapping for setup_data. */
@@ -447,7 +447,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
}
/* Avoid all entries in the setup_data linked list. */
- ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
+ ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data;
while (ptr) {
struct mem_vector avoid;
@@ -679,7 +679,7 @@ static bool process_mem_region(struct mem_vector *region,
static bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
- struct efi_info *e = &boot_params->efi_info;
+ struct efi_info *e = &boot_params_ptr->efi_info;
bool efi_mirror_found = false;
struct mem_vector region;
efi_memory_desc_t *md;
@@ -761,8 +761,8 @@ static void process_e820_entries(unsigned long minimum,
struct boot_e820_entry *entry;
/* Verify potential e820 positions, appending to slots list. */
- for (i = 0; i < boot_params->e820_entries; i++) {
- entry = &boot_params->e820_table[i];
+ for (i = 0; i < boot_params_ptr->e820_entries; i++) {
+ entry = &boot_params_ptr->e820_table[i];
/* Skip non-RAM entries. */
if (entry->type != E820_TYPE_RAM)
continue;
@@ -836,7 +836,7 @@ void choose_random_location(unsigned long input,
return;
}
- boot_params->hdr.loadflags |= KASLR_FLAG;
+ boot_params_ptr->hdr.loadflags |= KASLR_FLAG;
if (IS_ENABLED(CONFIG_X86_32))
mem_limit = KERNEL_IMAGE_SIZE;
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index a73e4d783..32f7cc8a8 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -12,16 +12,13 @@
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/asm-offsets.h>
+#include <asm/segment.h>
+#include <asm/trapnr.h>
.text
.code32
SYM_FUNC_START(get_sev_encryption_bit)
- xor %eax, %eax
-
-#ifdef CONFIG_AMD_MEM_ENCRYPT
push %ebx
- push %ecx
- push %edx
movl $0x80000000, %eax /* CPUID to check the highest leaf */
cpuid
@@ -52,12 +49,7 @@ SYM_FUNC_START(get_sev_encryption_bit)
xor %eax, %eax
.Lsev_exit:
- pop %edx
- pop %ecx
pop %ebx
-
-#endif /* CONFIG_AMD_MEM_ENCRYPT */
-
RET
SYM_FUNC_END(get_sev_encryption_bit)
@@ -98,7 +90,7 @@ SYM_CODE_START_LOCAL(sev_es_req_cpuid)
jmp 1b
SYM_CODE_END(sev_es_req_cpuid)
-SYM_CODE_START(startup32_vc_handler)
+SYM_CODE_START_LOCAL(startup32_vc_handler)
pushl %eax
pushl %ebx
pushl %ecx
@@ -184,15 +176,149 @@ SYM_CODE_START(startup32_vc_handler)
jmp .Lfail
SYM_CODE_END(startup32_vc_handler)
+/*
+ * Write an IDT entry into boot32_idt
+ *
+ * Parameters:
+ *
+ * %eax: Handler address
+ * %edx: Vector number
+ * %ecx: IDT address
+ */
+SYM_FUNC_START_LOCAL(startup32_set_idt_entry)
+ /* IDT entry address to %ecx */
+ leal (%ecx, %edx, 8), %ecx
+
+ /* Build IDT entry, lower 4 bytes */
+ movl %eax, %edx
+ andl $0x0000ffff, %edx # Target code segment offset [15:0]
+ orl $(__KERNEL32_CS << 16), %edx # Target code segment selector
+
+ /* Store lower 4 bytes to IDT */
+ movl %edx, (%ecx)
+
+ /* Build IDT entry, upper 4 bytes */
+ movl %eax, %edx
+ andl $0xffff0000, %edx # Target code segment offset [31:16]
+ orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate
+
+ /* Store upper 4 bytes to IDT */
+ movl %edx, 4(%ecx)
+
+ RET
+SYM_FUNC_END(startup32_set_idt_entry)
+
+SYM_FUNC_START(startup32_load_idt)
+ push %ebp
+ push %ebx
+
+ call 1f
+1: pop %ebp
+
+ leal (boot32_idt - 1b)(%ebp), %ebx
+
+ /* #VC handler */
+ leal (startup32_vc_handler - 1b)(%ebp), %eax
+ movl $X86_TRAP_VC, %edx
+ movl %ebx, %ecx
+ call startup32_set_idt_entry
+
+ /* Load IDT */
+ leal (boot32_idt_desc - 1b)(%ebp), %ecx
+ movl %ebx, 2(%ecx)
+ lidt (%ecx)
+
+ pop %ebx
+ pop %ebp
+ RET
+SYM_FUNC_END(startup32_load_idt)
+
+/*
+ * Check for the correct C-bit position when the startup_32 boot-path is used.
+ *
+ * The check makes use of the fact that all memory is encrypted when paging is
+ * disabled. The function creates 64 bits of random data using the RDRAND
+ * instruction. RDRAND is mandatory for SEV guests, so always available. If the
+ * hypervisor violates that the kernel will crash right here.
+ *
+ * The 64 bits of random data are stored to a memory location and at the same
+ * time kept in the %eax and %ebx registers. Since encryption is always active
+ * when paging is off the random data will be stored encrypted in main memory.
+ *
+ * Then paging is enabled. When the C-bit position is correct all memory is
+ * still mapped encrypted and comparing the register values with memory will
+ * succeed. An incorrect C-bit position will map all memory unencrypted, so that
+ * the compare will use the encrypted random data and fail.
+ */
+SYM_FUNC_START(startup32_check_sev_cbit)
+ pushl %ebx
+ pushl %ebp
+
+ call 0f
+0: popl %ebp
+
+ /* Check for non-zero sev_status */
+ movl (sev_status - 0b)(%ebp), %eax
+ testl %eax, %eax
+ jz 4f
+
+ /*
+ * Get two 32-bit random values - Don't bail out if RDRAND fails
+ * because it is better to prevent forward progress if no random value
+ * can be gathered.
+ */
+1: rdrand %eax
+ jnc 1b
+2: rdrand %ebx
+ jnc 2b
+
+ /* Store to memory and keep it in the registers */
+ leal (sev_check_data - 0b)(%ebp), %ebp
+ movl %eax, 0(%ebp)
+ movl %ebx, 4(%ebp)
+
+ /* Enable paging to see if encryption is active */
+ movl %cr0, %edx /* Backup %cr0 in %edx */
+ movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
+ movl %ecx, %cr0
+
+ cmpl %eax, 0(%ebp)
+ jne 3f
+ cmpl %ebx, 4(%ebp)
+ jne 3f
+
+ movl %edx, %cr0 /* Restore previous %cr0 */
+
+ jmp 4f
+
+3: /* Check failed - hlt the machine */
+ hlt
+ jmp 3b
+
+4:
+ popl %ebp
+ popl %ebx
+ RET
+SYM_FUNC_END(startup32_check_sev_cbit)
+
.code64
#include "../../kernel/sev_verify_cbit.S"
.data
-#ifdef CONFIG_AMD_MEM_ENCRYPT
.balign 8
SYM_DATA(sme_me_mask, .quad 0)
SYM_DATA(sev_status, .quad 0)
SYM_DATA(sev_check_data, .quad 0)
-#endif
+
+SYM_DATA_START_LOCAL(boot32_idt)
+ .rept 32
+ .quad 0
+ .endr
+SYM_DATA_END(boot32_idt)
+
+SYM_DATA_START_LOCAL(boot32_idt_desc)
+ .word . - boot32_idt - 1
+ .long 0
+SYM_DATA_END(boot32_idt_desc)
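Editor's note: the rewritten startup32_set_idt_entry above packs a 32-bit interrupt gate from a handler address, a code segment selector and a vector. A standalone C sketch of the same bit layout (illustrative only, not part of the patch; EXAMPLE_CS stands in for __KERNEL32_CS):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative selector value; the real code uses __KERNEL32_CS. */
    #define EXAMPLE_CS 0x10

    struct idt_gate { uint32_t lo, hi; };

    /* Lower word: offset[15:0] | selector << 16.
     * Upper word: offset[31:16] | 0x8e00 (present, 32-bit interrupt gate). */
    static struct idt_gate make_gate32(uint32_t handler, uint16_t selector)
    {
            struct idt_gate g;

            g.lo = (handler & 0x0000ffff) | ((uint32_t)selector << 16);
            g.hi = (handler & 0xffff0000) | 0x00008e00;
            return g;
    }

    int main(void)
    {
            struct idt_gate g = make_gate32(0x00123456, EXAMPLE_CS);

            printf("lo=%08x hi=%08x\n", g.lo, g.hi);
            return 0;
    }

The assembly stores g.lo at (%ecx) and g.hi at 4(%ecx), with %ecx pointing at the boot32_idt slot for the vector.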
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index cf690d871..8ae7893d7 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -46,7 +46,7 @@ void *memmove(void *dest, const void *src, size_t n);
/*
* This is set up by the setup-routine at boot-time
*/
-struct boot_params *boot_params;
+struct boot_params *boot_params_ptr;
struct port_io_ops pio_ops;
@@ -132,8 +132,8 @@ void __putstr(const char *s)
if (lines == 0 || cols == 0)
return;
- x = boot_params->screen_info.orig_x;
- y = boot_params->screen_info.orig_y;
+ x = boot_params_ptr->screen_info.orig_x;
+ y = boot_params_ptr->screen_info.orig_y;
while ((c = *s++) != '\0') {
if (c == '\n') {
@@ -154,8 +154,8 @@ void __putstr(const char *s)
}
}
- boot_params->screen_info.orig_x = x;
- boot_params->screen_info.orig_y = y;
+ boot_params_ptr->screen_info.orig_x = x;
+ boot_params_ptr->screen_info.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
outb(14, vidport);
@@ -277,7 +277,7 @@ static inline void handle_relocations(void *output, unsigned long output_len,
{ }
#endif
-static void parse_elf(void *output)
+static size_t parse_elf(void *output)
{
#ifdef CONFIG_X86_64
Elf64_Ehdr ehdr;
@@ -293,10 +293,8 @@ static void parse_elf(void *output)
if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
- ehdr.e_ident[EI_MAG3] != ELFMAG3) {
+ ehdr.e_ident[EI_MAG3] != ELFMAG3)
error("Kernel is not a valid ELF file");
- return;
- }
debug_putstr("Parsing ELF... ");
@@ -328,6 +326,35 @@ static void parse_elf(void *output)
}
free(phdrs);
+
+ return ehdr.e_entry - LOAD_PHYSICAL_ADDR;
+}
+
+const unsigned long kernel_total_size = VO__end - VO__text;
+
+static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
+
+extern unsigned char input_data[];
+extern unsigned int input_len, output_len;
+
+unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+ void (*error)(char *x))
+{
+ unsigned long entry;
+
+ if (!free_mem_ptr) {
+ free_mem_ptr = (unsigned long)boot_heap;
+ free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap);
+ }
+
+ if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len,
+ NULL, error) < 0)
+ return ULONG_MAX;
+
+ entry = parse_elf(outbuf);
+ handle_relocations(outbuf, output_len, virt_addr);
+
+ return entry;
}
/*
@@ -347,25 +374,22 @@ static void parse_elf(void *output)
* |-------uncompressed kernel image---------|
*
*/
-asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
- unsigned char *input_data,
- unsigned long input_len,
- unsigned char *output,
- unsigned long output_len)
+asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
{
- const unsigned long kernel_total_size = VO__end - VO__text;
unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+ memptr heap = (memptr)boot_heap;
unsigned long needed_size;
+ size_t entry_offset;
/* Retain x86 boot parameters pointer passed from startup_32/64. */
- boot_params = rmode;
+ boot_params_ptr = rmode;
/* Clear flags intended for solely in-kernel use. */
- boot_params->hdr.loadflags &= ~KASLR_FLAG;
+ boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG;
- sanitize_boot_params(boot_params);
+ sanitize_boot_params(boot_params_ptr);
- if (boot_params->screen_info.orig_video_mode == 7) {
+ if (boot_params_ptr->screen_info.orig_video_mode == 7) {
vidmem = (char *) 0xb0000;
vidport = 0x3b4;
} else {
@@ -373,8 +397,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
vidport = 0x3d4;
}
- lines = boot_params->screen_info.orig_video_lines;
- cols = boot_params->screen_info.orig_video_cols;
+ lines = boot_params_ptr->screen_info.orig_video_lines;
+ cols = boot_params_ptr->screen_info.orig_video_cols;
init_default_io_ops();
@@ -393,7 +417,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
* so that early debugging output from the RSDP parsing code can be
* collected.
*/
- boot_params->acpi_rsdp_addr = get_rsdp_addr();
+ boot_params_ptr->acpi_rsdp_addr = get_rsdp_addr();
debug_putstr("early console in extract_kernel\n");
@@ -411,7 +435,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
* entries. This ensures the full mapped area is usable RAM
* and doesn't include any reserved areas.
*/
- needed_size = max(output_len, kernel_total_size);
+ needed_size = max_t(unsigned long, output_len, kernel_total_size);
#ifdef CONFIG_X86_64
needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
#endif
@@ -442,7 +466,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#ifdef CONFIG_X86_64
if (heap > 0x3fffffffffffUL)
error("Destination address too large");
- if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+ if (virt_addr + needed_size > KERNEL_IMAGE_SIZE)
error("Destination virtual address is beyond the kernel mapping area");
#else
if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
@@ -454,16 +478,17 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#endif
debug_putstr("\nDecompressing Linux... ");
- __decompress(input_data, input_len, NULL, NULL, output, output_len,
- NULL, error);
- parse_elf(output);
- handle_relocations(output, output_len, virt_addr);
- debug_putstr("done.\nBooting the kernel.\n");
+
+ entry_offset = decompress_kernel(output, virt_addr, error);
+
+ debug_putstr("done.\nBooting the kernel (entry_offset: 0x");
+ debug_puthex(entry_offset);
+ debug_putstr(").\n");
/* Disable exception handling before booting the kernel */
cleanup_exception_handling();
- return output;
+ return output + entry_offset;
}
void fortify_panic(const char *name)
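Editor's note: with this change parse_elf() reports how far the ELF entry point sits from LOAD_PHYSICAL_ADDR, and extract_kernel() returns output + entry_offset rather than the start of the buffer. A rough user-space illustration of that arithmetic (hypothetical sketch, not boot code; EXAMPLE_LOAD_ADDR is a stand-in for the real, config-dependent LOAD_PHYSICAL_ADDR):

    #include <elf.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Stand-in for LOAD_PHYSICAL_ADDR. */
    #define EXAMPLE_LOAD_ADDR 0x1000000UL

    int main(int argc, char **argv)
    {
            Elf64_Ehdr ehdr;
            FILE *f;

            if (argc != 2 || !(f = fopen(argv[1], "rb")))
                    return 1;
            if (fread(&ehdr, sizeof(ehdr), 1, f) != 1)
                    return 1;
            fclose(f);

            if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG))
                    return 1;

            /* entry_offset = e_entry - load address; the boot code then jumps
             * to output + entry_offset instead of the buffer start. */
            printf("e_entry=0x%lx entry_offset=0x%lx\n",
                   (unsigned long)ehdr.e_entry,
                   (unsigned long)ehdr.e_entry - EXAMPLE_LOAD_ADDR);
            return 0;
    }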
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 20118fb7c..254acd76e 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -52,7 +52,6 @@ extern memptr free_mem_ptr;
extern memptr free_mem_end_ptr;
void *malloc(int size);
void free(void *where);
-extern struct boot_params *boot_params;
void __putstr(const char *s);
void __puthex(unsigned long value);
#define error_putstr(__x) __putstr(__x)
@@ -170,9 +169,7 @@ static inline int count_immovable_mem_regions(void) { return 0; }
#endif
/* ident_map_64.c */
-#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
-#endif
extern void kernel_add_identity_map(unsigned long start, unsigned long end);
/* Used by PAGE_KERN* macros: */
@@ -190,6 +187,7 @@ static inline void cleanup_exception_handling(void) { }
/* IDT Entry Points */
void boot_page_fault(void);
+void boot_nmi_trap(void);
void boot_stage1_vc(void);
void boot_stage2_vc(void);
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index cc9b2529a..6d595abe0 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -3,18 +3,16 @@
#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
-#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
-
#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE 0x80
-
-#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0
#ifndef __ASSEMBLER__
extern unsigned long *trampoline_32bit;
-extern void trampoline_32bit_src(void *return_ptr);
+extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
+
+extern const u16 trampoline_ljmp_imm_offset;
#endif /* __ASSEMBLER__ */
#endif /* BOOT_COMPRESSED_PAGETABLE_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 2ac12ff41..51f957b24 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39;
unsigned int __section(".data") ptrs_per_p4d = 1;
#endif
-struct paging_config {
- unsigned long trampoline_start;
- unsigned long l5_required;
-};
-
/* Buffer to preserve trampoline memory */
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
@@ -29,11 +24,10 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
* purposes.
*
* Avoid putting the pointer into .bss as it will be cleared between
- * paging_prepare() and extract_kernel().
+ * configure_5level_paging() and extract_kernel().
*/
unsigned long *trampoline_32bit __section(".data");
-extern struct boot_params *boot_params;
int cmdline_find_option_bool(const char *option);
static unsigned long find_trampoline_placement(void)
@@ -54,7 +48,7 @@ static unsigned long find_trampoline_placement(void)
*
* Only look for values in the legacy ROM for non-EFI system.
*/
- signature = (char *)&boot_params->efi_info.efi_loader_signature;
+ signature = (char *)&boot_params_ptr->efi_info.efi_loader_signature;
if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) {
ebda_start = *(unsigned short *)0x40e << 4;
@@ -70,10 +64,10 @@ static unsigned long find_trampoline_placement(void)
bios_start = round_down(bios_start, PAGE_SIZE);
/* Find the first usable memory region under bios_start. */
- for (i = boot_params->e820_entries - 1; i >= 0; i--) {
+ for (i = boot_params_ptr->e820_entries - 1; i >= 0; i--) {
unsigned long new = bios_start;
- entry = &boot_params->e820_table[i];
+ entry = &boot_params_ptr->e820_table[i];
/* Skip all entries above bios_start. */
if (bios_start <= entry->addr)
@@ -106,12 +100,13 @@ static unsigned long find_trampoline_placement(void)
return bios_start - TRAMPOLINE_32BIT_SIZE;
}
-struct paging_config paging_prepare(void *rmode)
+asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
{
- struct paging_config paging_config = {};
+ void (*toggle_la57)(void *cr3);
+ bool l5_required = false;
/* Initialize boot_params. Required for cmdline_find_option_bool(). */
- boot_params = rmode;
+ boot_params_ptr = bp;
/*
* Check if LA57 is desired and supported.
@@ -129,12 +124,22 @@ struct paging_config paging_prepare(void *rmode)
!cmdline_find_option_bool("no5lvl") &&
native_cpuid_eax(0) >= 7 &&
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
- paging_config.l5_required = 1;
+ l5_required = true;
+
+ /* Initialize variables for 5-level paging */
+ __pgtable_l5_enabled = 1;
+ pgdir_shift = 48;
+ ptrs_per_p4d = 512;
}
- paging_config.trampoline_start = find_trampoline_placement();
+ /*
+ * The trampoline will not be used if the paging mode is already set to
+ * the desired one.
+ */
+ if (l5_required == !!(native_read_cr4() & X86_CR4_LA57))
+ return;
- trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
+ trampoline_32bit = (unsigned long *)find_trampoline_placement();
/* Preserve trampoline memory */
memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
@@ -143,32 +148,32 @@ struct paging_config paging_prepare(void *rmode)
memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
/* Copy trampoline code in place */
- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+ toggle_la57 = memcpy(trampoline_32bit +
+ TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
/*
+ * Avoid the need for a stack in the 32-bit trampoline code, by using
+ * LJMP rather than LRET to return back to long mode. LJMP takes an
+ * immediate absolute address, which needs to be adjusted based on the
+ * placement of the trampoline.
+ */
+ *(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) +=
+ (unsigned long)toggle_la57;
+
+ /*
* The code below prepares page table in trampoline memory.
*
* The new page table will be used by trampoline code for switching
* from 4- to 5-level paging or vice versa.
- *
- * If switching is not required, the page table is unused: trampoline
- * code wouldn't touch CR3.
- */
-
- /*
- * We are not going to use the page table in trampoline memory if we
- * are already in the desired paging mode.
*/
- if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
- goto out;
- if (paging_config.l5_required) {
+ if (l5_required) {
/*
* For 4- to 5-level paging transition, set up current CR3 as
* the first and the only entry in a new top-level page table.
*/
- trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
+ *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC;
} else {
unsigned long src;
@@ -181,38 +186,17 @@ struct paging_config paging_prepare(void *rmode)
* may be above 4G.
*/
src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
- (void *)src, PAGE_SIZE);
+ memcpy(trampoline_32bit, (void *)src, PAGE_SIZE);
}
-out:
- return paging_config;
-}
-
-void cleanup_trampoline(void *pgtable)
-{
- void *trampoline_pgtable;
-
- trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
+ toggle_la57(trampoline_32bit);
/*
- * Move the top level page table out of trampoline memory,
- * if it's there.
+ * Move the top level page table out of trampoline memory.
*/
- if ((void *)__native_read_cr3() == trampoline_pgtable) {
- memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
- native_write_cr3((unsigned long)pgtable);
- }
+ memcpy(pgtable, trampoline_32bit, PAGE_SIZE);
+ native_write_cr3((unsigned long)pgtable);
/* Restore trampoline memory */
memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
-
- /* Initialize variables for 5-level paging */
-#ifdef CONFIG_X86_5LEVEL
- if (__read_cr4() & X86_CR4_LA57) {
- __pgtable_l5_enabled = 1;
- pgdir_shift = 48;
- ptrs_per_p4d = 512;
- }
-#endif
}
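Editor's note: the LJMP fix-up in configure_5level_paging() above is a tiny relocation: the trampoline template is copied into 32-bit addressable memory and the absolute destination encoded after the LJMP opcode is then adjusted by the address of the copy. A minimal C sketch of that copy-and-patch pattern on plain data (illustrative only; the blob, PATCH_OFFSET and the placeholder value are made up, and nothing is executed):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* "Template": a 32-bit placeholder at a known offset initially holds only
     * the offset of the target label within the template (here 0x08). */
    static const unsigned char template_blob[16] = {
            0x90, 0x90, 0x90, 0x90,
            0x08, 0x00, 0x00, 0x00,
            0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
    };
    #define PATCH_OFFSET 4   /* plays the role of trampoline_ljmp_imm_offset */

    int main(void)
    {
            unsigned char *buf = malloc(sizeof(template_blob));
            uint32_t dest;

            if (!buf)
                    return 1;
            memcpy(buf, template_blob, sizeof(template_blob));

            /* Turn the label offset into an absolute address of the copy,
             * mirroring "*(imm) += (unsigned long)toggle_la57" above. */
            memcpy(&dest, buf + PATCH_OFFSET, sizeof(dest));
            dest += (uint32_t)(uintptr_t)buf;
            memcpy(buf + PATCH_OFFSET, &dest, sizeof(dest));

            printf("copy at %p, patched destination 0x%x\n", (void *)buf, dest);
            free(buf);
            return 0;
    }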
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 9c91cc40f..d07e665bb 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -327,20 +327,25 @@ static void enforce_vmpl0(void)
*/
#define SNP_FEATURES_PRESENT (0)
+u64 snp_get_unsupported_features(u64 status)
+{
+ if (!(status & MSR_AMD64_SEV_SNP_ENABLED))
+ return 0;
+
+ return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+}
+
void snp_check_features(void)
{
u64 unsupported;
- if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
- return;
-
/*
* Terminate the boot if hypervisor has enabled any feature lacking
* guest side implementation. Pass on the unsupported features mask through
* EXIT_INFO_2 of the GHCB protocol so that those features can be reported
* as part of the guest boot failure.
*/
- unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+ unsupported = snp_get_unsupported_features(sev_status);
if (unsupported) {
if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb()))
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
@@ -350,35 +355,22 @@ void snp_check_features(void)
}
}
-void sev_enable(struct boot_params *bp)
+/*
+ * sev_check_cpu_support - Check for SEV support in the CPU capabilities
+ *
+ * Returns < 0 if SEV is not supported, otherwise the position of the
+ * encryption bit in the page table descriptors.
+ */
+static int sev_check_cpu_support(void)
{
unsigned int eax, ebx, ecx, edx;
- struct msr m;
- bool snp;
-
- /*
- * bp->cc_blob_address should only be set by boot/compressed kernel.
- * Initialize it to 0 to ensure that uninitialized values from
- * buggy bootloaders aren't propagated.
- */
- if (bp)
- bp->cc_blob_address = 0;
-
- /*
- * Do an initial SEV capability check before snp_init() which
- * loads the CPUID page and the same checks afterwards are done
- * without the hypervisor and are trustworthy.
- *
- * If the HV fakes SEV support, the guest will crash'n'burn
- * which is good enough.
- */
/* Check for the SME/SEV support leaf */
eax = 0x80000000;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
if (eax < 0x8000001f)
- return;
+ return -ENODEV;
/*
* Check for the SME/SEV feature:
@@ -393,6 +385,35 @@ void sev_enable(struct boot_params *bp)
native_cpuid(&eax, &ebx, &ecx, &edx);
/* Check whether SEV is supported */
if (!(eax & BIT(1)))
+ return -ENODEV;
+
+ return ebx & 0x3f;
+}
+
+void sev_enable(struct boot_params *bp)
+{
+ struct msr m;
+ int bitpos;
+ bool snp;
+
+ /*
+ * bp->cc_blob_address should only be set by boot/compressed kernel.
+ * Initialize it to 0 to ensure that uninitialized values from
+ * buggy bootloaders aren't propagated.
+ */
+ if (bp)
+ bp->cc_blob_address = 0;
+
+ /*
+ * Do an initial SEV capability check before snp_init() which
+ * loads the CPUID page and the same checks afterwards are done
+ * without the hypervisor and are trustworthy.
+ *
+ * If the HV fakes SEV support, the guest will crash'n'burn
+ * which is good enough.
+ */
+
+ if (sev_check_cpu_support() < 0)
return;
/*
@@ -403,26 +424,8 @@ void sev_enable(struct boot_params *bp)
/* Now repeat the checks with the SNP CPUID table. */
- /* Recheck the SME/SEV support leaf */
- eax = 0x80000000;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- if (eax < 0x8000001f)
- return;
-
- /*
- * Recheck for the SME/SEV feature:
- * CPUID Fn8000_001F[EAX]
- * - Bit 0 - Secure Memory Encryption support
- * - Bit 1 - Secure Encrypted Virtualization support
- * CPUID Fn8000_001F[EBX]
- * - Bits 5:0 - Pagetable bit position used to indicate encryption
- */
- eax = 0x8000001f;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- /* Check whether SEV is supported */
- if (!(eax & BIT(1))) {
+ bitpos = sev_check_cpu_support();
+ if (bitpos < 0) {
if (snp)
error("SEV-SNP support indicated by CC blob, but not CPUID.");
return;
@@ -454,7 +457,24 @@ void sev_enable(struct boot_params *bp)
if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");
- sme_me_mask = BIT_ULL(ebx & 0x3f);
+ sme_me_mask = BIT_ULL(bitpos);
+}
+
+/*
+ * sev_get_status - Retrieve the SEV status mask
+ *
+ * Returns 0 if the CPU is not SEV capable, otherwise the value of the
+ * AMD64_SEV MSR.
+ */
+u64 sev_get_status(void)
+{
+ struct msr m;
+
+ if (sev_check_cpu_support() < 0)
+ return 0;
+
+ boot_rdmsr(MSR_AMD64_SEV, &m);
+ return m.q;
}
/* Search for Confidential Computing blob in the EFI config table. */
@@ -545,7 +565,7 @@ void sev_prep_identity_maps(unsigned long top_level_pgt)
* accessed after switchover.
*/
if (sev_snp_enabled()) {
- unsigned long cc_info_pa = boot_params->cc_blob_address;
+ unsigned long cc_info_pa = boot_params_ptr->cc_blob_address;
struct cc_blob_sev_info *cc_info;
kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info));
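Editor's note: sev_check_cpu_support() factors out the CPUID probing that sev_enable() previously did twice: leaf 0x8000001F advertises SEV in EAX bit 1 and the page-table C-bit position in EBX[5:0]. A user-space sketch of the same probe (illustrative; it mirrors only the CPUID reads, not the MSR or SNP handling):

    #include <cpuid.h>
    #include <stdio.h>

    /* Return the C-bit position, or -1 if SEV is not advertised. */
    static int sev_cbit_from_cpuid(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx) ||
                eax < 0x8000001f)
                    return -1;
            if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx) ||
                !(eax & 2))             /* bit 1: SEV supported */
                    return -1;
            return ebx & 0x3f;          /* bits 5:0: C-bit position */
    }

    int main(void)
    {
            int bitpos = sev_cbit_from_cpuid();

            if (bitpos < 0)
                    printf("SEV not reported by CPUID\n");
            else
                    printf("C-bit position: %d\n", bitpos);
            return 0;
    }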
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index f912d7770..d31982509 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -406,7 +406,7 @@ xloadflags:
# define XLF1 0
#endif
-#ifdef CONFIG_EFI_STUB
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
# ifdef CONFIG_EFI_MIXED
# define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64)
# else
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index a3725ad46..bd247692b 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -290,6 +290,7 @@ static void efi_stub_entry_update(void)
{
unsigned long addr = efi32_stub_entry;
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
#ifdef CONFIG_X86_64
/* Yes, this is really how we defined it :( */
addr = efi64_stub_entry - 0x200;
@@ -299,6 +300,7 @@ static void efi_stub_entry_update(void)
if (efi32_stub_entry != addr)
die("32-bit and 64-bit EFI entry points do not match\n");
#endif
+#endif
put_unaligned_le32(addr, &buf[0x264]);
}
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index bfb7bcb36..09e99d13f 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,9 @@
#include <linux/linkage.h>
#include <asm/export.h>
#include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
.pushsection .noinstr.text, "ax"
@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
EXPORT_SYMBOL_GPL(entry_ibpb);
.popsection
+
+/*
+ * Define the VERW operand that is disguised as entry code so that
+ * it can be referenced with KPTI enabled. This ensures VERW can be
+ * used late in exit-to-user path after page tables are switched.
+ */
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+ UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
+ .word __KERNEL_DS
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
+
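Editor's note: mds_verw_sel above parks a __KERNEL_DS selector value in .entry.text, which stays mapped under KPTI, so the exit paths added in the following hunks can issue VERW with a memory operand as late as possible. To show only the instruction form, a user-space sketch that runs VERW on a selector stored in memory (illustrative; the buffer-clearing side effect depends on the microcode update and cannot be observed here):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint16_t sel;

            /* Use the current stack segment selector as a stand-in for __KERNEL_DS. */
            __asm__ volatile("mov %%ss, %0" : "=r"(sel));

            /* VERW with an r/m16 memory operand, as in "verw mds_verw_sel". */
            __asm__ volatile("verw %0" : : "m"(sel));

            printf("executed verw on selector 0x%x\n", sel);
            return 0;
    }

The memory form matters: the selector lives at a fixed, always-mapped address, so the kernel does not need a data reference into .data after the page tables have been switched.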
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index e309e7156..ee5def106 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -912,6 +912,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
BUG_IF_WRONG_CR3 no_user_check=1
popfl
popl %eax
+ CLEAR_CPU_BUFFERS
/*
* Return back to the vDSO, which will pop ecx and edx.
@@ -981,6 +982,7 @@ restore_all_switch_stack:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
+ CLEAR_CPU_BUFFERS
.Lirq_return:
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -1173,6 +1175,7 @@ SYM_CODE_START(asm_exc_nmi)
/* Not on SYSENTER stack. */
call exc_nmi
+ CLEAR_CPU_BUFFERS
jmp .Lnmi_return
.Lnmi_from_sysenter_stack:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9953d966d..c2383c288 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -223,6 +223,7 @@ syscall_return_via_sysret:
SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
swapgs
+ CLEAR_CPU_BUFFERS
sysretq
SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
@@ -656,6 +657,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
/* Restore RDI. */
popq %rdi
swapgs
+ CLEAR_CPU_BUFFERS
jmp .Lnative_iret
@@ -767,6 +769,8 @@ native_irq_return_ldt:
*/
popq %rax /* Restore user RAX */
+ CLEAR_CPU_BUFFERS
+
/*
* RSP now points to an ordinary IRET frame, except that the page
* is read-only and RSP[31:16] are preloaded with the userspace
@@ -1494,6 +1498,12 @@ nmi_restore:
movq $0, 5*8(%rsp) /* clear "NMI executing" */
/*
+ * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
+ * NMI in kernel after user state is restored. For an unprivileged user
+ * these conditions are hard to meet.
+ */
+
+ /*
* iretq reads the "iret" frame and exits the NMI stack in a
* single instruction. We are returning to kernel mode, so this
* cannot result in a fault. Similarly, we don't need to worry
@@ -1511,6 +1521,7 @@ SYM_CODE_START(ignore_sysret)
UNWIND_HINT_EMPTY
ENDBR
mov $-ENOSYS, %eax
+ CLEAR_CPU_BUFFERS
sysretl
SYM_CODE_END(ignore_sysret)
#endif
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d6c08d898..4bcd009a2 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -272,6 +272,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
xorl %r9d, %r9d
xorl %r10d, %r10d
swapgs
+ CLEAR_CPU_BUFFERS
sysretl
SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 215d37f7d..a38cc0afc 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -79,4 +79,14 @@
# define BOOT_STACK_SIZE 0x1000
#endif
+#ifndef __ASSEMBLY__
+extern unsigned int output_len;
+extern const unsigned long kernel_total_size;
+
+unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+ void (*error)(char *x));
+
+extern struct boot_params *boot_params_ptr;
+#endif
+
#endif /* _ASM_X86_BOOT_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ce0c8f7d3..f835b328b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -173,7 +173,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
".pushsection .altinstr_aux,\"ax\"\n"
"6:\n"
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b12270879..b97a70aa4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -304,7 +304,7 @@
#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
-
+#define X86_FEATURE_CLEAR_CPU_BUF (11*32+18) /* "" Clear CPU buffers using VERW */
#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
@@ -477,4 +477,5 @@
/* BUG word 2 */
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 233ae6986..e601264b1 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -88,6 +88,8 @@ static inline void efi_fpu_end(void)
}
#ifdef CONFIG_X86_32
+#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1)
+
#define arch_efi_call_virt_setup() \
({ \
efi_fpu_begin(); \
@@ -101,8 +103,7 @@ static inline void efi_fpu_end(void)
})
#else /* !CONFIG_X86_32 */
-
-#define EFI_LOADER_SIGNATURE "EL64"
+#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT
extern asmlinkage u64 __efi_call(void *fp, ...);
@@ -214,6 +215,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
#ifdef CONFIG_EFI_MIXED
+#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX)
+
#define ARCH_HAS_EFISTUB_WRAPPERS
static inline bool efi_is_64bit(void)
@@ -325,6 +328,13 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \
(__efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+/* Memory Attribute Protocol */
+#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) \
+ ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+
+#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \
+ ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+
/*
* The macros below handle the plumbing for the argument mapping. To add a
* mapping for a specific EFI method, simply define a macro
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 11203a9fe..ffe72790c 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
static __always_inline void arch_exit_to_user_mode(void)
{
- mds_user_clear_cpu_buffers();
amd_clear_divider();
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index 800ffce0d..6b4d36c95 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -9,7 +9,6 @@ static inline bool arch_irq_work_has_interrupt(void)
{
return boot_cpu_has(X86_FEATURE_APIC);
}
-extern void arch_irq_work_raise(void);
#else
static inline bool arch_irq_work_has_interrupt(void)
{
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 071572e23..cbbef3251 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -24,7 +24,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
"jmp %l[l_yes] # objtool NOPs this \n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (2 | branch) : : l_yes);
@@ -38,7 +38,7 @@ l_yes:
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
".byte " __stringify(BYTES_NOP5) "\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
@@ -52,7 +52,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
"jmp %l[l_yes]\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h
index 8fa6ac0e2..d91b37f5b 100644
--- a/arch/x86/include/asm/kmsan.h
+++ b/arch/x86/include/asm/kmsan.h
@@ -64,6 +64,7 @@ static inline bool kmsan_virt_addr_valid(void *addr)
{
unsigned long x = (unsigned long)addr;
unsigned long y = x - __START_KERNEL_map;
+ bool ret;
/* use the carry flag to determine if x was < __START_KERNEL_map */
if (unlikely(x > y)) {
@@ -79,7 +80,21 @@ static inline bool kmsan_virt_addr_valid(void *addr)
return false;
}
- return pfn_valid(x >> PAGE_SHIFT);
+ /*
+ * pfn_valid() relies on RCU, and may call into the scheduler on exiting
+ * the critical section. However, this would result in recursion with
+ * KMSAN. Therefore, disable preemption here, and re-enable preemption
+ * below while suppressing reschedules to avoid recursion.
+ *
+ * Note that this occasionally sacrifices scheduling guarantees.
+ * However, a kernel compiled with KMSAN has already given up on any
+ * performance guarantees due to being heavily instrumented.
+ */
+ preempt_disable();
+ ret = pfn_valid(x >> PAGE_SHIFT);
+ preempt_enable_no_resched();
+
+ return ret;
}
#endif /* !MODULE */
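The kmsan_virt_addr_valid() change keeps the existing carry-flag check around the new preemption handling. As a standalone illustration (not kernel code), the trick is that an unsigned subtraction wraps exactly when it borrows, so comparing x against x - base answers whether x < base:

#include <stdio.h>

/* Unsigned subtraction wraps, so (x - base) exceeds x exactly when the
 * subtraction borrowed, i.e. when x < base. */
static int below(unsigned long x, unsigned long base)
{
	unsigned long y = x - base;	/* wraps around when x < base */

	return y > x;			/* borrow happened => x < base */
}

int main(void)
{
	printf("%d\n", below(0x1000UL, 0xffff880000000000UL));			/* 1 */
	printf("%d\n", below(0xffff880000001000UL, 0xffff880000000000UL));	/* 0 */
	return 0;
}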
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec955ab2f..005e41dc7 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -168,6 +168,14 @@
* CPU is not vulnerable to Gather
* Data Sampling (GDS).
*/
+#define ARCH_CAP_RFDS_NO BIT(27) /*
+ * Not susceptible to Register
+ * File Data Sampling.
+ */
+#define ARCH_CAP_RFDS_CLEAR BIT(28) /*
+ * VERW clears CPU Register
+ * File.
+ */
#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
* IA32_XAPIC_DISABLE_STATUS MSR
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 2f123d4fb..8f6f17a86 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -194,6 +194,19 @@
#endif
.endm
+/*
+ * Macro to execute the VERW instruction that mitigates transient data sampling
+ * attacks such as MDS. On affected systems a microcode update overloads the
+ * VERW instruction to also clear the CPU buffers. VERW clobbers EFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro CLEAR_CPU_BUFFERS
+ ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF
+ verw _ASM_RIP(mds_verw_sel)
+.Lskip_verw_\@:
+.endm
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@@ -222,6 +235,8 @@ extern void srso_alias_untrain_ret(void);
extern void entry_untrain_ret(void);
extern void entry_ibpb(void);
+extern void (*x86_return_thunk)(void);
+
#ifdef CONFIG_RETPOLINE
#define GEN(reg) \
@@ -366,13 +381,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+extern u16 mds_verw_sel;
+
#include <asm/segment.h>
/**
@@ -399,17 +415,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
- *
- * Clear CPU buffers if the corresponding static key is enabled
- */
-static __always_inline void mds_user_clear_cpu_buffers(void)
-{
- if (static_branch_likely(&mds_user_clear))
- mds_clear_cpu_buffers();
-}
-
-/**
* mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
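CLEAR_CPU_BUFFERS above issues VERW on the new mds_verw_sel operand, which holds __KERNEL_DS. A minimal C sketch of the same memory-operand VERW idiom, mirroring the existing mds_clear_cpu_buffers() helper, is shown below; the 0x18 selector value is only a stand-in for __KERNEL_DS:

/* Only the "verw m16" (memory operand) form triggers the microcode buffer
 * clear, and the instruction clobbers nothing but ZF. */
static inline void clear_cpu_buffers_sketch(void)
{
	static const unsigned short ds = 0x18;	/* stand-in for __KERNEL_DS */

	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}

int main(void)
{
	clear_cpu_buffers_sketch();	/* in user mode VERW merely updates ZF */
	return 0;
}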
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 7fa611216..1919ccf49 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -18,7 +18,7 @@
#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
({ \
bool c = false; \
- asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \
+ asm goto (fullop "; j" #cc " %l[cc_label]" \
: : [var] "m" (_var), ## __VA_ARGS__ \
: clobbers : cc_label); \
if (0) { \
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 7ca5c9ec8..cf98fc286 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -157,6 +157,7 @@ static __always_inline void sev_es_nmi_complete(void)
__sev_es_nmi_complete();
}
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
+extern void sev_enable(struct boot_params *bp);
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
{
@@ -202,12 +203,15 @@ void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __init __noreturn snp_abort(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
+u64 snp_get_unsupported_features(u64 status);
+u64 sev_get_status(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
static inline void sev_es_nmi_complete(void) { }
static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+static inline void sev_enable(struct boot_params *bp) { }
static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
static inline void setup_ghcb(void) { }
@@ -225,6 +229,9 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
{
return -ENOTTY;
}
+
+static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
+static inline u64 sev_get_status(void) { return 0; }
#endif
#endif
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 6ca0c661c..c638535ee 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -155,7 +155,7 @@ extern int __get_user_bad(void);
#ifdef CONFIG_X86_32
#define __put_user_goto_u64(x, addr, label) \
- asm_volatile_goto("\n" \
+ asm goto("\n" \
"1: movl %%eax,0(%1)\n" \
"2: movl %%edx,4(%1)\n" \
_ASM_EXTABLE_UA(1b, %l2) \
@@ -317,7 +317,7 @@ do { \
} while (0)
#define __get_user_asm(x, addr, itype, ltype, label) \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: mov"itype" %[umem],%[output]\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: [output] ltype(x) \
@@ -397,7 +397,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@@ -416,7 +416,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@@ -499,7 +499,7 @@ struct __large_struct { unsigned long buf[100]; };
* aliasing issues.
*/
#define __put_user_goto(x, addr, itype, ltype, label) \
- asm_volatile_goto("\n" \
+ asm goto("\n" \
"1: mov"itype" %0,%1\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: : ltype(x), "m" (__m(addr)) \
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 6c2e3ff3c..724ce4480 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -43,9 +43,9 @@ static inline int cpu_has_vmx(void)
*/
static inline int cpu_vmxoff(void)
{
- asm_volatile_goto("1: vmxoff\n\t"
- _ASM_EXTABLE(1b, %l[fault])
- ::: "cc", "memory" : fault);
+ asm goto("1: vmxoff\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ ::: "cc", "memory" : fault);
cr4_clear_bits(X86_CR4_VMXE);
return 0;
@@ -129,9 +129,9 @@ static inline void cpu_svm_disable(void)
* case, GIF must already be set, otherwise the NMI would have
* been blocked, so just eat the fault.
*/
- asm_volatile_goto("1: stgi\n\t"
- _ASM_EXTABLE(1b, %l[fault])
- ::: "memory" : fault);
+ asm goto("1: stgi\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ ::: "memory" : fault);
fault:
wrmsrl(MSR_EFER, efer & ~EFER_SVME);
}
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 6b8c93989..69f85e274 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -536,6 +536,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
}
#ifdef CONFIG_RETHUNK
+
/*
* Rewrite the compiler generated return thunk tail-calls.
*
@@ -551,14 +552,18 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
{
int i = 0;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
- return -1;
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ if (x86_return_thunk == __x86_return_thunk)
+ return -1;
- bytes[i++] = RET_INSN_OPCODE;
+ i = JMP32_INSN_SIZE;
+ __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
+ } else {
+ bytes[i++] = RET_INSN_OPCODE;
+ }
for (; i < insn->length;)
bytes[i++] = INT3_INSN_OPCODE;
-
return i;
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 13dffc43d..c68789fdc 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -110,9 +110,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-/* Control MDS CPU buffer clear before returning to user space */
-DEFINE_STATIC_KEY_FALSE(mds_user_clear);
-EXPORT_SYMBOL_GPL(mds_user_clear);
/* Control MDS CPU buffer clear before idling (halt, mwait) */
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
EXPORT_SYMBOL_GPL(mds_idle_clear);
@@ -251,7 +248,7 @@ static void __init mds_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
mds_mitigation = MDS_MITIGATION_VMWERV;
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
(mds_nosmt || cpu_mitigations_auto_nosmt()))
@@ -355,7 +352,7 @@ static void __init taa_select_mitigation(void)
* For guests that can't determine whether the correct microcode is
* present on host, enable the mitigation for UCODE_NEEDED as well.
*/
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
@@ -423,7 +420,14 @@ static void __init mmio_select_mitigation(void)
*/
if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
boot_cpu_has(X86_FEATURE_RTM)))
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+
+ /*
+ * X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based
+ * mitigations; disable the KVM-only mitigation in that case.
+ */
+ if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
+ static_branch_disable(&mmio_stale_data_clear);
else
static_branch_enable(&mmio_stale_data_clear);
@@ -476,6 +480,57 @@ static int __init mmio_stale_data_parse_cmdline(char *str)
early_param("mmio_stale_data", mmio_stale_data_parse_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "Register File Data Sampling: " fmt
+
+enum rfds_mitigations {
+ RFDS_MITIGATION_OFF,
+ RFDS_MITIGATION_VERW,
+ RFDS_MITIGATION_UCODE_NEEDED,
+};
+
+/* Default mitigation for Register File Data Sampling */
+static enum rfds_mitigations rfds_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_VERW : RFDS_MITIGATION_OFF;
+
+static const char * const rfds_strings[] = {
+ [RFDS_MITIGATION_OFF] = "Vulnerable",
+ [RFDS_MITIGATION_VERW] = "Mitigation: Clear Register File",
+ [RFDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
+};
+
+static void __init rfds_select_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) {
+ rfds_mitigation = RFDS_MITIGATION_OFF;
+ return;
+ }
+ if (rfds_mitigation == RFDS_MITIGATION_OFF)
+ return;
+
+ if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ else
+ rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
+}
+
+static __init int rfds_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!boot_cpu_has_bug(X86_BUG_RFDS))
+ return 0;
+
+ if (!strcmp(str, "off"))
+ rfds_mitigation = RFDS_MITIGATION_OFF;
+ else if (!strcmp(str, "on"))
+ rfds_mitigation = RFDS_MITIGATION_VERW;
+
+ return 0;
+}
+early_param("reg_file_data_sampling", rfds_parse_cmdline);
+
+#undef pr_fmt
#define pr_fmt(fmt) "" fmt
static void __init md_clear_update_mitigation(void)
@@ -483,12 +538,12 @@ static void __init md_clear_update_mitigation(void)
if (cpu_mitigations_off())
return;
- if (!static_key_enabled(&mds_user_clear))
+ if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
goto out;
/*
- * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
- * mitigation, if necessary.
+ * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
+ * Stale Data mitigation, if necessary.
*/
if (mds_mitigation == MDS_MITIGATION_OFF &&
boot_cpu_has_bug(X86_BUG_MDS)) {
@@ -500,11 +555,19 @@ static void __init md_clear_update_mitigation(void)
taa_mitigation = TAA_MITIGATION_VERW;
taa_select_mitigation();
}
- if (mmio_mitigation == MMIO_MITIGATION_OFF &&
- boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
+ /*
+ * MMIO_MITIGATION_OFF is not checked here so that mmio_stale_data_clear
+ * gets updated correctly according to the X86_FEATURE_CLEAR_CPU_BUF state.
+ */
+ if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
mmio_mitigation = MMIO_MITIGATION_VERW;
mmio_select_mitigation();
}
+ if (rfds_mitigation == RFDS_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_RFDS)) {
+ rfds_mitigation = RFDS_MITIGATION_VERW;
+ rfds_select_mitigation();
+ }
out:
if (boot_cpu_has_bug(X86_BUG_MDS))
pr_info("MDS: %s\n", mds_strings[mds_mitigation]);
@@ -514,6 +577,8 @@ out:
pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
pr_info("MMIO Stale Data: Unknown: No mitigations\n");
+ if (boot_cpu_has_bug(X86_BUG_RFDS))
+ pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]);
}
static void __init md_clear_select_mitigation(void)
@@ -521,11 +586,12 @@ static void __init md_clear_select_mitigation(void)
mds_select_mitigation();
taa_select_mitigation();
mmio_select_mitigation();
+ rfds_select_mitigation();
/*
- * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update
- * and print their mitigation after MDS, TAA and MMIO Stale Data
- * mitigation selection is done.
+ * As these mitigations are inter-related and rely on the VERW instruction
+ * to clear the microarchitectural buffers, update and print their status
+ * after mitigation selection is done for each of these vulnerabilities.
*/
md_clear_update_mitigation();
}
@@ -2589,6 +2655,11 @@ static ssize_t mmio_stale_data_show_state(char *buf)
sched_smt_active() ? "vulnerable" : "disabled");
}
+static ssize_t rfds_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]);
+}
+
static char *stibp_state(void)
{
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
@@ -2750,6 +2821,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_SRSO:
return srso_show_state(buf);
+ case X86_BUG_RFDS:
+ return rfds_show_state(buf);
+
default:
break;
}
@@ -2824,4 +2898,9 @@ ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribut
{
return cpu_show_common(dev, attr, buf, X86_BUG_SRSO);
}
+
+ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_RFDS);
+}
#endif
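cpu_show_reg_file_data_sampling() backs a new entry under the CPU vulnerabilities sysfs directory. A hedged userspace sketch of reading that status follows; the file name reg_file_data_sampling is an assumption based on how the other vulnerability attributes are named, since the drivers/base/cpu.c wiring is outside this arch/ diff:

#include <stdio.h>

int main(void)
{
	char line[128];
	/* Assumed sysfs path; adjust if the attribute is named differently. */
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		printf("RFDS: %s", line);	/* e.g. "Mitigation: Clear Register File" */
	fclose(f);
	return 0;
}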
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 454cdf341..758938c94 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1248,6 +1248,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define SRSO BIT(5)
/* CPU is affected by GDS */
#define GDS BIT(6)
+/* CPU is affected by Register File Data Sampling */
+#define RFDS BIT(7)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
@@ -1275,9 +1277,18 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS),
VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(ALDERLAKE_N, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS),
VULNBL_AMD(0x15, RETBLEED),
VULNBL_AMD(0x16, RETBLEED),
@@ -1311,6 +1322,24 @@ static bool arch_cap_mmio_immune(u64 ia32_cap)
ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
}
+static bool __init vulnerable_to_rfds(u64 ia32_cap)
+{
+ /* The "immunity" bit trumps everything else: */
+ if (ia32_cap & ARCH_CAP_RFDS_NO)
+ return false;
+
+ /*
+ * VMMs set ARCH_CAP_RFDS_CLEAR for processors not in the blacklist to
+	 * indicate that mitigation is needed because the guest is running on
+	 * vulnerable hardware or may migrate to such hardware:
+ */
+ if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
+ return true;
+
+ /* Only consult the blacklist when there is no enumeration: */
+ return cpu_matches(cpu_vuln_blacklist, RFDS);
+}
+
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = x86_read_arch_cap_msr();
@@ -1419,6 +1448,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SRSO);
}
+ if (vulnerable_to_rfds(ia32_cap))
+ setup_force_cpu_bug(X86_BUG_RFDS);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 427899650..32bd64017 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -216,6 +216,90 @@ int intel_cpu_collect_info(struct ucode_cpu_info *uci)
}
EXPORT_SYMBOL_GPL(intel_cpu_collect_info);
+#define MSR_IA32_TME_ACTIVATE 0x982
+
+/* Helpers to access TME_ACTIVATE MSR */
+#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
+#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
+
+#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
+#define TME_ACTIVATE_POLICY_AES_XTS_128 0
+
+#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
+
+#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
+#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
+
+/* Values for mktme_status (SW only construct) */
+#define MKTME_ENABLED 0
+#define MKTME_DISABLED 1
+#define MKTME_UNINITIALIZED 2
+static int mktme_status = MKTME_UNINITIALIZED;
+
+static void detect_tme_early(struct cpuinfo_x86 *c)
+{
+ u64 tme_activate, tme_policy, tme_crypto_algs;
+ int keyid_bits = 0, nr_keyids = 0;
+ static u64 tme_activate_cpu0 = 0;
+
+ rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
+
+ if (mktme_status != MKTME_UNINITIALIZED) {
+ if (tme_activate != tme_activate_cpu0) {
+ /* Broken BIOS? */
+ pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
+ pr_err_once("x86/tme: MKTME is not usable\n");
+ mktme_status = MKTME_DISABLED;
+
+ /* Proceed. We may need to exclude bits from x86_phys_bits. */
+ }
+ } else {
+ tme_activate_cpu0 = tme_activate;
+ }
+
+ if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
+ pr_info_once("x86/tme: not enabled by BIOS\n");
+ mktme_status = MKTME_DISABLED;
+ return;
+ }
+
+ if (mktme_status != MKTME_UNINITIALIZED)
+ goto detect_keyid_bits;
+
+ pr_info("x86/tme: enabled by BIOS\n");
+
+ tme_policy = TME_ACTIVATE_POLICY(tme_activate);
+ if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
+ pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
+
+ tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
+ if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
+ pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
+ tme_crypto_algs);
+ mktme_status = MKTME_DISABLED;
+ }
+detect_keyid_bits:
+ keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
+ nr_keyids = (1UL << keyid_bits) - 1;
+ if (nr_keyids) {
+ pr_info_once("x86/mktme: enabled by BIOS\n");
+ pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
+ } else {
+ pr_info_once("x86/mktme: disabled by BIOS\n");
+ }
+
+ if (mktme_status == MKTME_UNINITIALIZED) {
+ /* MKTME is usable */
+ mktme_status = MKTME_ENABLED;
+ }
+
+ /*
+ * KeyID bits effectively lower the number of physical address
+ * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
+ */
+ c->x86_phys_bits -= keyid_bits;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -367,6 +451,13 @@ static void early_init_intel(struct cpuinfo_x86 *c)
*/
if (detect_extended_topology_early(c) < 0)
detect_ht_early(c);
+
+ /*
+ * Adjust the number of physical bits early because it affects the
+ * valid bits of the MTRR mask registers.
+ */
+ if (cpu_has(c, X86_FEATURE_TME))
+ detect_tme_early(c);
}
static void bsp_init_intel(struct cpuinfo_x86 *c)
@@ -527,90 +618,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-#define MSR_IA32_TME_ACTIVATE 0x982
-
-/* Helpers to access TME_ACTIVATE MSR */
-#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
-#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
-
-#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
-#define TME_ACTIVATE_POLICY_AES_XTS_128 0
-
-#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
-
-#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
-#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
-
-/* Values for mktme_status (SW only construct) */
-#define MKTME_ENABLED 0
-#define MKTME_DISABLED 1
-#define MKTME_UNINITIALIZED 2
-static int mktme_status = MKTME_UNINITIALIZED;
-
-static void detect_tme(struct cpuinfo_x86 *c)
-{
- u64 tme_activate, tme_policy, tme_crypto_algs;
- int keyid_bits = 0, nr_keyids = 0;
- static u64 tme_activate_cpu0 = 0;
-
- rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
-
- if (mktme_status != MKTME_UNINITIALIZED) {
- if (tme_activate != tme_activate_cpu0) {
- /* Broken BIOS? */
- pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
- pr_err_once("x86/tme: MKTME is not usable\n");
- mktme_status = MKTME_DISABLED;
-
- /* Proceed. We may need to exclude bits from x86_phys_bits. */
- }
- } else {
- tme_activate_cpu0 = tme_activate;
- }
-
- if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
- pr_info_once("x86/tme: not enabled by BIOS\n");
- mktme_status = MKTME_DISABLED;
- return;
- }
-
- if (mktme_status != MKTME_UNINITIALIZED)
- goto detect_keyid_bits;
-
- pr_info("x86/tme: enabled by BIOS\n");
-
- tme_policy = TME_ACTIVATE_POLICY(tme_activate);
- if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
- pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
-
- tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
- if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
- pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
- tme_crypto_algs);
- mktme_status = MKTME_DISABLED;
- }
-detect_keyid_bits:
- keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
- nr_keyids = (1UL << keyid_bits) - 1;
- if (nr_keyids) {
- pr_info_once("x86/mktme: enabled by BIOS\n");
- pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
- } else {
- pr_info_once("x86/mktme: disabled by BIOS\n");
- }
-
- if (mktme_status == MKTME_UNINITIALIZED) {
- /* MKTME is usable */
- mktme_status = MKTME_ENABLED;
- }
-
- /*
- * KeyID bits effectively lower the number of physical address
- * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
- */
- c->x86_phys_bits -= keyid_bits;
-}
-
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
u64 msr;
@@ -747,9 +754,6 @@ static void init_intel(struct cpuinfo_x86 *c)
init_ia32_feat_ctl(c);
- if (cpu_has(c, X86_FEATURE_TME))
- detect_tme(c);
-
init_intel_misc_features(c);
split_lock_init();
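detect_tme_early() decodes fixed bit fields of MSR_IA32_TME_ACTIVATE and subtracts the KeyID width from x86_phys_bits. The standalone sketch below applies the same field layout to a made-up MSR value purely for illustration:

#include <stdio.h>
#include <stdint.h>

/* Same field layout as the TME_ACTIVATE_* helpers above; the sample value is
 * invented for illustration only. */
#define TME_LOCKED(x)		((x) & 0x1)
#define TME_ENABLED(x)		((x) & 0x2)
#define TME_KEYID_BITS(x)	(((x) >> 32) & 0xf)	/* bits 35:32 */

int main(void)
{
	uint64_t tme_activate = (1ULL << 0) | (1ULL << 1) | (6ULL << 32);
	int keyid_bits = TME_KEYID_BITS(tme_activate);

	printf("locked=%llu enabled=%llu keyid_bits=%d keyids=%lu\n",
	       (unsigned long long)TME_LOCKED(tme_activate),
	       (unsigned long long)TME_ENABLED(tme_activate),
	       keyid_bits, (1UL << keyid_bits) - 1);
	/* x86_phys_bits would be reduced by keyid_bits (6 here). */
	return 0;
}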
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index f1a748da5..cad6ea191 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -44,6 +44,7 @@
#include <linux/sync_core.h>
#include <linux/task_work.h>
#include <linux/hardirq.h>
+#include <linux/kexec.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
@@ -239,6 +240,7 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
struct llist_node *pending;
struct mce_evt_llist *l;
int apei_err = 0;
+ struct page *p;
/*
* Allow instrumentation around external facilities usage. Not that it
@@ -292,6 +294,20 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
if (!fake_panic) {
if (panic_timeout == 0)
panic_timeout = mca_cfg.panic_timeout;
+
+ /*
+ * Kdump skips the poisoned page in order to avoid
+ * touching the error bits again. Poison the page even
+ * if the error is fatal and the machine is about to
+ * panic.
+ */
+ if (kexec_crash_loaded()) {
+ if (final && (final->status & MCI_STATUS_ADDRV)) {
+ p = pfn_to_online_page(final->addr >> PAGE_SHIFT);
+ if (p)
+ SetPageHWPoison(p);
+ }
+ }
panic(msg);
} else
pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 9dac24680..993734e96 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void)
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
/*
- * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need
- * to be reserved.
+ * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by
+ * kexec and do not need to be reserved.
*/
- if (data->type != SETUP_EFI && data->type != SETUP_IMA)
+ if (data->type != SETUP_EFI &&
+ data->type != SETUP_IMA &&
+ data->type != SETUP_RNG_SEED)
e820__range_update_kexec(pa_data,
sizeof(*data) + data->len,
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 558076dbd..247f2225a 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
* Attempt to restore the FPU registers directly from user memory.
* Pagefaults are handled and any errors returned are fatal.
*/
-static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
- bool fx_only, unsigned int size)
+static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
{
struct fpu *fpu = &current->thread.fpu;
int ret;
+ /* Restore enabled features only. */
+ xrestore &= fpu->fpstate->user_xfeatures;
retry:
fpregs_lock();
/* Ensure that XFD is up to date */
@@ -309,7 +310,7 @@ retry:
if (ret != X86_TRAP_PF)
return false;
- if (!fault_in_readable(buf, size))
+ if (!fault_in_readable(buf, fpu->fpstate->user_size))
goto retry;
return false;
}
@@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
struct user_i387_ia32_struct env;
bool success, fx_only = false;
union fpregs_state *fpregs;
- unsigned int state_size;
u64 user_xfeatures = 0;
if (use_xsave()) {
@@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
return false;
fx_only = !fx_sw_user.magic1;
- state_size = fx_sw_user.xstate_size;
user_xfeatures = fx_sw_user.xfeatures;
} else {
user_xfeatures = XFEATURE_MASK_FPSSE;
- state_size = fpu->fpstate->user_size;
}
if (likely(!ia32_fxstate)) {
/* Restore the FPU registers directly from user memory. */
- return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only,
- state_size);
+ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only);
}
/*
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index e07234ec7..ec51ce713 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -361,7 +361,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = trampoline + size;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
- __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE);
+ __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
else
memcpy(ip, retq, sizeof(retq));
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index cec0bfa3b..ed6cce6c3 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -522,9 +522,6 @@ nmi_restart:
write_cr2(this_cpu_read(nmi_cr2));
if (this_cpu_dec_return(nmi_state))
goto nmi_restart;
-
- if (user_mode(regs))
- mds_user_clear_cpu_buffers();
}
#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index 3fbb49168..b32134b09 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -80,7 +80,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
case RET:
if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
- code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
+ code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk);
else
code = &retinsn;
break;
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 36c8af87a..4e725854c 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -8,7 +8,7 @@
#define svm_asm(insn, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) "\n\t" \
+ asm goto("1: " __stringify(insn) "\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
::: clobber : fault); \
return; \
@@ -18,7 +18,7 @@ fault: \
#define svm_asm1(insn, op1, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
+ asm goto("1: " __stringify(insn) " %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1 : clobber : fault); \
return; \
@@ -28,7 +28,7 @@ fault: \
#define svm_asm2(insn, op1, op2, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
+ asm goto("1: " __stringify(insn) " %1, %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1, op2 : clobber : fault); \
return; \
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 9a75a0d5d..220cdbe1e 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -38,7 +38,7 @@ static int fixed_pmc_events[] = {1, 0, 7};
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
struct kvm_pmc *pmc;
- u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
+ u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
int i;
pmu->fixed_ctr_ctrl = data;
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
index edc3f16cc..6a9bfdfbb 100644
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -2,7 +2,10 @@
#ifndef __KVM_X86_VMX_RUN_FLAGS_H
#define __KVM_X86_VMX_RUN_FLAGS_H
-#define VMX_RUN_VMRESUME (1 << 0)
-#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
+#define VMX_RUN_VMRESUME_SHIFT 0
+#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
+
+#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
+#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 0b5db4de4..0b2cad66d 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -106,7 +106,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
mov (%_ASM_SP), %_ASM_AX
/* Check if vmlaunch or vmresume is needed */
- testb $VMX_RUN_VMRESUME, %bl
+ bt $VMX_RUN_VMRESUME_SHIFT, %bx
/* Load guest registers. Don't clobber flags. */
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
@@ -128,8 +128,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
- /* Check EFLAGS.ZF from 'testb' above */
- jz .Lvmlaunch
+ /* Clobbers EFLAGS.ZF */
+ CLEAR_CPU_BUFFERS
+
+ /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
+ jnc .Lvmlaunch
/*
* After a successful VMRESUME/VMLAUNCH, control flow "magically"
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 98d732b94..5c1590855 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -407,7 +407,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
- vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+ vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
+ vmx_fb_clear_ctrl_available;
/*
* If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
@@ -2469,10 +2470,10 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer)
cr4_set_bits(X86_CR4_VMXE);
- asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
- _ASM_EXTABLE(1b, %l[fault])
- : : [vmxon_pointer] "m"(vmxon_pointer)
- : : fault);
+ asm goto("1: vmxon %[vmxon_pointer]\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ : : [vmxon_pointer] "m"(vmxon_pointer)
+ : : fault);
return 0;
fault:
@@ -7120,11 +7121,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
{
guest_state_enter_irqoff();
- /* L1D Flush includes CPU buffer clear to mitigate MDS */
+ /*
+ * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
+ * mitigation for MDS is done late in VMentry and is still
+ * executed in spite of L1D Flush. This is because an extra VERW
+ * should not matter much after the big hammer L1D Flush.
+ */
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
- else if (static_branch_unlikely(&mds_user_clear))
- mds_clear_cpu_buffers();
else if (static_branch_unlikely(&mmio_stale_data_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
mds_clear_cpu_buffers();
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index ec268df83..5edab28df 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -73,7 +73,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
- asm_volatile_goto("1: vmread %[field], %[output]\n\t"
+ asm_goto_output("1: vmread %[field], %[output]\n\t"
"jna %l[do_fail]\n\t"
_ASM_EXTABLE(1b, %l[do_exception])
@@ -166,7 +166,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
#define vmx_asm1(insn, op1, error_args...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
+ asm goto("1: " __stringify(insn) " %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
@@ -183,7 +183,7 @@ fault: \
#define vmx_asm2(insn, op1, op2, error_args...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
+ asm goto("1: " __stringify(insn) " %1, %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7144e5166..688bc7b72 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1613,7 +1613,8 @@ static unsigned int num_msr_based_features;
ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
- ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO)
+ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
static u64 kvm_get_arch_capabilities(void)
{
@@ -1650,6 +1651,8 @@ static u64 kvm_get_arch_capabilities(void)
data |= ARCH_CAP_SSB_NO;
if (!boot_cpu_has_bug(X86_BUG_MDS))
data |= ARCH_CAP_MDS_NO;
+ if (!boot_cpu_has_bug(X86_BUG_RFDS))
+ data |= ARCH_CAP_RFDS_NO;
if (!boot_cpu_has(X86_FEATURE_RTM)) {
/*
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 968d7005f..f50cc210a 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
+ bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- if (info->direct_gbpages) {
- pud_t pudval;
+ /* if this is already a gbpage, this portion is already mapped */
+ if (pud_large(*pud))
+ continue;
+
+ /* Is using a gbpage allowed? */
+ use_gbpage = info->direct_gbpages;
- if (pud_present(*pud))
- continue;
+ /* Don't use gbpage if it maps more than the requested region. */
+ /* at the beginning: */
+ use_gbpage &= ((addr & ~PUD_MASK) == 0);
+ /* ... or at the end: */
+ use_gbpage &= ((next & ~PUD_MASK) == 0);
+
+ /* Never overwrite existing mappings */
+ use_gbpage &= !pud_present(*pud);
+
+ if (use_gbpage) {
+ pud_t pudval;
- addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
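The reworked ident_pud_init() only uses a gbpage when the current chunk both starts and ends on a 1 GiB boundary and the PUD entry is empty. A standalone sketch of the two alignment tests (addresses made up):

#include <stdio.h>

#define PUD_SHIFT	30
#define PUD_SIZE	(1UL << PUD_SHIFT)
#define PUD_MASK	(~(PUD_SIZE - 1))

/* A chunk may use a gbpage only if it starts and ends on 1 GiB boundaries,
 * mirroring the use_gbpage checks above. */
static int can_use_gbpage(unsigned long addr, unsigned long next)
{
	return ((addr & ~PUD_MASK) == 0) && ((next & ~PUD_MASK) == 0);
}

int main(void)
{
	printf("%d\n", can_use_gbpage(0x40000000UL, 0x80000000UL));	/* 1 */
	printf("%d\n", can_use_gbpage(0x40000000UL, 0x7fe00000UL));	/* 0: end not aligned */
	return 0;
}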
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index aa39d678f..dae5c9527 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -961,7 +961,7 @@ static int __init cmp_memblk(const void *a, const void *b)
const struct numa_memblk *ma = *(const struct numa_memblk **)a;
const struct numa_memblk *mb = *(const struct numa_memblk **)b;
- return ma->start - mb->start;
+ return (ma->start > mb->start) - (ma->start < mb->start);
}
static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
@@ -971,14 +971,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
* @start: address to begin fill
* @end: address to end fill
*
- * Find and extend numa_meminfo memblks to cover the @start-@end
- * physical address range, such that the first memblk includes
- * @start, the last memblk includes @end, and any gaps in between
- * are filled.
+ * Find and extend numa_meminfo memblks to cover the physical
+ * address range @start-@end
*
* RETURNS:
* 0 : Success
- * NUMA_NO_MEMBLK : No memblk exists in @start-@end range
+ * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end
*/
int __init numa_fill_memblks(u64 start, u64 end)
@@ -990,17 +988,14 @@ int __init numa_fill_memblks(u64 start, u64 end)
/*
* Create a list of pointers to numa_meminfo memblks that
- * overlap start, end. Exclude (start == bi->end) since
- * end addresses in both a CFMWS range and a memblk range
- * are exclusive.
- *
- * This list of pointers is used to make in-place changes
- * that fill out the numa_meminfo memblks.
+ * overlap start, end. The list is used to make in-place
+ * changes that fill out the numa_meminfo memblks.
*/
for (int i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *bi = &mi->blk[i];
- if (start < bi->end && end >= bi->start) {
+ if (memblock_addrs_overlap(start, end - start, bi->start,
+ bi->end - bi->start)) {
blk[count] = &mi->blk[i];
count++;
}
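The cmp_memblk() fix matters because returning a u64 difference through an int can truncate and flip the sign, breaking the sort order; the (a > b) - (a < b) idiom avoids that. A small demonstration (illustrative values):

#include <stdio.h>
#include <stdint.h>

static int cmp_buggy(uint64_t a, uint64_t b) { return a - b; }
static int cmp_fixed(uint64_t a, uint64_t b) { return (a > b) - (a < b); }

int main(void)
{
	uint64_t a = 0x100000000ULL;	/* 4 GiB */
	uint64_t b = 0;

	/* With typical two's-complement truncation, cmp_buggy() reports 0 even
	 * though a > b; cmp_fixed() reports 1 as expected. */
	printf("buggy=%d fixed=%d\n", cmp_buggy(a, b), cmp_fixed(a, b));
	return 0;
}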
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b69aee624..7913440c0 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -432,7 +432,7 @@ static void emit_return(u8 **pprog, u8 *ip)
u8 *prog = *pprog;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
- emit_jump(&prog, &__x86_return_thunk, ip);
+ emit_jump(&prog, x86_return_thunk, ip);
} else {
EMIT1(0xC3); /* ret */
if (IS_ENABLED(CONFIG_SLS))
diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h
index c812bf850..46c859625 100644
--- a/arch/xtensa/include/asm/jump_label.h
+++ b/arch/xtensa/include/asm/jump_label.h
@@ -13,7 +13,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"_nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
@@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key,
* make it reachable and wrap both into a no-transform block
* to avoid any assembler interference with this.
*/
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
".begin no-transform\n\t"
"_j %l[l_yes]\n\t"
"2:\n\t"