summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/entry/calling.h68
-rw-r--r--arch/x86/entry/entry_32.S2
-rw-r--r--arch/x86/entry/entry_64.S34
-rw-r--r--arch/x86/entry/entry_64_compat.S11
-rw-r--r--arch/x86/include/asm/cpu_device_id.h168
-rw-r--r--arch/x86/include/asm/cpufeatures.h19
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h4
-rw-r--r--arch/x86/include/asm/intel-family.h6
-rw-r--r--arch/x86/include/asm/msr-index.h18
-rw-r--r--arch/x86/include/asm/nospec-branch.h43
-rw-r--r--arch/x86/kernel/cpu/amd.c31
-rw-r--r--arch/x86/kernel/cpu/bugs.c375
-rw-r--r--arch/x86/kernel/cpu/common.c60
-rw-r--r--arch/x86/kernel/cpu/match.c44
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c16
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/cpu/tsx.c33
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/unwind_orc.c2
-rw-r--r--arch/x86/kvm/cpuid.c1
-rw-r--r--arch/x86/kvm/emulate.c104
-rw-r--r--arch/x86/kvm/svm.c11
-rw-r--r--arch/x86/kvm/vmx.c72
-rw-r--r--arch/x86/kvm/x86.c6
-rw-r--r--arch/x86/mm/ioremap.c8
-rw-r--r--arch/x86/power/cpu.c22
-rw-r--r--arch/x86/um/shared/sysdep/syscalls_32.h5
-rw-r--r--arch/x86/um/tls_32.c6
28 files changed, 906 insertions, 266 deletions
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 993dd06c8..806729a71 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -6,6 +6,8 @@
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
+#include <asm/msr.h>
+#include <asm/nospec-branch.h>
/*
@@ -146,27 +148,19 @@ For 32-bit we have the following conventions - kernel is built with
.endm
-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
+.macro POP_REGS pop_rdi=1
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
- .if \skip_r11rcx
- popq %rsi
- .else
popq %r11
- .endif
popq %r10
popq %r9
popq %r8
popq %rax
- .if \skip_r11rcx
- popq %rsi
- .else
popq %rcx
- .endif
popq %rdx
popq %rsi
.if \pop_rdi
@@ -317,6 +311,62 @@ For 32-bit we have the following conventions - kernel is built with
#endif
/*
+ * IBRS kernel mitigation for Spectre_v2.
+ *
+ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
+ * the regs it uses (AX, CX, DX). Must be called before the first RET
+ * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
+ *
+ * The optional argument is used to save/restore the current value,
+ * which is used on the paranoid paths.
+ *
+ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
+ */
+.macro IBRS_ENTER save_reg
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+.ifnb \save_reg
+ rdmsr
+ shl $32, %rdx
+ or %rdx, %rax
+ mov %rax, \save_reg
+ test $SPEC_CTRL_IBRS, %eax
+ jz .Ldo_wrmsr_\@
+ lfence
+ jmp .Lend_\@
+.Ldo_wrmsr_\@:
+.endif
+
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ movl %edx, %eax
+ shr $32, %rdx
+ wrmsr
+.Lend_\@:
+.endm
+
+/*
+ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
+ * regs. Must be called after the last RET.
+ */
+.macro IBRS_EXIT save_reg
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+.ifnb \save_reg
+ mov \save_reg, %rdx
+.else
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ andl $(~SPEC_CTRL_IBRS), %edx
+.endif
+
+ movl %edx, %eax
+ shr $32, %rdx
+ wrmsr
+.Lend_\@:
+.endm
+
+/*
* Mitigate Spectre v1 for conditional swapgs code paths.
*
* FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 37d9016d4..78b308f2f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -643,7 +643,6 @@ ENTRY(__switch_to_asm)
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif
-#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
@@ -652,7 +651,6 @@ ENTRY(__switch_to_asm)
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
-#endif
/* restore callee-saved registers */
popfl
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index dfe26f3cf..3f418aede 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -235,6 +235,10 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
/* IRQs are off. */
movq %rax, %rdi
movq %rsp, %rsi
+
+ /* clobbers %rax, make sure it is after saving the syscall nr */
+ IBRS_ENTER
+
call do_syscall_64 /* returns with IRQs disabled */
TRACE_IRQS_IRETQ /* we're about to change IF */
@@ -311,8 +315,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
* perf profiles. Nothing jumps here.
*/
syscall_return_via_sysret:
- /* rcx and r11 are already restored (see code above) */
- POP_REGS pop_rdi=0 skip_r11rcx=1
+ IBRS_EXIT
+ POP_REGS pop_rdi=0
/*
* Now all regs are restored except RSP and RDI.
@@ -363,7 +367,6 @@ ENTRY(__switch_to_asm)
movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
#endif
-#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
@@ -372,7 +375,6 @@ ENTRY(__switch_to_asm)
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
-#endif
/* restore callee-saved registers */
popfq
@@ -685,6 +687,7 @@ GLOBAL(retint_user)
TRACE_IRQS_IRETQ
GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+ IBRS_EXIT
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testb $3, CS(%rsp)
@@ -1250,7 +1253,13 @@ ENTRY(paranoid_entry)
*/
FENCE_SWAPGS_KERNEL_ENTRY
- ret
+ /*
+ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
+ * CR3 above, keep the old value in a callee saved register.
+ */
+ IBRS_ENTER save_reg=%r15
+
+ RET
END(paranoid_entry)
/*
@@ -1278,12 +1287,20 @@ ENTRY(paranoid_exit)
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
+
+ /*
+ * Must restore IBRS state before both CR3 and %GS since we need access
+ * to the per-CPU x86_spec_ctrl_shadow variable.
+ */
+ IBRS_EXIT save_reg=%r15
+
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
.Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
END(paranoid_exit)
+
/*
* Save all registers in pt_regs, and switch GS if needed.
*/
@@ -1303,6 +1320,7 @@ ENTRY(error_entry)
FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ IBRS_ENTER
.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
@@ -1367,6 +1385,7 @@ ENTRY(error_entry)
SWAPGS
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ IBRS_ENTER
/*
* Pretend that the exception came from user mode: set up pt_regs
@@ -1472,6 +1491,8 @@ ENTRY(nmi)
PUSH_AND_CLEAR_REGS rdx=(%rdx)
ENCODE_FRAME_POINTER
+ IBRS_ENTER
+
/*
* At this point we no longer need to worry about stack damage
* due to nesting -- we're on the normal thread stack and we're
@@ -1695,6 +1716,9 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi
+ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
+ IBRS_EXIT save_reg=%r15
+
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 40d2834a8..85dd05de6 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -4,7 +4,6 @@
*
* Copyright 2000-2002 Andi Kleen, SuSE Labs.
*/
-#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/current.h>
#include <asm/errno.h>
@@ -17,6 +16,8 @@
#include <linux/linkage.h>
#include <linux/err.h>
+#include "calling.h"
+
.section .entry.text, "ax"
/*
@@ -106,6 +107,8 @@ ENTRY(entry_SYSENTER_compat)
xorl %r15d, %r15d /* nospec r15 */
cld
+ IBRS_ENTER
+
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
@@ -253,6 +256,8 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
*/
TRACE_IRQS_OFF
+ IBRS_ENTER
+
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
@@ -262,6 +267,9 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
/* Opportunistic SYSRET */
sysret32_from_system_call:
TRACE_IRQS_ON /* User mode traces as IRQs on. */
+
+ IBRS_EXIT
+
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
@@ -403,6 +411,7 @@ ENTRY(entry_INT80_compat)
* gate turned them off.
*/
TRACE_IRQS_OFF
+ IBRS_ENTER
movq %rsp, %rdi
call do_int80_syscall_32
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index 884466592..cdf39decf 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -1,16 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _CPU_DEVICE_ID
-#define _CPU_DEVICE_ID 1
+#ifndef _ASM_X86_CPU_DEVICE_ID
+#define _ASM_X86_CPU_DEVICE_ID
/*
* Declare drivers belonging to specific x86 CPUs
* Similar in spirit to pci_device_id and related PCI functions
+ *
+ * The wildcard initializers are in mod_devicetable.h because
+ * file2alias needs them. Sigh.
*/
-
#include <linux/mod_devicetable.h>
+/* Get the INTEL_FAM* model defines */
+#include <asm/intel-family.h>
+/* And the X86_VENDOR_* ones */
+#include <asm/processor.h>
-#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
+/* Centaur FAM6 models */
+#define X86_CENTAUR_FAM6_C7_A 0xa
+#define X86_CENTAUR_FAM6_C7_D 0xd
+#define X86_CENTAUR_FAM6_NANO 0xf
+#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
/**
* X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching
* @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
@@ -23,8 +33,11 @@
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
*
- * Backport version to keep the SRBDS pile consistant. No shorter variants
- * required for this.
+ * Use only if you need all selectors. Otherwise use one of the shorter
+ * macros of the X86_MATCH_* family. If there is no matching shorthand
+ * macro, consider to add one. If you really need to wrap one of the macros
+ * into another macro at the usage site for good reasons, then please
+ * start this local macro with X86_MATCH to allow easy grepping.
*/
#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \
_steppings, _feature, _data) { \
@@ -36,6 +49,147 @@
.driver_data = (unsigned long) _data \
}
+/**
+ * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching
+ * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@_vendor
+ * @_family: The family number or X86_FAMILY_ANY
+ * @_model: The model number, model constant or X86_MODEL_ANY
+ * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
+ * @_data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * The steppings arguments of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is
+ * set to wildcards.
+ */
+#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \
+ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \
+ X86_STEPPING_ANY, feature, data)
+
+/**
+ * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @family: The family number or X86_FAMILY_ANY
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
+ * set to wildcards.
+ */
+#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \
+ X86_MODEL_ANY, feature, data)
+
+/**
+ * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
+ * set to wildcards.
+ */
+#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \
+ X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data)
+
+/**
+ * X86_MATCH_FEATURE - Macro for matching a CPU feature
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
+ * set to wildcards.
+ */
+#define X86_MATCH_FEATURE(feature, data) \
+ X86_MATCH_VENDOR_FEATURE(ANY, feature, data)
+
+/* Transitional to keep the existing code working */
+#define X86_FEATURE_MATCH(feature) X86_MATCH_FEATURE(feature, NULL)
+
+/**
+ * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @family: The family number or X86_FAMILY_ANY
+ * @model: The model number, model constant or X86_MODEL_ANY
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
+ * set to wildcards.
+ */
+#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \
+ X86_FEATURE_ANY, data)
+
+/**
+ * X86_MATCH_VENDOR_FAM - Match vendor and family
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @family: The family number or X86_FAMILY_ANY
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
+ * set of wildcards.
+ */
+#define X86_MATCH_VENDOR_FAM(vendor, family, data) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data)
+
+/**
+ * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model
+ * @model: The model name without the INTEL_FAM6_ prefix or ANY
+ * The model name is expanded to INTEL_FAM6_@model internally
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * The vendor is set to INTEL, the family to 6 and all other missing
+ * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards.
+ *
+ * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information.
+ */
+#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \
+ X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data)
+
+/*
+ * Match specific microcode revisions.
+ *
+ * vendor/family/model/stepping must be all set.
+ *
+ * Only checks against the boot CPU. When mixed-stepping configs are
+ * valid for a CPU model, add a quirk for every valid stepping and
+ * do the fine-tuning in the quirk handler.
+ */
+
+struct x86_cpu_desc {
+ u8 x86_family;
+ u8 x86_vendor;
+ u8 x86_model;
+ u8 x86_stepping;
+ u32 x86_microcode_rev;
+};
+
+#define INTEL_CPU_DESC(model, stepping, revision) { \
+ .x86_family = 6, \
+ .x86_vendor = X86_VENDOR_INTEL, \
+ .x86_model = (model), \
+ .x86_stepping = (stepping), \
+ .x86_microcode_rev = (revision), \
+}
+
extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
+extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table);
-#endif
+#endif /* _ASM_X86_CPU_DEVICE_ID */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index e9b79bac9..bd4374f56 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -202,8 +202,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@@ -283,7 +283,16 @@
#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
-#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+ 6) /* "" Fill RSB on VM exit when EIBRS is enabled */
+/* FREE! (11*32+ 6) */
+/* FREE! (11*32+ 7) */
+/* FREE! (11*32+ 8) */
+/* FREE! (11*32+ 9) */
+/* FREE! (11*32+10) */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
+#define X86_FEATURE_MSR_TSX_CTRL (11*32+18) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@@ -296,6 +305,7 @@
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -397,6 +407,7 @@
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
-#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 00e01d215..e1bd23641 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -682,7 +682,7 @@ struct hv_enlightened_vmcs {
u64 guest_rip;
u32 hv_clean_fields;
- u32 hv_padding_32;
+ u32 padding32_1;
u32 hv_synthetic_controls;
struct {
u32 nested_flush_hypercall:1;
@@ -690,7 +690,7 @@ struct hv_enlightened_vmcs {
u32 reserved:30;
} hv_enlightenments_control;
u32 hv_vp_id;
-
+ u32 padding32_2;
u64 hv_vm_id;
u64 partition_assist_page;
u64 padding64_4[4];
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index db06f1662..1f2f52a34 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -13,6 +13,9 @@
* that group keep the CPUID for the variants sorted by model number.
*/
+/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
+#define INTEL_FAM6_ANY X86_MODEL_ANY
+
#define INTEL_FAM6_CORE_YONAH 0x0E
#define INTEL_FAM6_CORE2_MEROM 0x0F
@@ -101,6 +104,9 @@
#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
+/* Family 5 */
+#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */
+
/* Useful macros */
#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \
{ \
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 7a7379953..0bd07699d 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -47,6 +47,8 @@
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -73,6 +75,7 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
@@ -120,6 +123,13 @@
* bit available to control VERW
* behavior.
*/
+#define ARCH_CAP_RRSBA BIT(19) /*
+ * Indicates RET may use predictors
+ * other than the RSB. With eIBRS
+ * enabled predictions in kernel mode
+ * are restricted to targets in
+ * kernel.
+ */
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
@@ -389,6 +399,11 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
+
+#define MSR_AMD64_DE_CFG 0xc0011029
+#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
+#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
@@ -457,9 +472,6 @@
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
-#define MSR_F10H_DECFG 0xc0011029
-#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
-#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 17a236a8b..8dba996e9 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -9,6 +9,7 @@
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
+#include <asm/percpu.h>
/*
* Fill the CPU return stack buffer.
@@ -70,12 +71,11 @@
add $(BITS_PER_LONG/8) * nr, sp;
#endif
-/* Sequence to mitigate PBRSB on eIBRS CPUs */
-#define __ISSUE_UNBALANCED_RET_GUARD(sp) \
- call 881f; \
+#define ISSUE_UNBALANCED_RET_GUARD(sp) \
+ call 992f; \
int3; \
-881: \
- add $(BITS_PER_LONG/8), sp; \
+992: \
+ add $(BITS_PER_LONG/8), sp; \
lfence;
#ifdef __ASSEMBLY__
@@ -165,13 +165,11 @@
* monstrosity above, manually.
*/
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
-#ifdef CONFIG_RETPOLINE
ANNOTATE_NOSPEC_ALTERNATIVE
ALTERNATIVE "jmp .Lskip_rsb_\@", \
__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
\ftr
.Lskip_rsb_\@:
-#endif
.endm
#else /* __ASSEMBLY__ */
@@ -252,6 +250,7 @@ enum spectre_v2_mitigation {
SPECTRE_V2_EIBRS,
SPECTRE_V2_EIBRS_RETPOLINE,
SPECTRE_V2_EIBRS_LFENCE,
+ SPECTRE_V2_IBRS,
};
/* The indirect branch speculation control variants */
@@ -280,26 +279,21 @@ extern char __indirect_thunk_end[];
* retpoline and IBRS mitigations for Spectre v2 need this; only on future
* CPUs with IBRS_ALL *might* it be avoided.
*/
-static inline void vmexit_fill_RSB(void)
+static __always_inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
unsigned long loops;
asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE("jmp 910f",
- __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
- X86_FEATURE_RETPOLINE)
+ ALTERNATIVE_2("jmp 910f", "", X86_FEATURE_RSB_VMEXIT,
+ "jmp 911f", X86_FEATURE_RSB_VMEXIT_LITE)
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1))
+ "911:"
+ __stringify(ISSUE_UNBALANCED_RET_GUARD(%1))
"910:"
: "=r" (loops), ASM_CALL_CONSTRAINT
: : "memory" );
#endif
- asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE("jmp 920f",
- __stringify(__ISSUE_UNBALANCED_RET_GUARD(%0)),
- X86_FEATURE_RSB_VMEXIT_LITE)
- "920:"
- : ASM_CALL_CONSTRAINT
- : : "memory" );
}
static __always_inline
@@ -322,6 +316,9 @@ static inline void indirect_branch_prediction_barrier(void)
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
+DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
+extern void update_spec_ctrl_cond(u64 val);
+extern u64 spec_ctrl_current(void);
/*
* With retpoline, we must use IBRS to restrict branch prediction
@@ -331,18 +328,16 @@ extern u64 x86_spec_ctrl_base;
*/
#define firmware_restrict_branch_speculation_start() \
do { \
- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
- \
preempt_disable(); \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current() | SPEC_CTRL_IBRS, \
X86_FEATURE_USE_IBRS_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
- u64 val = x86_spec_ctrl_base; \
- \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current(), \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 98c23126f..e017f64e0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -789,8 +789,6 @@ static void init_amd_gh(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
}
-#define MSR_AMD64_DE_CFG 0xC0011029
-
static void init_amd_ln(struct cpuinfo_x86 *c)
{
/*
@@ -885,12 +883,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
- /*
- * Fix erratum 1076: CPB feature bit not being set in CPUID.
- * Always set it, except when running under a hypervisor.
- */
- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
- set_cpu_cap(c, X86_FEATURE_CPB);
+ /* Fix up CPUID bits, but only if not virtualised. */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
+
+ /* Erratum 1076: CPB feature bit not being set in CPUID. */
+ if (!cpu_has(c, X86_FEATURE_CPB))
+ set_cpu_cap(c, X86_FEATURE_CPB);
+
+ /*
+ * Zen3 (Fam19 model < 0x10) parts are not susceptible to
+ * Branch Type Confusion, but predate the allocation of the
+ * BTC_NO bit.
+ */
+ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
+ set_cpu_cap(c, X86_FEATURE_BTC_NO);
+ }
}
static void init_amd(struct cpuinfo_x86 *c)
@@ -951,16 +958,16 @@ static void init_amd(struct cpuinfo_x86 *c)
* msr_set_bit() uses the safe accessors, too, even if the MSR
* is not present.
*/
- msr_set_bit(MSR_F10H_DECFG,
- MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+ msr_set_bit(MSR_AMD64_DE_CFG,
+ MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT);
/*
* Verify that the MSR write was successful (could be running
* under a hypervisor) and only then assume that LFENCE is
* serializing.
*/
- ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
- if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+ ret = rdmsrl_safe(MSR_AMD64_DE_CFG, &val);
+ if (!ret && (val & MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)) {
/* A serializing LFENCE stops RDTSC speculation */
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
} else {
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 501d09d59..f1f57acca 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -37,6 +37,8 @@
static void __init spectre_v1_select_mitigation(void);
static void __init spectre_v2_select_mitigation(void);
+static void __init retbleed_select_mitigation(void);
+static void __init spectre_v2_user_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
@@ -46,16 +48,47 @@ static void __init taa_select_mitigation(void);
static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
+/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+
+/* The current value of the SPEC_CTRL MSR with task-specific bits set */
+DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
+
static DEFINE_MUTEX(spec_ctrl_mutex);
+/* Update SPEC_CTRL MSR and its cached copy unconditionally */
+static void update_spec_ctrl(u64 val)
+{
+ this_cpu_write(x86_spec_ctrl_current, val);
+ wrmsrl(MSR_IA32_SPEC_CTRL, val);
+}
+
/*
- * The vendor and possibly platform specific bits which can be modified in
- * x86_spec_ctrl_base.
+ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
+ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
*/
-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+void update_spec_ctrl_cond(u64 val)
+{
+ if (this_cpu_read(x86_spec_ctrl_current) == val)
+ return;
+
+ this_cpu_write(x86_spec_ctrl_current, val);
+
+ /*
+ * When KERNEL_IBRS this MSR is written on return-to-user, unless
+ * forced the update can be delayed until that time.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
+ wrmsrl(MSR_IA32_SPEC_CTRL, val);
+}
+
+u64 spec_ctrl_current(void)
+{
+ return this_cpu_read(x86_spec_ctrl_current);
+}
+EXPORT_SYMBOL_GPL(spec_ctrl_current);
/*
* AMD specific MSR info for Speculative Store Bypass control.
@@ -105,13 +138,21 @@ void __init check_bugs(void)
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
- /* Allow STIBP in MSR_SPEC_CTRL if supported */
- if (boot_cpu_has(X86_FEATURE_STIBP))
- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
-
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
+ /*
+ * retbleed_select_mitigation() relies on the state set by
+ * spectre_v2_select_mitigation(); specifically it wants to know about
+ * spectre_v2=ibrs.
+ */
+ retbleed_select_mitigation();
+ /*
+ * spectre_v2_user_select_mitigation() relies on the state set by
+ * retbleed_select_mitigation(); specifically the STIBP selection is
+ * forced for UNRET.
+ */
+ spectre_v2_user_select_mitigation();
ssb_select_mitigation();
l1tf_select_mitigation();
md_clear_select_mitigation();
@@ -151,31 +192,17 @@ void __init check_bugs(void)
#endif
}
+/*
+ * NOTE: For VMX, this function is not called in the vmexit path.
+ * It uses vmx_spec_ctrl_restore_host() instead.
+ */
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
- u64 msrval, guestval, hostval = x86_spec_ctrl_base;
+ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
struct thread_info *ti = current_thread_info();
- /* Is MSR_SPEC_CTRL implemented ? */
if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
- /*
- * Restrict guest_spec_ctrl to supported values. Clear the
- * modifiable bits in the host base value and or the
- * modifiable bits from the guest value.
- */
- guestval = hostval & ~x86_spec_ctrl_mask;
- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
-
- /* SSBD controlled in MSR_SPEC_CTRL */
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- static_cpu_has(X86_FEATURE_AMD_SSBD))
- hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
-
- /* Conditional STIBP enabled? */
- if (static_branch_unlikely(&switch_to_cond_stibp))
- hostval |= stibp_tif_to_spec_ctrl(ti->flags);
-
if (hostval != guestval) {
msrval = setguest ? guestval : hostval;
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
@@ -705,12 +732,103 @@ static int __init nospectre_v1_cmdline(char *str)
}
early_param("nospectre_v1", nospectre_v1_cmdline);
-#undef pr_fmt
-#define pr_fmt(fmt) "Spectre V2 : " fmt
-
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
SPECTRE_V2_NONE;
+#undef pr_fmt
+#define pr_fmt(fmt) "RETBleed: " fmt
+
+enum retbleed_mitigation {
+ RETBLEED_MITIGATION_NONE,
+ RETBLEED_MITIGATION_IBRS,
+ RETBLEED_MITIGATION_EIBRS,
+};
+
+enum retbleed_mitigation_cmd {
+ RETBLEED_CMD_OFF,
+ RETBLEED_CMD_AUTO,
+};
+
+const char * const retbleed_strings[] = {
+ [RETBLEED_MITIGATION_NONE] = "Vulnerable",
+ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
+ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
+};
+
+static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
+ RETBLEED_MITIGATION_NONE;
+static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
+ RETBLEED_CMD_AUTO;
+
+static int __init retbleed_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ retbleed_cmd = RETBLEED_CMD_OFF;
+ else if (!strcmp(str, "auto"))
+ retbleed_cmd = RETBLEED_CMD_AUTO;
+ else
+ pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str);
+
+ return 0;
+}
+early_param("retbleed", retbleed_parse_cmdline);
+
+#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n"
+#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+
+static void __init retbleed_select_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
+ return;
+
+ switch (retbleed_cmd) {
+ case RETBLEED_CMD_OFF:
+ return;
+
+ case RETBLEED_CMD_AUTO:
+ default:
+ /*
+ * The Intel mitigation (IBRS) was already selected in
+ * spectre_v2_select_mitigation().
+ */
+
+ break;
+ }
+
+ switch (retbleed_mitigation) {
+ default:
+ break;
+ }
+
+ /*
+ * Let IBRS trump all on Intel without affecting the effects of the
+ * retbleed= cmdline option.
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ switch (spectre_v2_enabled) {
+ case SPECTRE_V2_IBRS:
+ retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
+ break;
+ case SPECTRE_V2_EIBRS:
+ case SPECTRE_V2_EIBRS_RETPOLINE:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
+ break;
+ default:
+ pr_err(RETBLEED_INTEL_MSG);
+ }
+ }
+
+ pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 : " fmt
+
static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
SPECTRE_V2_USER_NONE;
static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
@@ -740,6 +858,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; }
#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
+#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n"
#ifdef CONFIG_BPF_SYSCALL
void unpriv_ebpf_notify(int new_state)
@@ -781,6 +900,7 @@ enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_EIBRS,
SPECTRE_V2_CMD_EIBRS_RETPOLINE,
SPECTRE_V2_CMD_EIBRS_LFENCE,
+ SPECTRE_V2_CMD_IBRS,
};
enum spectre_v2_user_cmd {
@@ -821,13 +941,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure)
pr_info("spectre_v2_user=%s forced on command line.\n", reason);
}
+static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
+
static enum spectre_v2_user_cmd __init
-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
+spectre_v2_parse_user_cmdline(void)
{
char arg[20];
int ret, i;
- switch (v2_cmd) {
+ switch (spectre_v2_cmd) {
case SPECTRE_V2_CMD_NONE:
return SPECTRE_V2_USER_CMD_NONE;
case SPECTRE_V2_CMD_FORCE:
@@ -853,15 +975,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
return SPECTRE_V2_USER_CMD_AUTO;
}
-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
+static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
{
- return (mode == SPECTRE_V2_EIBRS ||
- mode == SPECTRE_V2_EIBRS_RETPOLINE ||
- mode == SPECTRE_V2_EIBRS_LFENCE);
+ return mode == SPECTRE_V2_IBRS ||
+ mode == SPECTRE_V2_EIBRS ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_EIBRS_LFENCE;
}
static void __init
-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
+spectre_v2_user_select_mitigation(void)
{
enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
bool smt_possible = IS_ENABLED(CONFIG_SMP);
@@ -874,7 +997,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
smt_possible = false;
- cmd = spectre_v2_parse_user_cmdline(v2_cmd);
+ cmd = spectre_v2_parse_user_cmdline();
switch (cmd) {
case SPECTRE_V2_USER_CMD_NONE:
goto set_mode;
@@ -922,12 +1045,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
}
/*
- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
- * required.
+ * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
+ * STIBP is not required.
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
- spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ spectre_v2_in_ibrs_mode(spectre_v2_enabled))
return;
/*
@@ -952,6 +1075,7 @@ static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
[SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
[SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
+ [SPECTRE_V2_IBRS] = "Mitigation: IBRS",
};
static const struct {
@@ -969,6 +1093,7 @@ static const struct {
{ "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
{ "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
+ { "ibrs", SPECTRE_V2_CMD_IBRS, false },
};
static void __init spec_v2_print_cond(const char *reason, bool secure)
@@ -1031,6 +1156,24 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
+ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
+ pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
spec_v2_print_cond(mitigation_options[i].option,
mitigation_options[i].secure);
return cmd;
@@ -1046,6 +1189,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
return SPECTRE_V2_RETPOLINE;
}
+/* Disable in-kernel use of non-RSB RET predictors */
+static void __init spec_ctrl_disable_kernel_rrsba(void)
+{
+ u64 ia32_cap;
+
+ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ return;
+
+ ia32_cap = x86_read_arch_cap_msr();
+
+ if (ia32_cap & ARCH_CAP_RRSBA) {
+ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ }
+}
+
static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
{
/*
@@ -1070,19 +1229,24 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
*/
switch (mode) {
case SPECTRE_V2_NONE:
- /* These modes already fill RSB at vmexit */
- case SPECTRE_V2_LFENCE:
- case SPECTRE_V2_RETPOLINE:
- case SPECTRE_V2_EIBRS_RETPOLINE:
return;
case SPECTRE_V2_EIBRS_LFENCE:
case SPECTRE_V2_EIBRS:
- if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
+ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) {
setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
}
return;
+
+ case SPECTRE_V2_EIBRS_RETPOLINE:
+ case SPECTRE_V2_RETPOLINE:
+ case SPECTRE_V2_LFENCE:
+ case SPECTRE_V2_IBRS:
+ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+ pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
+ return;
}
pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
@@ -1113,6 +1277,14 @@ static void __init spectre_v2_select_mitigation(void)
break;
}
+ if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+ retbleed_cmd != RETBLEED_CMD_OFF &&
+ boot_cpu_has(X86_FEATURE_IBRS) &&
+ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ mode = SPECTRE_V2_IBRS;
+ break;
+ }
+
mode = spectre_v2_select_retpoline();
break;
@@ -1129,6 +1301,10 @@ static void __init spectre_v2_select_mitigation(void)
mode = spectre_v2_select_retpoline();
break;
+ case SPECTRE_V2_CMD_IBRS:
+ mode = SPECTRE_V2_IBRS;
+ break;
+
case SPECTRE_V2_CMD_EIBRS:
mode = SPECTRE_V2_EIBRS;
break;
@@ -1145,10 +1321,9 @@ static void __init spectre_v2_select_mitigation(void)
if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
- if (spectre_v2_in_eibrs_mode(mode)) {
- /* Force it so VMEXIT will restore correctly */
+ if (spectre_v2_in_ibrs_mode(mode)) {
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
switch (mode) {
@@ -1156,6 +1331,12 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_EIBRS:
break;
+ case SPECTRE_V2_IBRS:
+ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
+ if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED))
+ pr_warn(SPECTRE_V2_IBRS_PERF_MSG);
+ break;
+
case SPECTRE_V2_LFENCE:
case SPECTRE_V2_EIBRS_LFENCE:
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
@@ -1167,16 +1348,56 @@ static void __init spectre_v2_select_mitigation(void)
break;
}
+ /*
+ * Disable alternate RSB predictions in kernel when indirect CALLs and
+ * JMPs gets protection against BHI and Intramode-BTI, but RET
+ * prediction from a non-RSB predictor is still a risk.
+ */
+ if (mode == SPECTRE_V2_EIBRS_LFENCE ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_RETPOLINE)
+ spec_ctrl_disable_kernel_rrsba();
+
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
/*
- * If spectre v2 protection has been enabled, unconditionally fill
- * RSB during a context switch; this protects against two independent
- * issues:
+ * If Spectre v2 protection has been enabled, fill the RSB during a
+ * context switch. In general there are two types of RSB attacks
+ * across context switches, for which the CALLs/RETs may be unbalanced.
+ *
+ * 1) RSB underflow
+ *
+ * Some Intel parts have "bottomless RSB". When the RSB is empty,
+ * speculated return targets may come from the branch predictor,
+ * which could have a user-poisoned BTB or BHB entry.
+ *
+ * AMD has it even worse: *all* returns are speculated from the BTB,
+ * regardless of the state of the RSB.
+ *
+ * When IBRS or eIBRS is enabled, the "user -> kernel" attack
+ * scenario is mitigated by the IBRS branch prediction isolation
+ * properties, so the RSB buffer filling wouldn't be necessary to
+ * protect against this type of attack.
+ *
+ * The "user -> user" attack scenario is mitigated by RSB filling.
*
- * - RSB underflow (and switch to BTB) on Skylake+
- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
+ * 2) Poisoned RSB entry
+ *
+ * If the 'next' in-kernel return stack is shorter than 'prev',
+ * 'next' could be tricked into speculating with a user-poisoned RSB
+ * entry.
+ *
+ * The "user -> kernel" attack scenario is mitigated by SMEP and
+ * eIBRS.
+ *
+ * The "user -> user" scenario, also known as SpectreBHB, requires
+ * RSB clearing.
+ *
+ * So to mitigate all cases, unconditionally fill RSB on context
+ * switches.
+ *
+ * FIXME: Is this pointless for retbleed-affected AMD?
*/
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
@@ -1184,28 +1405,29 @@ static void __init spectre_v2_select_mitigation(void)
spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
/*
- * Retpoline means the kernel is safe because it has no indirect
- * branches. Enhanced IBRS protects firmware too, so, enable restricted
- * speculation around firmware calls only when Enhanced IBRS isn't
- * supported.
+ * Retpoline protects the kernel, but doesn't protect firmware. IBRS
+ * and Enhanced IBRS protect firmware too, so enable IBRS around
+ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
+ * enabled.
*
* Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
* the user might select retpoline on the kernel command line and if
* the CPU supports Enhanced IBRS, kernel might un-intentionally not
* enable IBRS around firmware calls.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
+ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
/* Set up IBPB and STIBP depending on the general spectre V2 command */
- spectre_v2_user_select_mitigation(cmd);
+ spectre_v2_cmd = cmd;
}
static void update_stibp_msr(void * __unused)
{
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
+ update_spec_ctrl(val);
}
/* Update x86_spec_ctrl_base in case SMT state changed. */
@@ -1422,16 +1644,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
}
/*
- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
- * bit in the mask to allow guests to use the mitigation even in the
- * case where the host does not enable it.
- */
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- static_cpu_has(X86_FEATURE_AMD_SSBD)) {
- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
- }
-
- /*
* We have three CPU feature flags that are in play here:
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
* - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
@@ -1448,7 +1660,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
}
@@ -1653,7 +1865,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
void x86_spec_ctrl_setup_ap(void)
{
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ update_spec_ctrl(x86_spec_ctrl_base);
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
x86_amd_ssb_disable();
@@ -1888,7 +2100,7 @@ static ssize_t mmio_stale_data_show_state(char *buf)
static char *stibp_state(void)
{
- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
return "";
switch (spectre_v2_user_stibp) {
@@ -1922,7 +2134,7 @@ static char *pbrsb_eibrs_state(void)
{
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
- boot_cpu_has(X86_FEATURE_RETPOLINE))
+ boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
return ", PBRSB-eIBRS: SW sequence";
else
return ", PBRSB-eIBRS: Vulnerable";
@@ -1958,6 +2170,11 @@ static ssize_t srbds_show_state(char *buf)
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
}
+static ssize_t retbleed_show_state(char *buf)
+{
+ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -2004,6 +2221,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_MMIO_UNKNOWN:
return mmio_stale_data_show_state(buf);
+ case X86_BUG_RETBLEED:
+ return retbleed_show_state(buf);
+
default:
break;
}
@@ -2063,4 +2283,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at
else
return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
}
+
+ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
+}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 653ced7cb..55293e5dc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -954,8 +954,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define MSBDS_ONLY BIT(5)
#define NO_SWAPGS BIT(6)
#define NO_ITLB_MULTIHIT BIT(7)
-#define NO_EIBRS_PBRSB BIT(8)
-#define NO_MMIO BIT(9)
+#define NO_MMIO BIT(8)
+#define NO_EIBRS_PBRSB BIT(9)
#define VULNWL(_vendor, _family, _model, _whitelist) \
{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@@ -1022,48 +1022,55 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
{}
};
+#define VULNBL(vendor, family, model, blacklist) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
+
#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
INTEL_FAM6_##model, steppings, \
X86_FEATURE_ANY, issues)
+#define VULNBL_AMD(family, blacklist) \
+ VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
+
#define SRBDS BIT(0)
/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
#define MMIO BIT(1)
/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
#define MMIO_SBDS BIT(2)
+/* CPU is affected by RETbleed, speculating where you would not expect it */
+#define RETBLEED BIT(3)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_CORE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_ULT, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_GT3E, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO),
- VULNBL_INTEL_STEPPINGS(BROADWELL_XEON_D,X86_STEPPINGS(0x3, 0x5), MMIO),
+ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_XEON_D,X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_GT3E, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |
- BIT(7) | BIT(0xB), MMIO),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0x8), SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x0, 0x8), SRBDS),
- VULNBL_INTEL_STEPPINGS(ICELAKE_MOBILE, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(ICELAKE_XEON_D, X86_STEPPINGS(0x1, 0x1), MMIO),
- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),
- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO),
- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(CANNONLAKE_MOBILE,X86_STEPPING_ANY, RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_MOBILE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_XEON_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_X, X86_STEPPING_ANY, MMIO),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
+
+ VULNBL_AMD(0x15, RETBLEED),
+ VULNBL_AMD(0x16, RETBLEED),
+ VULNBL_AMD(0x17, RETBLEED),
{}
};
@@ -1169,6 +1176,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
}
+ if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+ setup_force_cpu_bug(X86_BUG_RETBLEED);
+ }
+
if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
!(ia32_cap & ARCH_CAP_PBRSB_NO))
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 751e59057..ad6776081 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -16,12 +16,17 @@
* respective wildcard entries.
*
* A typical table entry would be to match a specific CPU
- * { X86_VENDOR_INTEL, 6, 0x12 }
- * or to match a specific CPU feature
- * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
+ *
+ * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL,
+ * X86_FEATURE_ANY, NULL);
*
* Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
- * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
+ * %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor)
+ *
+ * asm/cpu_device_id.h contains a set of useful macros which are shortcuts
+ * for various common selections. The above can be shortened to:
+ *
+ * X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, NULL);
*
* Arrays used to match for this should also be declared using
* MODULE_DEVICE_TABLE(x86cpu, ...)
@@ -53,3 +58,34 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
return NULL;
}
EXPORT_SYMBOL(x86_match_cpu);
+
+static const struct x86_cpu_desc *
+x86_match_cpu_with_stepping(const struct x86_cpu_desc *match)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ const struct x86_cpu_desc *m;
+
+ for (m = match; m->x86_family | m->x86_model; m++) {
+ if (c->x86_vendor != m->x86_vendor)
+ continue;
+ if (c->x86 != m->x86_family)
+ continue;
+ if (c->x86_model != m->x86_model)
+ continue;
+ if (c->x86_stepping != m->x86_stepping)
+ continue;
+ return m;
+ }
+ return NULL;
+}
+
+bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table)
+{
+ const struct x86_cpu_desc *res = x86_match_cpu_with_stepping(table);
+
+ if (!res || res->x86_microcode_rev > boot_cpu_data.microcode)
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(x86_cpu_has_min_microcode_rev);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index a4e7e100e..8396c77e9 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -222,7 +222,13 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_p
return ret;
native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
- if (rev >= mc->hdr.patch_id)
+
+ /*
+ * Allow application of the same revision to pick up SMT-specific
+ * changes even if the revision of the other SMT thread is already
+ * up-to-date.
+ */
+ if (rev > mc->hdr.patch_id)
return ret;
if (!__apply_microcode_amd(mc)) {
@@ -304,8 +310,12 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
- /* Check whether we have saved a new patch already: */
- if (*new_rev && rev < mc->hdr.patch_id) {
+ /*
+ * Check whether a new patch has been saved already. Also, allow application of
+ * the same revision in order to pick up SMT-thread-specific configuration even
+ * if the sibling SMT thread already has an up-to-date revision.
+ */
+ if (*new_rev && rev <= mc->hdr.patch_id) {
if (!__apply_microcode_amd(mc)) {
*new_rev = mc->hdr.patch_id;
return;
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 5a52672e3..90bd155d7 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -21,6 +21,7 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
+ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
index 032509adf..88a553ee7 100644
--- a/arch/x86/kernel/cpu/tsx.c
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -55,24 +55,6 @@ void tsx_enable(void)
wrmsrl(MSR_IA32_TSX_CTRL, tsx);
}
-static bool __init tsx_ctrl_is_supported(void)
-{
- u64 ia32_cap = x86_read_arch_cap_msr();
-
- /*
- * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
- * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
- *
- * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
- * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
- * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
- * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
- * tsx= cmdline requests will do nothing on CPUs without
- * MSR_IA32_TSX_CTRL support.
- */
- return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
-}
-
static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
{
if (boot_cpu_has_bug(X86_BUG_TAA))
@@ -86,9 +68,22 @@ void __init tsx_init(void)
char arg[5] = {};
int ret;
- if (!tsx_ctrl_is_supported())
+ /*
+ * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
+ * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
+ *
+ * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
+ * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
+ * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
+ * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
+ * tsx= cmdline requests will do nothing on CPUs without
+ * MSR_IA32_TSX_CTRL support.
+ */
+ if (!(x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR))
return;
+ setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL);
+
ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
if (ret >= 0) {
if (!strcmp(arg, "on")) {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cd138bfd9..e8d40a597 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -434,7 +434,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
}
if (updmsr)
- wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+ update_spec_ctrl_cond(msr);
}
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 5c48d2c4c..0c0f0eda3 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -668,7 +668,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
/* Otherwise, skip ahead to the user-specified starting frame: */
while (!unwind_done(state) &&
(!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
- state->sp < (unsigned long)first_frame))
+ state->sp <= (unsigned long)first_frame))
unwind_next_frame(state);
return;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0489ffc3d..768a765c4 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -680,6 +680,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
+ entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
entry->edx = 0;
/*
* IBRS, IBPB and VIRT_SSBD aren't necessarily present in
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 25bdd0696..0cb75d29e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -759,8 +759,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
ctxt->mode, linear);
}
-static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
- enum x86emul_mode mode)
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
{
ulong linear;
int rc;
@@ -770,41 +769,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
if (ctxt->op_bytes != sizeof(unsigned long))
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
- rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
+ rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
if (rc == X86EMUL_CONTINUE)
ctxt->_eip = addr.ea;
return rc;
}
+static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
+{
+ u64 efer;
+ struct desc_struct cs;
+ u16 selector;
+ u32 base3;
+
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+ if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
+ /* Real mode. cpu must not have long mode active */
+ if (efer & EFER_LMA)
+ return X86EMUL_UNHANDLEABLE;
+ ctxt->mode = X86EMUL_MODE_REAL;
+ return X86EMUL_CONTINUE;
+ }
+
+ if (ctxt->eflags & X86_EFLAGS_VM) {
+ /* Protected/VM86 mode. cpu must not have long mode active */
+ if (efer & EFER_LMA)
+ return X86EMUL_UNHANDLEABLE;
+ ctxt->mode = X86EMUL_MODE_VM86;
+ return X86EMUL_CONTINUE;
+ }
+
+ if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
+ return X86EMUL_UNHANDLEABLE;
+
+ if (efer & EFER_LMA) {
+ if (cs.l) {
+ /* Proper long mode */
+ ctxt->mode = X86EMUL_MODE_PROT64;
+ } else if (cs.d) {
+ /* 32 bit compatibility mode*/
+ ctxt->mode = X86EMUL_MODE_PROT32;
+ } else {
+ ctxt->mode = X86EMUL_MODE_PROT16;
+ }
+ } else {
+ /* Legacy 32 bit / 16 bit mode */
+ ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+ }
+
+ return X86EMUL_CONTINUE;
+}
+
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
- return assign_eip(ctxt, dst, ctxt->mode);
+ return assign_eip(ctxt, dst);
}
-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
- const struct desc_struct *cs_desc)
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
{
- enum x86emul_mode mode = ctxt->mode;
- int rc;
+ int rc = emulator_recalc_and_set_mode(ctxt);
-#ifdef CONFIG_X86_64
- if (ctxt->mode >= X86EMUL_MODE_PROT16) {
- if (cs_desc->l) {
- u64 efer = 0;
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
- ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
- if (efer & EFER_LMA)
- mode = X86EMUL_MODE_PROT64;
- } else
- mode = X86EMUL_MODE_PROT32; /* temporary value */
- }
-#endif
- if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
- mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
- rc = assign_eip(ctxt, dst, mode);
- if (rc == X86EMUL_CONTINUE)
- ctxt->mode = mode;
- return rc;
+ return assign_eip(ctxt, dst);
}
static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -1975,7 +2004,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- if (ctxt->modrm_reg == VCPU_SREG_SS)
+ if (seg == VCPU_SREG_SS)
ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
if (ctxt->op_bytes > 2)
rsp_increment(ctxt, ctxt->op_bytes - 2);
@@ -2192,7 +2221,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+ rc = assign_eip_far(ctxt, ctxt->src.val);
/* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@@ -2273,7 +2302,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
&new_desc);
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, eip, &new_desc);
+ rc = assign_eip_far(ctxt, eip);
/* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@@ -2895,6 +2924,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
ctxt->_eip = rdx;
+ ctxt->mode = usermode;
*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
return X86EMUL_CONTINUE;
@@ -3491,7 +3521,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+ rc = assign_eip_far(ctxt, ctxt->src.val);
if (rc != X86EMUL_CONTINUE)
goto fail;
@@ -3638,11 +3668,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
static int em_cr_write(struct x86_emulate_ctxt *ctxt)
{
- if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
+ int cr_num = ctxt->modrm_reg;
+ int r;
+
+ if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
return emulate_gp(ctxt, 0);
/* Disable writeback. */
ctxt->dst.type = OP_NONE;
+
+ if (cr_num == 0) {
+ /*
+ * CR0 write might have updated CR0.PE and/or CR0.PG
+ * which can affect the cpu's execution mode.
+ */
+ r = emulator_recalc_and_set_mode(ctxt);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e1492a67e..cd3432df0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -50,6 +50,7 @@
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/spec-ctrl.h>
+#include <asm/cpu_device_id.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -4154,9 +4155,9 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
msr->data = 0;
switch (msr->index) {
- case MSR_F10H_DECFG:
- if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
- msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
+ case MSR_AMD64_DE_CFG:
+ if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
+ msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE;
break;
default:
return 1;
@@ -4258,7 +4259,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0x1E;
}
break;
- case MSR_F10H_DECFG:
+ case MSR_AMD64_DE_CFG:
msr_info->data = svm->msr_decfg;
break;
default:
@@ -4445,7 +4446,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
- case MSR_F10H_DECFG: {
+ case MSR_AMD64_DE_CFG: {
struct kvm_msr_entry msr_entry;
msr_entry.index = msr->index;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 20ce79458..9bb696d73 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -41,6 +41,7 @@
#include <asm/asm.h>
#include <asm/cpu.h>
+#include <asm/cpu_device_id.h>
#include <asm/io.h>
#include <asm/desc.h>
#include <asm/vmx.h>
@@ -2132,9 +2133,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
if (!vmx->disable_fb_clear)
return;
- rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
msr |= FB_CLEAR_DIS;
- wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
/* Cache the MSR value to avoid reading it later */
vmx->msr_ia32_mcu_opt_ctrl = msr;
}
@@ -2145,7 +2146,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
return;
vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
- wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
+ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
}
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
@@ -10759,10 +10760,35 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
vmx->loaded_vmcs->hv_timer_armed = false;
}
+u64 __always_inline vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx)
+{
+ u64 guestval, hostval = this_cpu_read(x86_spec_ctrl_current);
+
+ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
+ return 0;
+
+ guestval = __rdmsr(MSR_IA32_SPEC_CTRL);
+
+ /*
+ *
+ * For legacy IBRS, the IBRS bit always needs to be written after
+ * transitioning from a less privileged predictor mode, regardless of
+ * whether the guest/host values differ.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
+ guestval != hostval)
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
+
+ barrier_nospec();
+
+ return guestval;
+}
+
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long cr3, cr4, evmcs_rsp;
+ u64 spec_ctrl;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -10988,9 +11014,24 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
- /* Eliminate branch target predictions from guest mode */
+ /*
+ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
+ * the first unbalanced RET after vmexit!
+ *
+ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
+ * entries and (in some cases) RSB underflow.
+ *
+ * eIBRS has its own protection against poisoned RSB, so it doesn't
+ * need the RSB filling sequence. But it does need to be enabled, and a
+ * single call to retire, before the first unbalanced RET.
+ *
+ * So no RETs before vmx_spec_ctrl_restore_host() below.
+ */
vmexit_fill_RSB();
+ /* Save this for below */
+ spec_ctrl = vmx_spec_ctrl_restore_host(vmx);
+
vmx_enable_fb_clear(vmx);
/*
@@ -11009,9 +11050,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* save it.
*/
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
-
- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
+ vmx->spec_ctrl = spec_ctrl;
/* All fields are clean at this point */
if (static_branch_unlikely(&enable_evmcs))
@@ -13276,14 +13315,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
*/
vmcs12_save_pending_event(vcpu, vmcs12);
}
-
- /*
- * Drop what we picked up for L2 via vmx_complete_interrupts. It is
- * preserved above and would only end up incorrectly in L1.
- */
- vcpu->arch.nmi_injected = false;
- kvm_clear_exception_queue(vcpu);
- kvm_clear_interrupt_queue(vcpu);
}
/*
@@ -13617,6 +13648,17 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
}
+ /*
+ * Drop events/exceptions that were queued for re-injection to L2
+ * (picked up via vmx_complete_interrupts()), as well as exceptions
+ * that were pending for L2. Note, this must NOT be hoisted above
+ * prepare_vmcs12(), events/exceptions queued for re-injection need to
+ * be captured in vmcs12 (see vmcs12_save_pending_event()).
+ */
+ vcpu->arch.nmi_injected = false;
+ kvm_clear_exception_queue(vcpu);
+ kvm_clear_interrupt_queue(vcpu);
+
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
vm_entry_controls_reset_shadow(vmx);
vm_exit_controls_reset_shadow(vmx);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be4697d91..8fe615df8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1156,7 +1156,7 @@ static u32 msr_based_features[] = {
MSR_IA32_VMX_EPT_VPID_CAP,
MSR_IA32_VMX_VMFUNC,
- MSR_F10H_DECFG,
+ MSR_AMD64_DE_CFG,
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
};
@@ -9736,9 +9736,9 @@ void kvm_arch_end_assignment(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
-bool kvm_arch_has_assigned_device(struct kvm *kvm)
+bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
{
- return atomic_read(&kvm->arch.assigned_device_count);
+ return arch_atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index adc77904f..7da9b4270 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -171,9 +171,15 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
* Mappings have to be page-aligned
*/
offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PHYSICAL_PAGE_MASK;
+ phys_addr &= PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
+ /*
+ * Mask out any bits not part of the actual physical
+ * address, like memory encryption bits.
+ */
+ phys_addr &= PHYSICAL_PAGE_MASK;
+
retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
pcm, &new_pcm);
if (retval) {
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 794824948..75cd943f0 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -527,15 +527,23 @@ static int pm_cpu_check(const struct x86_cpu_id *c)
static void pm_save_spec_msr(void)
{
- u32 spec_msr_id[] = {
- MSR_IA32_SPEC_CTRL,
- MSR_IA32_TSX_CTRL,
- MSR_TSX_FORCE_ABORT,
- MSR_IA32_MCU_OPT_CTRL,
- MSR_AMD64_LS_CFG,
+ struct msr_enumeration {
+ u32 msr_no;
+ u32 feature;
+ } msr_enum[] = {
+ { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL },
+ { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL },
+ { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT },
+ { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL },
+ { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD },
+ { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC },
};
+ int i;
- msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id));
+ for (i = 0; i < ARRAY_SIZE(msr_enum); i++) {
+ if (boot_cpu_has(msr_enum[i].feature))
+ msr_build_context(&msr_enum[i].msr_no, 1);
+ }
}
static int pm_check_save_msr(void)
diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h
index 68fd2cf52..f6e9f8439 100644
--- a/arch/x86/um/shared/sysdep/syscalls_32.h
+++ b/arch/x86/um/shared/sysdep/syscalls_32.h
@@ -6,10 +6,9 @@
#include <asm/unistd.h>
#include <sysdep/ptrace.h>
-typedef long syscall_handler_t(struct pt_regs);
+typedef long syscall_handler_t(struct syscall_args);
extern syscall_handler_t *sys_call_table[];
#define EXECUTE_SYSCALL(syscall, regs) \
- ((long (*)(struct syscall_args)) \
- (*sys_call_table[syscall]))(SYSCALL_ARGS(&regs->regs))
+ ((*sys_call_table[syscall]))(SYSCALL_ARGS(&regs->regs))
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index 5bd949da7..b69ab2409 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -65,9 +65,6 @@ static int get_free_idx(struct task_struct* task)
struct thread_struct *t = &task->thread;
int idx;
- if (!t->arch.tls_array)
- return GDT_ENTRY_TLS_MIN;
-
for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
if (!t->arch.tls_array[idx].present)
return idx + GDT_ENTRY_TLS_MIN;
@@ -242,9 +239,6 @@ static int get_tls_entry(struct task_struct *task, struct user_desc *info,
{
struct thread_struct *t = &task->thread;
- if (!t->arch.tls_array)
- goto clear;
-
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;