From dc50eab76b709d68175a358d6e23a5a3890764d3 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 19:39:57 +0200 Subject: Merging upstream version 6.7.7. Signed-off-by: Daniel Baumann --- arch/x86/entry/common.c | 111 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 34 deletions(-) (limited to 'arch/x86/entry/common.c') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 9c0b26ae5..6356060ca 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_XEN_PV #include @@ -71,7 +72,8 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) return false; } -__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr) +/* Returns true to return using SYSRET, or false to use IRET */ +__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr) { add_random_kstack_offset(); nr = syscall_enter_from_user_mode(regs, nr); @@ -85,6 +87,46 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr) instrumentation_end(); syscall_exit_to_user_mode(regs); + + /* + * Check that the register state is valid for using SYSRET to exit + * to userspace. Otherwise use the slower but fully capable IRET + * exit path. + */ + + /* XEN PV guests always use the IRET path */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* SYSRET requires RCX == RIP and R11 == EFLAGS */ + if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags)) + return false; + + /* CS and SS must match the values set in MSR_STAR */ + if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS)) + return false; + + /* + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP + * in kernel space. This essentially lets the user take over + * the kernel, since userspace controls RSP. + * + * TASK_SIZE_MAX covers all user-accessible addresses other than + * the deprecated vsyscall page. + */ + if (unlikely(regs->ip >= TASK_SIZE_MAX)) + return false; + + /* + * SYSRET cannot restore RF. It can restore TF, but unlike IRET, + * restoring TF results in a trap from userspace immediately after + * SYSRET. + */ + if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF))) + return false; + + /* Use SYSRET to exit to userspace */ + return true; } #endif @@ -98,7 +140,13 @@ static __always_inline int syscall_32_enter(struct pt_regs *regs) } #ifdef CONFIG_IA32_EMULATION -bool __ia32_enabled __ro_after_init = true; +bool __ia32_enabled __ro_after_init = !IS_ENABLED(CONFIG_IA32_EMULATION_DEFAULT_DISABLED); + +static int ia32_emulation_override_cmdline(char *arg) +{ + return kstrtobool(arg, &__ia32_enabled); +} +early_param("ia32_emulation", ia32_emulation_override_cmdline); #endif /* @@ -277,8 +325,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) return true; } -/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ -__visible noinstr long do_fast_syscall_32(struct pt_regs *regs) +/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */ +__visible noinstr bool do_fast_syscall_32(struct pt_regs *regs) { /* * Called using the internal vDSO SYSENTER/SYSCALL32 calling @@ -296,41 +344,36 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs) /* Invoke the syscall. If it failed, keep it simple: use IRET. */ if (!__do_fast_syscall_32(regs)) - return 0; + return false; -#ifdef CONFIG_X86_64 /* - * Opportunistic SYSRETL: if possible, try to return using SYSRETL. - * SYSRETL is available on all 64-bit CPUs, so we don't need to - * bother with SYSEXIT. - * - * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP, - * because the ECX fixup above will ensure that this is essentially - * never the case. + * Check that the register state is valid for using SYSRETL/SYSEXIT + * to exit to userspace. Otherwise use the slower but fully capable + * IRET exit path. */ - return regs->cs == __USER32_CS && regs->ss == __USER_DS && - regs->ip == landing_pad && - (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0; -#else - /* - * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT. - * - * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP, - * because the ECX fixup above will ensure that this is essentially - * never the case. - * - * We don't allow syscalls at all from VM86 mode, but we still - * need to check VM, because we might be returning from sys_vm86. - */ - return static_cpu_has(X86_FEATURE_SEP) && - regs->cs == __USER_CS && regs->ss == __USER_DS && - regs->ip == landing_pad && - (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0; -#endif + + /* XEN PV guests always use the IRET path */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* EIP must point to the VDSO landing pad */ + if (unlikely(regs->ip != landing_pad)) + return false; + + /* CS and SS must match the values set in MSR_STAR */ + if (unlikely(regs->cs != __USER32_CS || regs->ss != __USER_DS)) + return false; + + /* If the TF, RF, or VM flags are set, use IRET */ + if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM))) + return false; + + /* Use SYSRETL/SYSEXIT to exit to userspace */ + return true; } -/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ -__visible noinstr long do_SYSENTER_32(struct pt_regs *regs) +/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */ +__visible noinstr bool do_SYSENTER_32(struct pt_regs *regs) { /* SYSENTER loses RSP, but the vDSO saved it in RBP. */ regs->sp = regs->bp; -- cgit v1.2.3