From 638a9e433ecd61e64761352dbec1fa4f5874c941 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 7 Aug 2024 15:18:06 +0200 Subject: Merging upstream version 6.10.3. Signed-off-by: Daniel Baumann --- arch/x86/include/asm/acpi.h | 2 + arch/x86/include/asm/alternative.h | 14 -- arch/x86/include/asm/apic.h | 8 +- arch/x86/include/asm/asm.h | 3 - arch/x86/include/asm/atomic.h | 12 +- arch/x86/include/asm/atomic64_32.h | 79 +++++++---- arch/x86/include/asm/atomic64_64.h | 12 +- arch/x86/include/asm/barrier.h | 24 ++-- arch/x86/include/asm/bitops.h | 10 +- arch/x86/include/asm/boot.h | 5 - arch/x86/include/asm/cmpxchg_32.h | 203 ++++++++++++++++++----------- arch/x86/include/asm/cmpxchg_64.h | 6 + arch/x86/include/asm/cpu_device_id.h | 8 ++ arch/x86/include/asm/cpufeature.h | 16 ++- arch/x86/include/asm/extable_fixup_types.h | 2 +- arch/x86/include/asm/fb.h | 19 --- arch/x86/include/asm/fpu.h | 13 ++ arch/x86/include/asm/fpu/api.h | 3 + arch/x86/include/asm/fpu/types.h | 6 +- arch/x86/include/asm/hardirq.h | 6 + arch/x86/include/asm/ia32.h | 11 -- arch/x86/include/asm/ia32_unistd.h | 12 -- arch/x86/include/asm/idtentry.h | 6 + arch/x86/include/asm/inat.h | 17 ++- arch/x86/include/asm/insn.h | 32 ++++- arch/x86/include/asm/intel-family.h | 84 ++++++++++++ arch/x86/include/asm/io.h | 18 +++ arch/x86/include/asm/irq_remapping.h | 7 + arch/x86/include/asm/irq_vectors.h | 8 +- arch/x86/include/asm/kexec.h | 13 +- arch/x86/include/asm/kvm-x86-ops.h | 2 +- arch/x86/include/asm/kvm_host.h | 67 ++++++---- arch/x86/include/asm/mce.h | 2 + arch/x86/include/asm/mpspec.h | 6 +- arch/x86/include/asm/msr-index.h | 9 +- arch/x86/include/asm/page_types.h | 8 +- arch/x86/include/asm/percpu.h | 127 ++++++++---------- arch/x86/include/asm/pgtable.h | 20 +-- arch/x86/include/asm/pgtable_64.h | 1 + arch/x86/include/asm/posted_intr.h | 118 +++++++++++++++++ arch/x86/include/asm/processor.h | 32 +++-- arch/x86/include/asm/prom.h | 9 +- arch/x86/include/asm/qspinlock.h | 13 +- arch/x86/include/asm/qspinlock_paravirt.h | 7 +- arch/x86/include/asm/seccomp.h | 2 +- arch/x86/include/asm/sev-common.h | 8 +- arch/x86/include/asm/sev.h | 2 +- arch/x86/include/asm/shstk.h | 2 + arch/x86/include/asm/special_insns.h | 8 +- arch/x86/include/asm/string_64.h | 45 ++++--- arch/x86/include/asm/text-patching.h | 2 +- arch/x86/include/asm/vdso/gettimeofday.h | 44 ++++--- arch/x86/include/asm/video.h | 21 +++ arch/x86/include/asm/vm86.h | 2 +- arch/x86/include/asm/vmx.h | 13 ++ arch/x86/include/asm/vmxfeatures.h | 2 +- 56 files changed, 792 insertions(+), 439 deletions(-) delete mode 100644 arch/x86/include/asm/fb.h create mode 100644 arch/x86/include/asm/fpu.h delete mode 100644 arch/x86/include/asm/ia32_unistd.h create mode 100644 arch/x86/include/asm/posted_intr.h create mode 100644 arch/x86/include/asm/video.h (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index f896eed451..5af926c050 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -56,6 +56,8 @@ static inline void disable_acpi(void) extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); +extern int acpi_blacklisted(void); + static inline void acpi_noirq_set(void) { acpi_noirq = 1; } static inline void acpi_disable_pci(void) { diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 0cb2396de0..ba99ef75f5 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -286,20 +286,6 @@ static inline int alternatives_text_reserved(void *start, void *end) asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : : "i" (0), ## input) -/* - * This is similar to alternative_input. But it has two features and - * respective instructions. - * - * If CPU has feature2, newinstr2 is used. - * Otherwise, if CPU has feature1, newinstr1 is used. - * Otherwise, oldinstr is used. - */ -#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \ - ft_flags2, input...) \ - asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \ - newinstr2, ft_flags2) \ - : : "i" (0), ## input) - /* Like alternative_input, but with a single output argument */ #define alternative_io(oldinstr, newinstr, ft_flags, output, input...) \ asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index e6ab0cf15e..9327eb00e9 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -14,6 +14,7 @@ #include #include #include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -92,7 +93,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) { volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); - alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP, + alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, ASM_OUTPUT2("=r" (v), "=m" (*addr)), ASM_OUTPUT2("0" (v), "m" (*addr))); } @@ -500,6 +501,11 @@ static inline bool lapic_vector_set_in_irr(unsigned int vector) return !!(irr & (1U << (vector % 32))); } +static inline bool is_vector_pending(unsigned int vector) +{ + return lapic_vector_set_in_irr(vector) || pi_pending_this_cpu(vector); +} + /* * Warm reset vector position: */ diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index ca8eed1d49..2bec0c89a9 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -229,9 +229,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define _ASM_EXTABLE_UA(from, to) \ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS) -#define _ASM_EXTABLE_CPY(from, to) \ - _ASM_EXTABLE_TYPE(from, to, EX_TYPE_COPY) - #define _ASM_EXTABLE_FAULT(from, to) \ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 55a55ec043..55b4d24356 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -86,11 +86,7 @@ static __always_inline int arch_atomic_add_return(int i, atomic_t *v) } #define arch_atomic_add_return arch_atomic_add_return -static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) -{ - return arch_atomic_add_return(-i, v); -} -#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v) static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { @@ -98,11 +94,7 @@ static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) } #define arch_atomic_fetch_add arch_atomic_fetch_add -static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) -{ - return xadd(&v->counter, -i); -} -#define arch_atomic_fetch_sub arch_atomic_fetch_sub +#define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v) static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index d510405e4e..8db2ec4d6c 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -14,6 +14,32 @@ typedef struct { #define ATOMIC64_INIT(val) { (val) } +/* + * Read an atomic64_t non-atomically. + * + * This is intended to be used in cases where a subsequent atomic operation + * will handle the torn value, and can be used to prime the first iteration + * of unconditional try_cmpxchg() loops, e.g.: + * + * s64 val = arch_atomic64_read_nonatomic(v); + * do { } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i); + * + * This is NOT safe to use where the value is not always checked by a + * subsequent atomic operation, such as in conditional try_cmpxchg() loops + * that can break before the atomic operation, e.g.: + * + * s64 val = arch_atomic64_read_nonatomic(v); + * do { + * if (condition(val)) + * break; + * } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i); + */ +static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v) +{ + /* See comment in arch_atomic_read(). */ + return __READ_ONCE(v->counter); +} + #define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...) #ifndef ATOMIC64_EXPORT #define ATOMIC64_DECL_ONE __ATOMIC64_DECL @@ -61,12 +87,18 @@ ATOMIC64_DECL(add_unless); #undef __ATOMIC64_DECL #undef ATOMIC64_EXPORT -static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { - return arch_cmpxchg64(&v->counter, o, n); + return arch_cmpxchg64(&v->counter, old, new); } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg +static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) +{ + return arch_try_cmpxchg64(&v->counter, old, new); +} +#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg + static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) { s64 o; @@ -195,69 +227,62 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { - s64 old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); } static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { - s64 old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); - return old; + return val; } #define arch_atomic64_fetch_and arch_atomic64_fetch_and static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { - s64 old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); } static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { - s64 old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); - return old; + return val; } #define arch_atomic64_fetch_or arch_atomic64_fetch_or static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { - s64 old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); } static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { - s64 old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); - return old; + return val; } #define arch_atomic64_fetch_xor arch_atomic64_fetch_xor static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { - s64 old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i)); - return old; + return val; } #define arch_atomic64_fetch_add arch_atomic64_fetch_add diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 3165c0feed..ae12acae5b 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -80,11 +80,7 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) } #define arch_atomic64_add_return arch_atomic64_add_return -static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) -{ - return arch_atomic64_add_return(-i, v); -} -#define arch_atomic64_sub_return arch_atomic64_sub_return +#define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v) static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { @@ -92,11 +88,7 @@ static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) } #define arch_atomic64_fetch_add arch_atomic64_fetch_add -static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) -{ - return xadd(&v->counter, -i); -} -#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub +#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v) static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 63bdc6b852..7b44b3c4cc 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -33,20 +33,16 @@ * Returns: * 0 - (index < size) */ -static __always_inline unsigned long array_index_mask_nospec(unsigned long index, - unsigned long size) -{ - unsigned long mask; - - asm volatile ("cmp %1,%2; sbb %0,%0;" - :"=r" (mask) - :"g"(size),"r" (index) - :"cc"); - return mask; -} - -/* Override the default implementation from linux/nospec.h. */ -#define array_index_mask_nospec array_index_mask_nospec +#define array_index_mask_nospec(idx,sz) ({ \ + typeof((idx)+(sz)) __idx = (idx); \ + typeof(__idx) __sz = (sz); \ + unsigned long __mask; \ + asm volatile ("cmp %1,%2; sbb %0,%0" \ + :"=r" (__mask) \ + :ASM_INPUT_G (__sz), \ + "r" (__idx) \ + :"cc"); \ + __mask; }) /* Prevent speculative execution past this barrier. */ #define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 990eb686ca..b96d45944c 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -250,7 +250,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) - : "rm" (word)); + : ASM_INPUT_RM (word)); return word; } @@ -297,7 +297,7 @@ static __always_inline unsigned long __fls(unsigned long word) asm("bsr %1,%0" : "=r" (word) - : "rm" (word)); + : ASM_INPUT_RM (word)); return word; } @@ -320,7 +320,7 @@ static __always_inline int variable_ffs(int x) */ asm("bsfl %1,%0" : "=r" (r) - : "rm" (x), "0" (-1)); + : ASM_INPUT_RM (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" @@ -377,7 +377,7 @@ static __always_inline int fls(unsigned int x) */ asm("bsrl %1,%0" : "=r" (r) - : "rm" (x), "0" (-1)); + : ASM_INPUT_RM (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" @@ -416,7 +416,7 @@ static __always_inline int fls64(__u64 x) */ asm("bsrq %1,%q0" : "+r" (bitpos) - : "rm" (x)); + : ASM_INPUT_RM (x)); return bitpos + 1; } #else diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index a3e0be0470..3e5b111e61 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -6,11 +6,6 @@ #include #include -/* Physical address where kernel should be loaded. */ -#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ - + (CONFIG_PHYSICAL_ALIGN - 1)) \ - & ~(CONFIG_PHYSICAL_ALIGN - 1)) - /* Minimum kernel alignment, as a power of two */ #ifdef CONFIG_X86_64 # define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index b5731c51f0..62cef2113c 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -3,103 +3,148 @@ #define _ASM_X86_CMPXCHG_32_H /* - * Note: if you use set64_bit(), __cmpxchg64(), or their variants, + * Note: if you use __cmpxchg64(), or their variants, * you need to test for the feature in boot_cpu_data. */ -#ifdef CONFIG_X86_CMPXCHG64 -#define arch_cmpxchg64(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ - (unsigned long long)(n))) -#define arch_cmpxchg64_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \ - (unsigned long long)(n))) -#define arch_try_cmpxchg64(ptr, po, n) \ - __try_cmpxchg64((ptr), (unsigned long long *)(po), \ - (unsigned long long)(n)) -#endif +union __u64_halves { + u64 full; + struct { + u32 low, high; + }; +}; + +#define __arch_cmpxchg64(_ptr, _old, _new, _lock) \ +({ \ + union __u64_halves o = { .full = (_old), }, \ + n = { .full = (_new), }; \ + \ + asm volatile(_lock "cmpxchg8b %[ptr]" \ + : [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + o.full; \ +}) + + +static __always_inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) +{ + return __arch_cmpxchg64(ptr, old, new, LOCK_PREFIX); +} -static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) +static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) { - u64 prev; - asm volatile(LOCK_PREFIX "cmpxchg8b %1" - : "=A" (prev), - "+m" (*ptr) - : "b" ((u32)new), - "c" ((u32)(new >> 32)), - "0" (old) - : "memory"); - return prev; + return __arch_cmpxchg64(ptr, old, new,); } -static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) +#define __arch_try_cmpxchg64(_ptr, _oldp, _new, _lock) \ +({ \ + union __u64_halves o = { .full = *(_oldp), }, \ + n = { .full = (_new), }; \ + bool ret; \ + \ + asm volatile(_lock "cmpxchg8b %[ptr]" \ + CC_SET(e) \ + : CC_OUT(e) (ret), \ + [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + if (unlikely(!ret)) \ + *(_oldp) = o.full; \ + \ + likely(ret); \ +}) + +static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new) { - u64 prev; - asm volatile("cmpxchg8b %1" - : "=A" (prev), - "+m" (*ptr) - : "b" ((u32)new), - "c" ((u32)(new >> 32)), - "0" (old) - : "memory"); - return prev; + return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX); } -static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new) +static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new) { - bool success; - u64 old = *pold; - asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]" - CC_SET(z) - : CC_OUT(z) (success), - [ptr] "+m" (*ptr), - "+A" (old) - : "b" ((u32)new), - "c" ((u32)(new >> 32)) - : "memory"); - - if (unlikely(!success)) - *pold = old; - return success; + return __arch_try_cmpxchg64(ptr, oldp, new,); } -#ifndef CONFIG_X86_CMPXCHG64 +#ifdef CONFIG_X86_CMPXCHG64 + +#define arch_cmpxchg64 __cmpxchg64 + +#define arch_cmpxchg64_local __cmpxchg64_local + +#define arch_try_cmpxchg64 __try_cmpxchg64 + +#define arch_try_cmpxchg64_local __try_cmpxchg64_local + +#else + /* * Building a kernel capable running on 80386 and 80486. It may be necessary * to simulate the cmpxchg8b on the 80386 and 80486 CPU. */ -#define arch_cmpxchg64(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __typeof__(*(ptr)) __old = (o); \ - __typeof__(*(ptr)) __new = (n); \ - alternative_io(LOCK_PREFIX_HERE \ - "call cmpxchg8b_emu", \ - "lock; cmpxchg8b (%%esi)" , \ - X86_FEATURE_CX8, \ - "=A" (__ret), \ - "S" ((ptr)), "0" (__old), \ - "b" ((unsigned int)__new), \ - "c" ((unsigned int)(__new>>32)) \ - : "memory"); \ - __ret; }) - - -#define arch_cmpxchg64_local(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __typeof__(*(ptr)) __old = (o); \ - __typeof__(*(ptr)) __new = (n); \ - alternative_io("call cmpxchg8b_emu", \ - "cmpxchg8b (%%esi)" , \ - X86_FEATURE_CX8, \ - "=A" (__ret), \ - "S" ((ptr)), "0" (__old), \ - "b" ((unsigned int)__new), \ - "c" ((unsigned int)(__new>>32)) \ - : "memory"); \ - __ret; }) +#define __arch_cmpxchg64_emu(_ptr, _old, _new, _lock_loc, _lock) \ +({ \ + union __u64_halves o = { .full = (_old), }, \ + n = { .full = (_new), }; \ + \ + asm volatile(ALTERNATIVE(_lock_loc \ + "call cmpxchg8b_emu", \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + : "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ + : "memory"); \ + \ + o.full; \ +}) + +static __always_inline u64 arch_cmpxchg64(volatile u64 *ptr, u64 old, u64 new) +{ + return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock; "); +} +#define arch_cmpxchg64 arch_cmpxchg64 + +static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) +{ + return __arch_cmpxchg64_emu(ptr, old, new, ,); +} +#define arch_cmpxchg64_local arch_cmpxchg64_local + +#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new, _lock_loc, _lock) \ +({ \ + union __u64_halves o = { .full = *(_oldp), }, \ + n = { .full = (_new), }; \ + bool ret; \ + \ + asm volatile(ALTERNATIVE(_lock_loc \ + "call cmpxchg8b_emu", \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + CC_SET(e) \ + : CC_OUT(e) (ret), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ + : "memory"); \ + \ + if (unlikely(!ret)) \ + *(_oldp) = o.full; \ + \ + likely(ret); \ +}) + +static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new) +{ + return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock; "); +} +#define arch_try_cmpxchg64 arch_try_cmpxchg64 + +static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new) +{ + return __arch_try_cmpxchg64_emu(ptr, oldp, new, ,); +} +#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local #endif diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index c1d6cd58f8..5e241306db 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -20,6 +20,12 @@ arch_try_cmpxchg((ptr), (po), (n)); \ }) +#define arch_try_cmpxchg64_local(ptr, po, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_try_cmpxchg_local((ptr), (po), (n)); \ +}) + union __u128_halves { u128 full; struct { diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index e8e3dbe7f1..b6325ee308 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -288,6 +288,14 @@ struct x86_cpu_desc { .x86_microcode_rev = (revision), \ } +#define AMD_CPU_DESC(fam, model, stepping, revision) { \ + .x86_family = (fam), \ + .x86_vendor = X86_VENDOR_AMD, \ + .x86_model = (model), \ + .x86_stepping = (stepping), \ + .x86_microcode_rev = (revision), \ +} + extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3508f3fc92..0b9611da6c 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -129,8 +129,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define this_cpu_has(bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ - x86_this_cpu_test_bit(bit, \ - (unsigned long __percpu *)&cpu_info.x86_capability)) + x86_this_cpu_test_bit(bit, cpu_info.x86_capability)) /* * This macro is for detection of features which need kernel @@ -150,8 +149,12 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; extern void setup_clear_cpu_cap(unsigned int bit); extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); -#define setup_force_cpu_cap(bit) do { \ - set_cpu_cap(&boot_cpu_data, bit); \ +#define setup_force_cpu_cap(bit) do { \ + \ + if (!boot_cpu_has(bit)) \ + WARN_ON(alternatives_patched); \ + \ + set_cpu_cap(&boot_cpu_data, bit); \ set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) @@ -172,11 +175,10 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm goto( - ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") + asm goto(ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" - " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n" + " testb %[bitnum], %a[cap_byte]\n" " jnz %l[t_yes]\n" " jmp %l[t_no]\n" ".popsection\n" diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 7acf0383be..906b0d5541 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -36,7 +36,7 @@ #define EX_TYPE_DEFAULT 1 #define EX_TYPE_FAULT 2 #define EX_TYPE_UACCESS 3 -#define EX_TYPE_COPY 4 +/* unused, was: #define EX_TYPE_COPY 4 */ #define EX_TYPE_CLEAR_FS 5 #define EX_TYPE_FPU_RESTORE 6 #define EX_TYPE_BPF 7 diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h deleted file mode 100644 index c3b9582de7..0000000000 --- a/arch/x86/include/asm/fb.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_FB_H -#define _ASM_X86_FB_H - -#include - -struct fb_info; - -pgprot_t pgprot_framebuffer(pgprot_t prot, - unsigned long vm_start, unsigned long vm_end, - unsigned long offset); -#define pgprot_framebuffer pgprot_framebuffer - -int fb_is_primary_device(struct fb_info *info); -#define fb_is_primary_device fb_is_primary_device - -#include - -#endif /* _ASM_X86_FB_H */ diff --git a/arch/x86/include/asm/fpu.h b/arch/x86/include/asm/fpu.h new file mode 100644 index 0000000000..b2743fe193 --- /dev/null +++ b/arch/x86/include/asm/fpu.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef _ASM_X86_FPU_H +#define _ASM_X86_FPU_H + +#include + +#define kernel_fpu_available() true + +#endif /* ! _ASM_X86_FPU_H */ diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index a2be3aefff..f86ad33355 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -143,6 +143,9 @@ extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfe extern u64 xstate_get_guest_group_perm(void); +extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); + + /* KVM specific functions */ extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index ace9aa3b78..eb17f31b06 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -2,8 +2,8 @@ /* * FPU data structures: */ -#ifndef _ASM_X86_FPU_H -#define _ASM_X86_FPU_H +#ifndef _ASM_X86_FPU_TYPES_H +#define _ASM_X86_FPU_TYPES_H #include @@ -596,4 +596,4 @@ struct fpu_state_config { /* FPU state configuration information */ extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg; -#endif /* _ASM_X86_FPU_H */ +#endif /* _ASM_X86_FPU_TYPES_H */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index fbc7722b87..c67fa6ad09 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -44,10 +44,16 @@ typedef struct { unsigned int irq_hv_reenlightenment_count; unsigned int hyperv_stimer0_count; #endif +#ifdef CONFIG_X86_POSTED_MSI + unsigned int posted_msi_notification_count; +#endif } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +#ifdef CONFIG_X86_POSTED_MSI +DECLARE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); +#endif #define __ARCH_IRQ_STAT #define inc_irq_stat(member) this_cpu_inc(irq_stat.member) diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 4212c00c97..9d69f3f8db 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -56,17 +56,6 @@ struct stat64 { unsigned long long st_ino; } __attribute__((packed)); -#define IA32_STACK_TOP IA32_PAGE_OFFSET - -#ifdef __KERNEL__ -struct linux_binprm; -extern int ia32_setup_arg_pages(struct linux_binprm *bprm, - unsigned long stack_top, int exec_stack); -struct mm_struct; -extern void ia32_pick_mmap_layout(struct mm_struct *mm); - -#endif - extern bool __ia32_enabled; static __always_inline bool ia32_enabled(void) diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h deleted file mode 100644 index aa065c94cc..0000000000 --- a/arch/x86/include/asm/ia32_unistd.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_IA32_UNISTD_H -#define _ASM_X86_IA32_UNISTD_H - -/* - * This file contains the system call numbers of the ia32 compat ABI, - * this is for the kernel only. - */ -#define __SYSCALL_ia32_NR(x) (x) -#include - -#endif /* _ASM_X86_IA32_UNISTD_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 749c7411d2..d4f24499b2 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -751,6 +751,12 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested # define fred_sysvec_kvm_posted_intr_nested_ipi NULL #endif +# ifdef CONFIG_X86_POSTED_MSI +DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, sysvec_posted_msi_notification); +#else +# define fred_sysvec_posted_msi_notification NULL +# endif + #if IS_ENABLED(CONFIG_HYPERV) DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index b56c574158..53e4015242 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -35,6 +35,8 @@ #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ #define INAT_PFX_EVEX 15 /* EVEX prefix */ +/* x86-64 REX2 prefix */ +#define INAT_PFX_REX2 16 /* 0xD5 */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 @@ -50,7 +52,7 @@ /* Legacy prefix */ #define INAT_PFX_OFFS 0 -#define INAT_PFX_BITS 4 +#define INAT_PFX_BITS 5 #define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) #define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) /* Escape opcodes */ @@ -77,6 +79,9 @@ #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) #define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) +#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8)) +#define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9)) +#define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -128,6 +133,11 @@ static inline int inat_is_rex_prefix(insn_attr_t attr) return (attr & INAT_PFX_MASK) == INAT_PFX_REX; } +static inline int inat_is_rex2_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX2; +} + static inline int inat_last_prefix_id(insn_attr_t attr) { if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) @@ -227,4 +237,9 @@ static inline int inat_must_evex(insn_attr_t attr) { return attr & INAT_EVEXONLY; } + +static inline int inat_evex_scalable(insn_attr_t attr) +{ + return attr & INAT_EVEX_SCALABLE; +} #endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 1b29f58f73..7152ea809e 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -112,10 +112,15 @@ struct insn { #define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) #define X86_SIB_BASE(sib) ((sib) & 0x07) -#define X86_REX_W(rex) ((rex) & 8) -#define X86_REX_R(rex) ((rex) & 4) -#define X86_REX_X(rex) ((rex) & 2) -#define X86_REX_B(rex) ((rex) & 1) +#define X86_REX2_M(rex) ((rex) & 0x80) /* REX2 M0 */ +#define X86_REX2_R(rex) ((rex) & 0x40) /* REX2 R4 */ +#define X86_REX2_X(rex) ((rex) & 0x20) /* REX2 X4 */ +#define X86_REX2_B(rex) ((rex) & 0x10) /* REX2 B4 */ + +#define X86_REX_W(rex) ((rex) & 8) /* REX or REX2 W */ +#define X86_REX_R(rex) ((rex) & 4) /* REX or REX2 R3 */ +#define X86_REX_X(rex) ((rex) & 2) /* REX or REX2 X3 */ +#define X86_REX_B(rex) ((rex) & 1) /* REX or REX2 B3 */ /* VEX bit flags */ #define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ @@ -161,6 +166,18 @@ static inline void insn_get_attribute(struct insn *insn) /* Instruction uses RIP-relative addressing */ extern int insn_rip_relative(struct insn *insn); +static inline int insn_is_rex2(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return insn->rex_prefix.nbytes == 2; +} + +static inline insn_byte_t insn_rex2_m_bit(struct insn *insn) +{ + return X86_REX2_M(insn->rex_prefix.bytes[1]); +} + static inline int insn_is_avx(struct insn *insn) { if (!insn->prefixes.got) @@ -198,6 +215,13 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn) return X86_VEX_P(insn->vex_prefix.bytes[2]); } +static inline insn_byte_t insn_vex_w_bit(struct insn *insn) +{ + if (insn->vex_prefix.nbytes < 3) + return 0; + return X86_VEX_W(insn->vex_prefix.bytes[2]); +} + /* Get the last prefix id from last prefix or VEX prefix */ static inline int insn_last_prefix_id(struct insn *insn) { diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index d0941f4c27..f81a851c46 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -40,137 +40,221 @@ * their own names :-( */ +#define IFM(_fam, _model) VFM_MAKE(X86_VENDOR_INTEL, _fam, _model) + /* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ #define INTEL_FAM6_ANY X86_MODEL_ANY +/* Wildcard match for FAM6 so X86_MATCH_VFM(ANY) works */ +#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY) #define INTEL_FAM6_CORE_YONAH 0x0E +#define INTEL_CORE_YONAH IFM(6, 0x0E) #define INTEL_FAM6_CORE2_MEROM 0x0F +#define INTEL_CORE2_MEROM IFM(6, 0x0F) #define INTEL_FAM6_CORE2_MEROM_L 0x16 +#define INTEL_CORE2_MEROM_L IFM(6, 0x16) #define INTEL_FAM6_CORE2_PENRYN 0x17 +#define INTEL_CORE2_PENRYN IFM(6, 0x17) #define INTEL_FAM6_CORE2_DUNNINGTON 0x1D +#define INTEL_CORE2_DUNNINGTON IFM(6, 0x1D) #define INTEL_FAM6_NEHALEM 0x1E +#define INTEL_NEHALEM IFM(6, 0x1E) #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ +#define INTEL_NEHALEM_G IFM(6, 0x1F) /* Auburndale / Havendale */ #define INTEL_FAM6_NEHALEM_EP 0x1A +#define INTEL_NEHALEM_EP IFM(6, 0x1A) #define INTEL_FAM6_NEHALEM_EX 0x2E +#define INTEL_NEHALEM_EX IFM(6, 0x2E) #define INTEL_FAM6_WESTMERE 0x25 +#define INTEL_WESTMERE IFM(6, 0x25) #define INTEL_FAM6_WESTMERE_EP 0x2C +#define INTEL_WESTMERE_EP IFM(6, 0x2C) #define INTEL_FAM6_WESTMERE_EX 0x2F +#define INTEL_WESTMERE_EX IFM(6, 0x2F) #define INTEL_FAM6_SANDYBRIDGE 0x2A +#define INTEL_SANDYBRIDGE IFM(6, 0x2A) #define INTEL_FAM6_SANDYBRIDGE_X 0x2D +#define INTEL_SANDYBRIDGE_X IFM(6, 0x2D) #define INTEL_FAM6_IVYBRIDGE 0x3A +#define INTEL_IVYBRIDGE IFM(6, 0x3A) #define INTEL_FAM6_IVYBRIDGE_X 0x3E +#define INTEL_IVYBRIDGE_X IFM(6, 0x3E) #define INTEL_FAM6_HASWELL 0x3C +#define INTEL_HASWELL IFM(6, 0x3C) #define INTEL_FAM6_HASWELL_X 0x3F +#define INTEL_HASWELL_X IFM(6, 0x3F) #define INTEL_FAM6_HASWELL_L 0x45 +#define INTEL_HASWELL_L IFM(6, 0x45) #define INTEL_FAM6_HASWELL_G 0x46 +#define INTEL_HASWELL_G IFM(6, 0x46) #define INTEL_FAM6_BROADWELL 0x3D +#define INTEL_BROADWELL IFM(6, 0x3D) #define INTEL_FAM6_BROADWELL_G 0x47 +#define INTEL_BROADWELL_G IFM(6, 0x47) #define INTEL_FAM6_BROADWELL_X 0x4F +#define INTEL_BROADWELL_X IFM(6, 0x4F) #define INTEL_FAM6_BROADWELL_D 0x56 +#define INTEL_BROADWELL_D IFM(6, 0x56) #define INTEL_FAM6_SKYLAKE_L 0x4E /* Sky Lake */ +#define INTEL_SKYLAKE_L IFM(6, 0x4E) /* Sky Lake */ #define INTEL_FAM6_SKYLAKE 0x5E /* Sky Lake */ +#define INTEL_SKYLAKE IFM(6, 0x5E) /* Sky Lake */ #define INTEL_FAM6_SKYLAKE_X 0x55 /* Sky Lake */ +#define INTEL_SKYLAKE_X IFM(6, 0x55) /* Sky Lake */ /* CASCADELAKE_X 0x55 Sky Lake -- s: 7 */ /* COOPERLAKE_X 0x55 Sky Lake -- s: 11 */ #define INTEL_FAM6_KABYLAKE_L 0x8E /* Sky Lake */ +#define INTEL_KABYLAKE_L IFM(6, 0x8E) /* Sky Lake */ /* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */ /* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */ /* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */ #define INTEL_FAM6_KABYLAKE 0x9E /* Sky Lake */ +#define INTEL_KABYLAKE IFM(6, 0x9E) /* Sky Lake */ /* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */ #define INTEL_FAM6_COMETLAKE 0xA5 /* Sky Lake */ +#define INTEL_COMETLAKE IFM(6, 0xA5) /* Sky Lake */ #define INTEL_FAM6_COMETLAKE_L 0xA6 /* Sky Lake */ +#define INTEL_COMETLAKE_L IFM(6, 0xA6) /* Sky Lake */ #define INTEL_FAM6_CANNONLAKE_L 0x66 /* Palm Cove */ +#define INTEL_CANNONLAKE_L IFM(6, 0x66) /* Palm Cove */ #define INTEL_FAM6_ICELAKE_X 0x6A /* Sunny Cove */ +#define INTEL_ICELAKE_X IFM(6, 0x6A) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE_D 0x6C /* Sunny Cove */ +#define INTEL_ICELAKE_D IFM(6, 0x6C) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE 0x7D /* Sunny Cove */ +#define INTEL_ICELAKE IFM(6, 0x7D) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ +#define INTEL_ICELAKE_L IFM(6, 0x7E) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ +#define INTEL_ICELAKE_NNPI IFM(6, 0x9D) /* Sunny Cove */ #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ +#define INTEL_ROCKETLAKE IFM(6, 0xA7) /* Cypress Cove */ #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ +#define INTEL_TIGERLAKE_L IFM(6, 0x8C) /* Willow Cove */ #define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */ +#define INTEL_TIGERLAKE IFM(6, 0x8D) /* Willow Cove */ #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ +#define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */ #define INTEL_FAM6_EMERALDRAPIDS_X 0xCF +#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF) #define INTEL_FAM6_GRANITERAPIDS_X 0xAD +#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) #define INTEL_FAM6_GRANITERAPIDS_D 0xAE +#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE) /* "Hybrid" Processors (P-Core/E-Core) */ #define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ +#define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ +#define INTEL_ALDERLAKE IFM(6, 0x97) /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ +#define INTEL_ALDERLAKE_L IFM(6, 0x9A) /* Golden Cove / Gracemont */ #define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */ +#define INTEL_RAPTORLAKE IFM(6, 0xB7) /* Raptor Cove / Enhanced Gracemont */ #define INTEL_FAM6_RAPTORLAKE_P 0xBA +#define INTEL_RAPTORLAKE_P IFM(6, 0xBA) #define INTEL_FAM6_RAPTORLAKE_S 0xBF +#define INTEL_RAPTORLAKE_S IFM(6, 0xBF) #define INTEL_FAM6_METEORLAKE 0xAC +#define INTEL_METEORLAKE IFM(6, 0xAC) #define INTEL_FAM6_METEORLAKE_L 0xAA +#define INTEL_METEORLAKE_L IFM(6, 0xAA) #define INTEL_FAM6_ARROWLAKE_H 0xC5 +#define INTEL_ARROWLAKE_H IFM(6, 0xC5) #define INTEL_FAM6_ARROWLAKE 0xC6 +#define INTEL_ARROWLAKE IFM(6, 0xC6) #define INTEL_FAM6_ARROWLAKE_U 0xB5 +#define INTEL_ARROWLAKE_U IFM(6, 0xB5) #define INTEL_FAM6_LUNARLAKE_M 0xBD +#define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ +#define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */ #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ +#define INTEL_ATOM_BONNELL_MID IFM(6, 0x26) /* Silverthorne, Lincroft */ #define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */ +#define INTEL_ATOM_SALTWELL IFM(6, 0x36) /* Cedarview */ #define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */ +#define INTEL_ATOM_SALTWELL_MID IFM(6, 0x27) /* Penwell */ #define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ +#define INTEL_ATOM_SALTWELL_TABLET IFM(6, 0x35) /* Cloverview */ #define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ +#define INTEL_ATOM_SILVERMONT IFM(6, 0x37) /* Bay Trail, Valleyview */ #define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */ +#define INTEL_ATOM_SILVERMONT_D IFM(6, 0x4D) /* Avaton, Rangely */ #define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ +#define INTEL_ATOM_SILVERMONT_MID IFM(6, 0x4A) /* Merriefield */ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ +#define INTEL_ATOM_AIRMONT IFM(6, 0x4C) /* Cherry Trail, Braswell */ #define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ +#define INTEL_ATOM_AIRMONT_MID IFM(6, 0x5A) /* Moorefield */ #define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */ +#define INTEL_ATOM_AIRMONT_NP IFM(6, 0x75) /* Lightning Mountain */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ +#define INTEL_ATOM_GOLDMONT IFM(6, 0x5C) /* Apollo Lake */ #define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */ +#define INTEL_ATOM_GOLDMONT_D IFM(6, 0x5F) /* Denverton */ /* Note: the micro-architecture is "Goldmont Plus" */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ +#define INTEL_ATOM_GOLDMONT_PLUS IFM(6, 0x7A) /* Gemini Lake */ #define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */ +#define INTEL_ATOM_TREMONT_D IFM(6, 0x86) /* Jacobsville */ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ +#define INTEL_ATOM_TREMONT IFM(6, 0x96) /* Elkhart Lake */ #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ +#define INTEL_ATOM_TREMONT_L IFM(6, 0x9C) /* Jasper Lake */ #define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */ +#define INTEL_ATOM_GRACEMONT IFM(6, 0xBE) /* Alderlake N */ #define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ +#define INTEL_ATOM_CRESTMONT_X IFM(6, 0xAF) /* Sierra Forest */ #define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ +#define INTEL_ATOM_CRESTMONT IFM(6, 0xB6) /* Grand Ridge */ #define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */ +#define INTEL_ATOM_DARKMONT_X IFM(6, 0xDD) /* Clearwater Forest */ /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ +#define INTEL_XEON_PHI_KNL IFM(6, 0x57) /* Knights Landing */ #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ +#define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */ /* Family 5 */ #define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ +#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ #endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 294cd2a408..1d60427379 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -42,6 +42,7 @@ #include #include #include +#include #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ @@ -209,6 +210,23 @@ void memset_io(volatile void __iomem *, int, size_t); #define memcpy_toio memcpy_toio #define memset_io memset_io +#ifdef CONFIG_X86_64 +/* + * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for + * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy + * loop. + */ +static inline void __iowrite32_copy(void __iomem *to, const void *from, + size_t count) +{ + asm volatile("rep ; movsl" + : "=&c"(count), "=&D"(to), "=&S"(from) + : "0"(count), "1"(to), "2"(from) + : "memory"); +} +#define __iowrite32_copy __iowrite32_copy +#endif + /* * ISA space is 'always mapped' on a typical x86 system, no need to * explicitly ioremap() it. The fact that the ISA IO space is mapped diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 7a2ed154a5..5036f13ab6 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -50,6 +50,13 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void) return x86_vector_domain; } +extern bool enable_posted_msi; + +static inline bool posted_msi_supported(void) +{ + return enable_posted_msi && irq_remapping_cap(IRQ_POSTING_CAP); +} + #else /* CONFIG_IRQ_REMAP */ static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index d18bfb238f..13aea8fc3d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -97,10 +97,16 @@ #define LOCAL_TIMER_VECTOR 0xec +/* + * Posted interrupt notification vector for all device MSIs delivered to + * the host kernel. + */ +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb + #define NR_VECTORS 256 #ifdef CONFIG_X86_LOCAL_APIC -#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR #else #define FIRST_SYSTEM_VECTOR NR_VECTORS #endif diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 91ca9a9ee3..ae5482a2f0 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -207,18 +207,11 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image); extern void kdump_nmi_shootdown_cpus(void); #ifdef CONFIG_CRASH_HOTPLUG -void arch_crash_handle_hotplug_event(struct kimage *image); +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg); #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event -#ifdef CONFIG_HOTPLUG_CPU -int arch_crash_hotplug_cpu_support(void); -#define crash_hotplug_cpu_support arch_crash_hotplug_cpu_support -#endif - -#ifdef CONFIG_MEMORY_HOTPLUG -int arch_crash_hotplug_memory_support(void); -#define crash_hotplug_memory_support arch_crash_hotplug_memory_support -#endif +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags); +#define arch_crash_hotplug_support arch_crash_hotplug_support unsigned int arch_crash_get_elfcorehdr_size(void); #define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 110d7f29ca..8d5a2b4f5f 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -85,7 +85,6 @@ KVM_X86_OP_OPTIONAL(update_cr8_intercept) KVM_X86_OP(refresh_apicv_exec_ctrl) KVM_X86_OP_OPTIONAL(hwapic_irr_update) KVM_X86_OP_OPTIONAL(hwapic_isr_update) -KVM_X86_OP_OPTIONAL_RET0(guest_apic_has_interrupt) KVM_X86_OP_OPTIONAL(load_eoi_exitmap) KVM_X86_OP_OPTIONAL(set_virtual_apic_mode) KVM_X86_OP_OPTIONAL(set_apic_access_page_addr) @@ -121,6 +120,7 @@ KVM_X86_OP(enter_smm) KVM_X86_OP(leave_smm) KVM_X86_OP(enable_smi_window) #endif +KVM_X86_OP_OPTIONAL(dev_get_attr) KVM_X86_OP_OPTIONAL(mem_enc_ioctl) KVM_X86_OP_OPTIONAL(mem_enc_register_region) KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6efd1497b0..d0274b3be2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -254,28 +254,31 @@ enum x86_intercept_stage; KVM_GUESTDBG_INJECT_DB | \ KVM_GUESTDBG_BLOCKIRQ) +#define PFERR_PRESENT_MASK BIT(0) +#define PFERR_WRITE_MASK BIT(1) +#define PFERR_USER_MASK BIT(2) +#define PFERR_RSVD_MASK BIT(3) +#define PFERR_FETCH_MASK BIT(4) +#define PFERR_PK_MASK BIT(5) +#define PFERR_SGX_MASK BIT(15) +#define PFERR_GUEST_RMP_MASK BIT_ULL(31) +#define PFERR_GUEST_FINAL_MASK BIT_ULL(32) +#define PFERR_GUEST_PAGE_MASK BIT_ULL(33) +#define PFERR_GUEST_ENC_MASK BIT_ULL(34) +#define PFERR_GUEST_SIZEM_MASK BIT_ULL(35) +#define PFERR_GUEST_VMPL_MASK BIT_ULL(36) -#define PFERR_PRESENT_BIT 0 -#define PFERR_WRITE_BIT 1 -#define PFERR_USER_BIT 2 -#define PFERR_RSVD_BIT 3 -#define PFERR_FETCH_BIT 4 -#define PFERR_PK_BIT 5 -#define PFERR_SGX_BIT 15 -#define PFERR_GUEST_FINAL_BIT 32 -#define PFERR_GUEST_PAGE_BIT 33 -#define PFERR_IMPLICIT_ACCESS_BIT 48 - -#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) -#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) -#define PFERR_USER_MASK BIT(PFERR_USER_BIT) -#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) -#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) -#define PFERR_PK_MASK BIT(PFERR_PK_BIT) -#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) -#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) -#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) -#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) +/* + * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks + * when emulating instructions that triggers implicit access. + */ +#define PFERR_IMPLICIT_ACCESS BIT_ULL(48) +/* + * PRIVATE_ACCESS is a KVM-defined flag us to indicate that a fault occurred + * when the guest was accessing private memory. + */ +#define PFERR_PRIVATE_ACCESS BIT_ULL(49) +#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ PFERR_WRITE_MASK | \ @@ -994,9 +997,6 @@ struct kvm_vcpu_arch { u64 msr_kvm_poll_control; - /* set at EPT violation at this point */ - unsigned long exit_qualification; - /* pv related host specific info */ struct { bool pv_unhalted; @@ -1280,12 +1280,14 @@ enum kvm_apicv_inhibit { }; struct kvm_arch { - unsigned long vm_type; unsigned long n_used_mmu_pages; unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; u8 mmu_valid_gen; + u8 vm_type; + bool has_private_mem; + bool has_protected_state; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; @@ -1312,6 +1314,8 @@ struct kvm_arch { */ spinlock_t mmu_unsync_pages_lock; + u64 shadow_mmio_value; + struct iommu_domain *iommu_domain; bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA @@ -1710,7 +1714,6 @@ struct kvm_x86_ops { void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(int isr); - bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); @@ -1779,6 +1782,7 @@ struct kvm_x86_ops { void (*enable_smi_window)(struct kvm_vcpu *vcpu); #endif + int (*dev_get_attr)(u32 group, u64 attr, u64 *val); int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); @@ -1814,7 +1818,7 @@ struct kvm_x86_nested_ops { bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector, u32 error_code); int (*check_events)(struct kvm_vcpu *vcpu); - bool (*has_events)(struct kvm_vcpu *vcpu); + bool (*has_events)(struct kvm_vcpu *vcpu, bool for_injection); void (*triple_fault)(struct kvm_vcpu *vcpu); int (*get_state)(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, @@ -1844,6 +1848,7 @@ struct kvm_arch_async_pf { gfn_t gfn; unsigned long cr3; bool direct_map; + u64 error_code; }; extern u32 __read_mostly kvm_nr_uret_msrs; @@ -2140,10 +2145,15 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, kvm_set_or_clear_apicv_inhibit(kvm, reason, false); } +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); +void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, unsigned long roots); @@ -2153,8 +2163,9 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level); + #ifdef CONFIG_KVM_PRIVATE_MEM -#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM) +#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) #else #define kvm_arch_has_private_mem(kvm) false #endif diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index de31183058..dfd2e9699b 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -13,6 +13,7 @@ #define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */ #define MCG_EXT_P BIT_ULL(9) /* Extended registers available */ #define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */ +#define MCG_SEAM_NR BIT_ULL(12) /* MCG_STATUS_SEAM_NR supported */ #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ #define MCG_EXT_CNT_SHIFT 16 #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) @@ -25,6 +26,7 @@ #define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */ #define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */ +#define MCG_STATUS_SEAM_NR BIT_ULL(12) /* Machine check inside SEAM non-root mode */ /* MCG_EXT_CTL register defines */ #define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index c72c7ff78f..d593e52e66 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -16,10 +16,10 @@ extern int pic_mode; * Summit or generic (i.e. installer) kernels need lots of bus entries. * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ -#if CONFIG_BASE_SMALL == 0 -# define MAX_MP_BUSSES 260 -#else +#ifdef CONFIG_BASE_SMALL # define MAX_MP_BUSSES 32 +#else +# define MAX_MP_BUSSES 260 #endif #define MAX_IRQ_SOURCES 256 diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e72c2b8729..e022e6eb76 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -170,6 +170,10 @@ * CPU is not affected by Branch * History Injection. */ +#define ARCH_CAP_XAPIC_DISABLE BIT(21) /* + * IA32_XAPIC_DISABLE_STATUS MSR + * supported + */ #define ARCH_CAP_PBRSB_NO BIT(24) /* * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. @@ -192,11 +196,6 @@ * File. */ -#define ARCH_CAP_XAPIC_DISABLE BIT(21) /* - * IA32_XAPIC_DISABLE_STATUS MSR - * supported - */ - #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* * Writeback and invalidate the diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 9da9c8a2f1..52f1b4ff0c 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -31,10 +31,12 @@ #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC -#define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ - CONFIG_PHYSICAL_ALIGN) +/* Physical address where kernel should be loaded. */ +#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + + (CONFIG_PHYSICAL_ALIGN - 1)) \ + & ~(CONFIG_PHYSICAL_ALIGN - 1)) -#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) +#define __START_KERNEL (__START_KERNEL_map + LOAD_PHYSICAL_ADDR) #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ce5111cec3..3bedee1801 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -59,12 +59,6 @@ #define __force_percpu_prefix "%%"__stringify(__percpu_seg)":" #define __my_cpu_offset this_cpu_read(this_cpu_off) -#ifdef CONFIG_X86_64 -#define __raw_my_cpu_offset raw_cpu_read_8(this_cpu_off); -#else -#define __raw_my_cpu_offset raw_cpu_read_4(this_cpu_off); -#endif - /* * Compared to the generic __my_cpu_offset version, the following * saves one instruction and avoids clobbering a temp register. @@ -76,7 +70,7 @@ #ifndef BUILD_VDSO32_64 #define arch_raw_cpu_ptr(_ptr) \ ({ \ - unsigned long tcp_ptr__ = __raw_my_cpu_offset; \ + unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off); \ tcp_ptr__ += (__force unsigned long)(_ptr); \ (typeof(*(_ptr)) __kernel __force *)tcp_ptr__; \ }) @@ -96,7 +90,7 @@ #endif /* CONFIG_SMP */ #define __my_cpu_type(var) typeof(var) __percpu_seg_override -#define __my_cpu_ptr(ptr) (__my_cpu_type(*ptr)*)(__force uintptr_t)(ptr) +#define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr) #define __my_cpu_var(var) (*__my_cpu_ptr(&(var))) #define __percpu_arg(x) __percpu_prefix "%" #x #define __force_percpu_arg(x) __force_percpu_prefix "%" #x @@ -224,25 +218,26 @@ do { \ }) /* - * xchg is implemented using cmpxchg without a lock prefix. xchg is - * expensive due to the implied lock prefix. The processor cannot prefetch - * cachelines if xchg is used. + * raw_cpu_xchg() can use a load-store since + * it is not required to be IRQ-safe. */ -#define percpu_xchg_op(size, qual, _var, _nval) \ +#define raw_percpu_xchg_op(_var, _nval) \ ({ \ - __pcpu_type_##size pxo_old__; \ - __pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval); \ - asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), \ - "%[oval]") \ - "\n1:\t" \ - __pcpu_op2_##size("cmpxchg", "%[nval]", \ - __percpu_arg([var])) \ - "\n\tjnz 1b" \ - : [oval] "=&a" (pxo_old__), \ - [var] "+m" (__my_cpu_var(_var)) \ - : [nval] __pcpu_reg_##size(, pxo_new__) \ - : "memory"); \ - (typeof(_var))(unsigned long) pxo_old__; \ + typeof(_var) pxo_old__ = raw_cpu_read(_var); \ + raw_cpu_write(_var, _nval); \ + pxo_old__; \ +}) + +/* + * this_cpu_xchg() is implemented using cmpxchg without a lock prefix. + * xchg is expensive due to the implied lock prefix. The processor + * cannot prefetch cachelines if xchg is used. + */ +#define this_percpu_xchg_op(_var, _nval) \ +({ \ + typeof(_var) pxo_old__ = this_cpu_read(_var); \ + do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval)); \ + pxo_old__; \ }) /* @@ -422,10 +417,6 @@ do { \ * actually per-thread variables implemented as per-CPU variables and * thus stable for the duration of the respective task. */ -#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp) -#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp) -#define this_cpu_read_stable_4(pcp) percpu_stable_op(4, "mov", pcp) -#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp) #define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp) #ifdef CONFIG_USE_X86_SEG_SUPPORT @@ -494,6 +485,10 @@ do { \ #define this_cpu_read_const(pcp) ({ BUILD_BUG(); (typeof(pcp))0; }) #endif /* CONFIG_USE_X86_SEG_SUPPORT */ +#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp) +#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp) +#define this_cpu_read_stable_4(pcp) percpu_stable_op(4, "mov", pcp) + #define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val) #define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val) #define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val) @@ -503,18 +498,6 @@ do { \ #define raw_cpu_or_1(pcp, val) percpu_to_op(1, , "or", (pcp), val) #define raw_cpu_or_2(pcp, val) percpu_to_op(2, , "or", (pcp), val) #define raw_cpu_or_4(pcp, val) percpu_to_op(4, , "or", (pcp), val) - -/* - * raw_cpu_xchg() can use a load-store since it is not required to be - * IRQ-safe. - */ -#define raw_percpu_xchg_op(var, nval) \ -({ \ - typeof(var) pxo_ret__ = raw_cpu_read(var); \ - raw_cpu_write(var, (nval)); \ - pxo_ret__; \ -}) - #define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val) #define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val) #define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val) @@ -528,9 +511,9 @@ do { \ #define this_cpu_or_1(pcp, val) percpu_to_op(1, volatile, "or", (pcp), val) #define this_cpu_or_2(pcp, val) percpu_to_op(2, volatile, "or", (pcp), val) #define this_cpu_or_4(pcp, val) percpu_to_op(4, volatile, "or", (pcp), val) -#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(1, volatile, pcp, nval) -#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(2, volatile, pcp, nval) -#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(4, volatile, pcp, nval) +#define this_cpu_xchg_1(pcp, nval) this_percpu_xchg_op(pcp, nval) +#define this_cpu_xchg_2(pcp, nval) this_percpu_xchg_op(pcp, nval) +#define this_cpu_xchg_4(pcp, nval) this_percpu_xchg_op(pcp, nval) #define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val) #define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val) @@ -557,6 +540,8 @@ do { \ * 32 bit must fall back to generic operations. */ #ifdef CONFIG_X86_64 +#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp) + #define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val) #define raw_cpu_and_8(pcp, val) percpu_to_op(8, , "and", (pcp), val) #define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val) @@ -569,41 +554,41 @@ do { \ #define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val) #define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val) #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val) -#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval) +#define this_cpu_xchg_8(pcp, nval) this_percpu_xchg_op(pcp, nval) #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval) #define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval) -#endif - -static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, - const unsigned long __percpu *addr) -{ - unsigned long __percpu *a = - (unsigned long __percpu *)addr + nr / BITS_PER_LONG; -#ifdef CONFIG_X86_64 - return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0; +#define raw_cpu_read_long(pcp) raw_cpu_read_8(pcp) #else - return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0; -#endif -} +/* There is no generic 64 bit read stable operation for 32 bit targets. */ +#define this_cpu_read_stable_8(pcp) ({ BUILD_BUG(); (typeof(pcp))0; }) -static inline bool x86_this_cpu_variable_test_bit(int nr, - const unsigned long __percpu *addr) -{ - bool oldbit; +#define raw_cpu_read_long(pcp) raw_cpu_read_4(pcp) +#endif - asm volatile("btl "__percpu_arg(2)",%1" - CC_SET(c) - : CC_OUT(c) (oldbit) - : "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))), "Ir" (nr)); +#define x86_this_cpu_constant_test_bit(_nr, _var) \ +({ \ + unsigned long __percpu *addr__ = \ + (unsigned long __percpu *)&(_var) + ((_nr) / BITS_PER_LONG); \ + !!((1UL << ((_nr) % BITS_PER_LONG)) & raw_cpu_read(*addr__)); \ +}) - return oldbit; -} +#define x86_this_cpu_variable_test_bit(_nr, _var) \ +({ \ + bool oldbit; \ + \ + asm volatile("btl %[nr], " __percpu_arg([var]) \ + CC_SET(c) \ + : CC_OUT(c) (oldbit) \ + : [var] "m" (__my_cpu_var(_var)), \ + [nr] "rI" (_nr)); \ + oldbit; \ +}) -#define x86_this_cpu_test_bit(nr, addr) \ - (__builtin_constant_p((nr)) \ - ? x86_this_cpu_constant_test_bit((nr), (addr)) \ - : x86_this_cpu_variable_test_bit((nr), (addr))) +#define x86_this_cpu_test_bit(_nr, _var) \ + (__builtin_constant_p(_nr) \ + ? x86_this_cpu_constant_test_bit(_nr, _var) \ + : x86_this_cpu_variable_test_bit(_nr, _var)) #include diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 315535ffb2..65b8e5bb90 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -234,6 +234,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } +#define pud_pfn pud_pfn static inline unsigned long pud_pfn(pud_t pud) { phys_addr_t pfn = pud_val(pud); @@ -387,23 +388,7 @@ static inline pte_t pte_wrprotect(pte_t pte) #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pte_uffd_wp(pte_t pte) { - bool wp = pte_flags(pte) & _PAGE_UFFD_WP; - -#ifdef CONFIG_DEBUG_VM - /* - * Having write bit for wr-protect-marked present ptes is fatal, - * because it means the uffd-wp bit will be ignored and write will - * just go through. - * - * Use any chance of pgtable walking to verify this (e.g., when - * page swapped out or being migrated for all purposes). It means - * something is already wrong. Tell the admin even before the - * process crashes. We also nail it with wrong pgtable setup. - */ - WARN_ON_ONCE(wp && pte_write(pte)); -#endif - - return wp; + return pte_flags(pte) & _PAGE_UFFD_WP; } static inline pte_t pte_mkuffd_wp(pte_t pte) @@ -1200,7 +1185,6 @@ static inline int pgd_none(pgd_t pgd) extern int direct_gbpages; void init_mem_mapping(void); void early_alloc_pgt_buf(void); -extern void memblock_find_dma_reserve(void); void __init poking_init(void); unsigned long init_memory_mapping(unsigned long start, unsigned long end, pgprot_t prot); diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 7e9db77231..3c4407271d 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -245,6 +245,7 @@ extern void cleanup_highmap(void); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN +#define HAVE_ARCH_UNMAPPED_AREA_VMFLAGS #define PAGE_AGP PAGE_KERNEL_NOCACHE #define HAVE_PAGE_AGP 1 diff --git a/arch/x86/include/asm/posted_intr.h b/arch/x86/include/asm/posted_intr.h new file mode 100644 index 0000000000..de788b400f --- /dev/null +++ b/arch/x86/include/asm/posted_intr.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_POSTED_INTR_H +#define _X86_POSTED_INTR_H +#include + +#define POSTED_INTR_ON 0 +#define POSTED_INTR_SN 1 + +#define PID_TABLE_ENTRY_VALID 1 + +/* Posted-Interrupt Descriptor */ +struct pi_desc { + union { + u32 pir[8]; /* Posted interrupt requested */ + u64 pir64[4]; + }; + union { + struct { + u16 notifications; /* Suppress and outstanding bits */ + u8 nv; + u8 rsvd_2; + u32 ndst; + }; + u64 control; + }; + u32 rsvd[6]; +} __aligned(64); + +static inline bool pi_test_and_set_on(struct pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); +} + +static inline bool pi_is_pir_empty(struct pi_desc *pi_desc) +{ + return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS); +} + +static inline void pi_set_sn(struct pi_desc *pi_desc) +{ + set_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline void pi_set_on(struct pi_desc *pi_desc) +{ + set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline void pi_clear_on(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline void pi_clear_sn(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_on(struct pi_desc *pi_desc) +{ + return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_sn(struct pi_desc *pi_desc) +{ + return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +/* Non-atomic helpers */ +static inline void __pi_set_sn(struct pi_desc *pi_desc) +{ + pi_desc->notifications |= BIT(POSTED_INTR_SN); +} + +static inline void __pi_clear_sn(struct pi_desc *pi_desc) +{ + pi_desc->notifications &= ~BIT(POSTED_INTR_SN); +} + +#ifdef CONFIG_X86_POSTED_MSI +/* + * Not all external vectors are subject to interrupt remapping, e.g. IOMMU's + * own interrupts. Here we do not distinguish them since those vector bits in + * PIR will always be zero. + */ +static inline bool pi_pending_this_cpu(unsigned int vector) +{ + struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc); + + if (WARN_ON_ONCE(vector > NR_VECTORS || vector < FIRST_EXTERNAL_VECTOR)) + return false; + + return test_bit(vector, (unsigned long *)pid->pir); +} + +extern void intel_posted_msi_init(void); +#else +static inline bool pi_pending_this_cpu(unsigned int vector) { return false; } + +static inline void intel_posted_msi_init(void) {}; +#endif /* X86_POSTED_MSI */ + +#endif /* _X86_POSTED_INTR_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 78e51b0d64..cb4f6c513c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -108,9 +108,23 @@ struct cpuinfo_topology { }; struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; + union { + /* + * The particular ordering (low-to-high) of (vendor, + * family, model) is done in case range of models, like + * it is usually done on AMD, need to be compared. + */ + struct { + __u8 x86_model; + /* CPU family */ + __u8 x86; + /* CPU vendor */ + __u8 x86_vendor; + __u8 x86_reserved; + }; + /* combined vendor, family, model */ + __u32 x86_vfm; + }; __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ @@ -586,7 +600,7 @@ extern char ignore_fpu_irq; # define BASE_PREFETCH "" # define ARCH_HAS_PREFETCH #else -# define BASE_PREFETCH "prefetcht0 %P1" +# define BASE_PREFETCH "prefetcht0 %1" #endif /* @@ -597,7 +611,7 @@ extern char ignore_fpu_irq; */ static inline void prefetch(const void *x) { - alternative_input(BASE_PREFETCH, "prefetchnta %P1", + alternative_input(BASE_PREFETCH, "prefetchnta %1", X86_FEATURE_XMM, "m" (*(const char *)x)); } @@ -609,7 +623,7 @@ static inline void prefetch(const void *x) */ static __always_inline void prefetchw(const void *x) { - alternative_input(BASE_PREFETCH, "prefetchw %P1", + alternative_input(BASE_PREFETCH, "prefetchw %1", X86_FEATURE_3DNOWPREFETCH, "m" (*(const char *)x)); } @@ -635,12 +649,10 @@ static __always_inline void prefetchw(const void *x) #define KSTK_ESP(task) (task_pt_regs(task)->sp) #else -extern unsigned long __end_init_task[]; +extern unsigned long __top_init_kernel_stack[]; #define INIT_THREAD { \ - .sp = (unsigned long)&__end_init_task - \ - TOP_OF_KERNEL_STACK_PADDING - \ - sizeof(struct pt_regs), \ + .sp = (unsigned long)&__top_init_kernel_stack, \ } extern unsigned long KSTK_ESP(struct task_struct *task); diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index 043758a2e6..365798cb44 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -23,19 +23,14 @@ extern int of_ioapic; extern u64 initial_dtb; extern void add_dtb(u64 data); void x86_of_pci_init(void); -void x86_dtb_parse_smp_config(void); +void x86_flattree_get_config(void); #else static inline void add_dtb(u64 data) { } static inline void x86_of_pci_init(void) { } -static inline void x86_dtb_parse_smp_config(void) { } +static inline void x86_flattree_get_config(void) { } #define of_ioapic 0 #endif -#ifdef CONFIG_OF_EARLY_FLATTREE -void x86_flattree_get_config(void); -#else -static inline void x86_flattree_get_config(void) { } -#endif extern char cmd_line[COMMAND_LINE_SIZE]; #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index cde8357bb2..a053c12939 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -85,6 +85,8 @@ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); #define virt_spin_lock virt_spin_lock static inline bool virt_spin_lock(struct qspinlock *lock) { + int val; + if (!static_branch_likely(&virt_spin_lock_key)) return false; @@ -94,10 +96,13 @@ static inline bool virt_spin_lock(struct qspinlock *lock) * horrible lock 'holder' preemption issues. */ - do { - while (atomic_read(&lock->val) != 0) - cpu_relax(); - } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); + __retry: + val = atomic_read(&lock->val); + + if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) { + cpu_relax(); + goto __retry; + } return true; } diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index ef9697f201..0a985784be 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -25,9 +25,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); * * void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock) * { - * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); + * u8 lockval = _Q_LOCKED_VAL; * - * if (likely(lockval == _Q_LOCKED_VAL)) + * if (try_cmpxchg(&lock->locked, &lockval, 0)) * return; * pv_queued_spin_unlock_slowpath(lock, lockval); * } @@ -40,10 +40,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); #define PV_UNLOCK_ASM \ FRAME_BEGIN \ "push %rdx\n\t" \ - "mov $0x1,%eax\n\t" \ + "mov $" __stringify(_Q_LOCKED_VAL) ",%eax\n\t" \ "xor %edx,%edx\n\t" \ LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \ - "cmp $0x1,%al\n\t" \ "jne .slowpath\n\t" \ "pop %rdx\n\t" \ FRAME_END \ diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h index fef16e3981..42bcd42d70 100644 --- a/arch/x86/include/asm/seccomp.h +++ b/arch/x86/include/asm/seccomp.h @@ -9,7 +9,7 @@ #endif #ifdef CONFIG_COMPAT -#include +#include #define __NR_seccomp_read_32 __NR_ia32_read #define __NR_seccomp_write_32 __NR_ia32_write #define __NR_seccomp_exit_32 __NR_ia32_exit diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index b463fcbd4b..5a8246dd53 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -54,8 +54,10 @@ (((unsigned long)fn) << 32)) /* AP Reset Hold */ -#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 -#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 +#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 +#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 +#define GHCB_MSR_AP_RESET_HOLD_RESULT_POS 12 +#define GHCB_MSR_AP_RESET_HOLD_RESULT_MASK GENMASK_ULL(51, 0) /* GHCB GPA Register */ #define GHCB_MSR_REG_GPA_REQ 0x012 @@ -99,6 +101,8 @@ enum psc_op { /* GHCB Hypervisor Feature Request/Response */ #define GHCB_MSR_HV_FT_REQ 0x080 #define GHCB_MSR_HV_FT_RESP 0x081 +#define GHCB_MSR_HV_FT_POS 12 +#define GHCB_MSR_HV_FT_MASK GENMASK_ULL(51, 0) #define GHCB_MSR_HV_FT_RESP_VAL(v) \ /* GHCBData[63:12] */ \ (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 93ed60080c..ca20cc4e58 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -140,7 +140,7 @@ struct secrets_os_area { #define VMPCK_KEY_LEN 32 /* See the SNP spec version 0.9 for secrets page format */ -struct snp_secrets_page_layout { +struct snp_secrets_page { u32 version; u32 imien : 1, rsvd1 : 31; diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h index 42fee8959d..896909f306 100644 --- a/arch/x86/include/asm/shstk.h +++ b/arch/x86/include/asm/shstk.h @@ -21,6 +21,7 @@ unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clon void shstk_free(struct task_struct *p); int setup_signal_shadow_stack(struct ksignal *ksig); int restore_signal_shadow_stack(void); +int shstk_update_last_frame(unsigned long val); #else static inline long shstk_prctl(struct task_struct *task, int option, unsigned long arg2) { return -EINVAL; } @@ -31,6 +32,7 @@ static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p, static inline void shstk_free(struct task_struct *p) {} static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; } static inline int restore_signal_shadow_stack(void) { return 0; } +static inline int shstk_update_last_frame(unsigned long val) { return 0; } #endif /* CONFIG_X86_USER_SHADOW_STACK */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 2e9fc5c400..aec6e2d3aa 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -182,8 +182,8 @@ static __always_inline void clflush(volatile void *__p) static inline void clflushopt(volatile void *__p) { - alternative_io(".byte 0x3e; clflush %P0", - ".byte 0x66; clflush %P0", + alternative_io(".byte 0x3e; clflush %0", + ".byte 0x66; clflush %0", X86_FEATURE_CLFLUSHOPT, "+m" (*(volatile char __force *)__p)); } @@ -205,9 +205,9 @@ static inline void clwb(volatile void *__p) #ifdef CONFIG_X86_USER_SHADOW_STACK static inline int write_user_shstk_64(u64 __user *addr, u64 val) { - asm goto("1: wrussq %[val], (%[addr])\n" + asm goto("1: wrussq %[val], %[addr]\n" _ASM_EXTABLE(1b, %l[fail]) - :: [addr] "r" (addr), [val] "r" (val) + :: [addr] "m" (*addr), [val] "r" (val) :: fail); return 0; fail: diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 857d364b98..9d0b324eab 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -30,37 +30,40 @@ void *__memset(void *s, int c, size_t n); #define __HAVE_ARCH_MEMSET16 static inline void *memset16(uint16_t *s, uint16_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosw" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const __auto_type s0 = s; + asm volatile ( + "rep stosw" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } #define __HAVE_ARCH_MEMSET32 static inline void *memset32(uint32_t *s, uint32_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosl" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const __auto_type s0 = s; + asm volatile ( + "rep stosl" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } #define __HAVE_ARCH_MEMSET64 static inline void *memset64(uint64_t *s, uint64_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosq" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const __auto_type s0 = s; + asm volatile ( + "rep stosq" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } #endif diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 345aafbc19..6259f1937f 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -15,7 +15,7 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); -extern void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len); +extern void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len); /* * Clear and restore the kernel write-protection flag on the local CPU. diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 8e048ca980..0ef36190ab 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -300,7 +300,7 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) #define vdso_cycles_ok arch_vdso_cycles_ok /* - * x86 specific delta calculation. + * x86 specific calculation of nanoseconds for the current cycle count * * The regular implementation assumes that clocksource reads are globally * monotonic. The TSC can be slightly off across sockets which can cause @@ -308,8 +308,8 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) * jump. * * Therefore it needs to be verified that @cycles are greater than - * @last. If not then use @last, which is the base time of the current - * conversion period. + * @vd->cycles_last. If not then use @vd->cycles_last, which is the base + * time of the current conversion period. * * This variant also uses a custom mask because while the clocksource mask of * all the VDSO capable clocksources on x86 is U64_MAX, the above code uses @@ -317,25 +317,37 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) * declares everything with the MSB/Sign-bit set as invalid. Therefore the * effective mask is S64_MAX. */ -static __always_inline -u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base) { - /* - * Due to the MSB/Sign-bit being used as invalid marker (see - * arch_vdso_cycles_valid() above), the effective mask is S64_MAX. - */ - u64 delta = (cycles - last) & S64_MAX; + u64 delta = cycles - vd->cycle_last; /* - * Due to the above mentioned TSC wobbles, filter out negative motion. - * Per the above masking, the effective sign bit is now bit 62. + * Negative motion and deltas which can cause multiplication + * overflow require special treatment. This check covers both as + * negative motion is guaranteed to be greater than @vd::max_cycles + * due to unsigned comparison. + * + * Due to the MSB/Sign-bit being used as invalid marker (see + * arch_vdso_cycles_valid() above), the effective mask is S64_MAX, + * but that case is also unlikely and will also take the unlikely path + * here. */ - if (unlikely(delta & (1ULL << 62))) - return 0; + if (unlikely(delta > vd->max_cycles)) { + /* + * Due to the above mentioned TSC wobbles, filter out + * negative motion. Per the above masking, the effective + * sign bit is now bit 62. + */ + if (delta & (1ULL << 62)) + return base >> vd->shift; + + /* Handle multiplication overflow gracefully */ + return mul_u64_u32_add_u64_shr(delta & S64_MAX, vd->mult, base, vd->shift); + } - return delta * mult; + return ((delta * vd->mult) + base) >> vd->shift; } -#define vdso_calc_delta vdso_calc_delta +#define vdso_calc_ns vdso_calc_ns #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/video.h b/arch/x86/include/asm/video.h new file mode 100644 index 0000000000..0950c9535f --- /dev/null +++ b/arch/x86/include/asm/video.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VIDEO_H +#define _ASM_X86_VIDEO_H + +#include + +#include + +struct device; + +pgprot_t pgprot_framebuffer(pgprot_t prot, + unsigned long vm_start, unsigned long vm_end, + unsigned long offset); +#define pgprot_framebuffer pgprot_framebuffer + +bool video_is_primary_device(struct device *dev); +#define video_is_primary_device video_is_primary_device + +#include + +#endif /* _ASM_X86_VIDEO_H */ diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index 9e8ac5073e..62ee199099 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -84,7 +84,7 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { } -#define free_vm86(t) do { } while(0) +#define free_vm86(task) do { (void)(task); } while(0) #endif /* CONFIG_VM86 */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 4dba173630..d77a31039f 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -71,6 +71,7 @@ #define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING) #define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING) #define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING) +#define SECONDARY_EXEC_EPT_VIOLATION_VE VMCS_CONTROL_BIT(EPT_VIOLATION_VE) #define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX) #define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES) #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) @@ -226,6 +227,8 @@ enum vmcs_field { VMREAD_BITMAP_HIGH = 0x00002027, VMWRITE_BITMAP = 0x00002028, VMWRITE_BITMAP_HIGH = 0x00002029, + VE_INFORMATION_ADDRESS = 0x0000202A, + VE_INFORMATION_ADDRESS_HIGH = 0x0000202B, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, ENCLS_EXITING_BITMAP = 0x0000202E, @@ -514,6 +517,7 @@ enum vmcs_field { #define VMX_EPT_IPAT_BIT (1ull << 6) #define VMX_EPT_ACCESS_BIT (1ull << 8) #define VMX_EPT_DIRTY_BIT (1ull << 9) +#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63) #define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \ VMX_EPT_WRITABLE_MASK | \ VMX_EPT_EXECUTABLE_MASK) @@ -630,4 +634,13 @@ enum vmx_l1d_flush_state { extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; +struct vmx_ve_information { + u32 exit_reason; + u32 delivery; + u64 exit_qualification; + u64 guest_linear_address; + u64 guest_physical_address; + u16 eptp_index; +}; + #endif diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index 266daf5b5b..695f366648 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -77,7 +77,7 @@ #define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */ #define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */ #define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */ -#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */ +#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* Conditionally reflect EPT violations as #VE exceptions */ #define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */ #define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */ #define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */ -- cgit v1.2.3