From a80d58b9b74ee560796b7d05522b85f8aac0f83d Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 7 May 2024 04:25:28 +0200 Subject: Merging upstream version 6.1.90. Signed-off-by: Daniel Baumann --- arch/x86/Kconfig | 32 ++-- arch/x86/boot/Makefile | 2 +- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/misc.c | 1 + arch/x86/boot/compressed/sev.c | 3 + arch/x86/boot/compressed/vmlinux.lds.S | 6 +- arch/x86/boot/header.S | 211 ++++++++++--------------- arch/x86/boot/setup.ld | 14 +- arch/x86/boot/tools/build.c | 273 ++------------------------------- arch/x86/events/amd/lbr.c | 6 +- arch/x86/events/core.c | 1 + arch/x86/include/asm/apic.h | 3 +- arch/x86/include/asm/boot.h | 1 + arch/x86/include/asm/coco.h | 5 +- arch/x86/include/asm/init.h | 2 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/asm/mem_encrypt.h | 8 +- arch/x86/include/asm/page_types.h | 12 +- arch/x86/include/asm/pgtable_types.h | 3 +- arch/x86/include/asm/sev.h | 10 +- arch/x86/kernel/amd_gart_64.c | 2 +- arch/x86/kernel/apic/apic.c | 6 +- arch/x86/kernel/cpu/bugs.c | 87 +++++------ arch/x86/kernel/cpu/common.c | 48 +++--- arch/x86/kernel/cpu/cpuid-deps.c | 6 +- arch/x86/kernel/head64.c | 7 +- arch/x86/kernel/platform-quirks.c | 1 + arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/sev-shared.c | 23 ++- arch/x86/kernel/sev.c | 11 +- arch/x86/kvm/cpuid.c | 1 + arch/x86/kvm/cpuid.h | 10 ++ arch/x86/kvm/lapic.c | 3 +- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/vmx/vmx.c | 24 ++- arch/x86/kvm/x86.c | 2 +- arch/x86/mm/mem_encrypt_boot.S | 4 +- arch/x86/mm/mem_encrypt_identity.c | 58 +++---- arch/x86/mm/pat/set_memory.c | 6 +- arch/x86/mm/pti.c | 2 +- 40 files changed, 309 insertions(+), 592 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ba815ac47..49cea5b81 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -61,6 +61,7 @@ config X86 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT + select ARCH_CONFIGURES_CPU_MITIGATIONS select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 @@ -2449,17 +2450,17 @@ config CC_HAS_SLS config CC_HAS_RETURN_THUNK def_bool $(cc-option,-mfunction-return=thunk-extern) -menuconfig SPECULATION_MITIGATIONS - bool "Mitigations for speculative execution vulnerabilities" +menuconfig CPU_MITIGATIONS + bool "Mitigations for CPU vulnerabilities" default y help - Say Y here to enable options which enable mitigations for - speculative execution hardware vulnerabilities. + Say Y here to enable options which enable mitigations for hardware + vulnerabilities (usually related to speculative execution). If you say N, all mitigations will be disabled. You really should know what you are doing to say so. -if SPECULATION_MITIGATIONS +if CPU_MITIGATIONS config PAGE_TABLE_ISOLATION bool "Remove the kernel mapping in user mode" @@ -2563,31 +2564,16 @@ config MITIGATION_RFDS stored in floating point, vector and integer registers. See also -choice - prompt "Clear branch history" +config MITIGATION_SPECTRE_BHI + bool "Mitigate Spectre-BHB (Branch History Injection)" depends on CPU_SUP_INTEL - default SPECTRE_BHI_ON + default y help Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks where the branch history buffer is poisoned to speculatively steer indirect branches. See -config SPECTRE_BHI_ON - bool "on" - help - Equivalent to setting spectre_bhi=on command line parameter. -config SPECTRE_BHI_OFF - bool "off" - help - Equivalent to setting spectre_bhi=off command line parameter. -config SPECTRE_BHI_AUTO - bool "auto" - help - Equivalent to setting spectre_bhi=auto command line parameter. - -endchoice - endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 9e38ffaad..3c1b35203 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -91,7 +91,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|efi32_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi.._stub_entry\|efi\(32\)\?_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|_e\?data\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3965b2c9e..6e61baff2 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -84,7 +84,7 @@ LDFLAGS_vmlinux += -T hostprogs := mkpiggy HOST_EXTRACFLAGS += -I$(srctree)/tools/include -sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' +sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' quiet_cmd_voffset = VOFFSET $@ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 8ae7893d7..45435ff88 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -330,6 +330,7 @@ static size_t parse_elf(void *output) return ehdr.e_entry - LOAD_PHYSICAL_ADDR; } +const unsigned long kernel_text_size = VO___start_rodata - VO__text; const unsigned long kernel_total_size = VO__end - VO__text; static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index d07e665bb..3c5d5c97f 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -118,6 +118,9 @@ static bool fault_in_kernel_space(unsigned long address) #define __init #define __pa(x) ((unsigned long)(x)) +#undef __head +#define __head + #define __BOOT_COMPRESSED /* Basic instruction decoding support needed */ diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 112b2375d..bcf0e4e4c 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -42,11 +42,13 @@ SECTIONS *(.rodata.*) _erodata = . ; } - .data : { + .data : ALIGN(0x1000) { _data = . ; *(.data) *(.data.*) - *(.bss.efistub) + + /* Add 4 bytes of extra space for a CRC-32 checksum */ + . = ALIGN(. + 4, 0x200); _edata = . ; } . = ALIGN(L1_CACHE_BYTES); diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index d31982509..7593339b5 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -36,65 +36,19 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ #define ROOT_RDONLY 1 #endif + .set salign, 0x1000 + .set falign, 0x200 + .code16 .section ".bstext", "ax" - - .global bootsect_start -bootsect_start: #ifdef CONFIG_EFI_STUB # "MZ", MS-DOS header .word MZ_MAGIC -#endif - - # Normalize the start address - ljmp $BOOTSEG, $start2 - -start2: - movw %cs, %ax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - xorw %sp, %sp - sti - cld - - movw $bugger_off_msg, %si - -msg_loop: - lodsb - andb %al, %al - jz bs_die - movb $0xe, %ah - movw $7, %bx - int $0x10 - jmp msg_loop - -bs_die: - # Allow the user to press a key, then reboot - xorw %ax, %ax - int $0x16 - int $0x19 - - # int 0x19 should never return. In case it does anyway, - # invoke the BIOS reset code... - ljmp $0xf000,$0xfff0 - -#ifdef CONFIG_EFI_STUB .org 0x3c # # Offset to the PE header. # .long pe_header -#endif /* CONFIG_EFI_STUB */ - - .section ".bsdata", "a" -bugger_off_msg: - .ascii "Use a boot loader.\r\n" - .ascii "\n" - .ascii "Remove disk and press any key to reboot...\r\n" - .byte 0 - -#ifdef CONFIG_EFI_STUB pe_header: .long PE_MAGIC @@ -123,30 +77,26 @@ optional_header: .byte 0x02 # MajorLinkerVersion .byte 0x14 # MinorLinkerVersion - # Filled in by build.c - .long 0 # SizeOfCode + .long ZO__data # SizeOfCode - .long 0 # SizeOfInitializedData + .long ZO__end - ZO__data # SizeOfInitializedData .long 0 # SizeOfUninitializedData - # Filled in by build.c - .long 0x0000 # AddressOfEntryPoint + .long setup_size + ZO_efi_pe_entry # AddressOfEntryPoint - .long 0x0200 # BaseOfCode + .long setup_size # BaseOfCode #ifdef CONFIG_X86_32 .long 0 # data #endif extra_header_fields: - # PE specification requires ImageBase to be 64k aligned - .set image_base, (LOAD_PHYSICAL_ADDR + 0xffff) & ~0xffff #ifdef CONFIG_X86_32 - .long image_base # ImageBase + .long 0 # ImageBase #else - .quad image_base # ImageBase + .quad 0 # ImageBase #endif - .long 0x20 # SectionAlignment - .long 0x20 # FileAlignment + .long salign # SectionAlignment + .long falign # FileAlignment .word 0 # MajorOperatingSystemVersion .word 0 # MinorOperatingSystemVersion .word LINUX_EFISTUB_MAJOR_VERSION # MajorImageVersion @@ -155,12 +105,9 @@ extra_header_fields: .word 0 # MinorSubsystemVersion .long 0 # Win32VersionValue - # - # The size of the bzImage is written in tools/build.c - # - .long 0 # SizeOfImage + .long setup_size + ZO__end # SizeOfImage - .long 0x200 # SizeOfHeaders + .long salign # SizeOfHeaders .long 0 # CheckSum .word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application) #ifdef CONFIG_EFI_DXE_MEM_ATTRIBUTES @@ -191,87 +138,77 @@ extra_header_fields: # Section table section_table: - # - # The offset & size fields are filled in by build.c. - # .ascii ".setup" .byte 0 .byte 0 - .long 0 - .long 0x0 # startup_{32,64} - .long 0 # Size of initialized data - # on disk - .long 0x0 # startup_{32,64} - .long 0 # PointerToRelocations - .long 0 # PointerToLineNumbers - .word 0 # NumberOfRelocations - .word 0 # NumberOfLineNumbers - .long IMAGE_SCN_CNT_CODE | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE | \ - IMAGE_SCN_ALIGN_16BYTES # Characteristics + .long pecompat_fstart - salign # VirtualSize + .long salign # VirtualAddress + .long pecompat_fstart - salign # SizeOfRawData + .long salign # PointerToRawData - # - # The EFI application loader requires a relocation section - # because EFI applications must be relocatable. The .reloc - # offset & size fields are filled in by build.c. - # - .ascii ".reloc" - .byte 0 - .byte 0 - .long 0 - .long 0 - .long 0 # SizeOfRawData - .long 0 # PointerToRawData - .long 0 # PointerToRelocations - .long 0 # PointerToLineNumbers - .word 0 # NumberOfRelocations - .word 0 # NumberOfLineNumbers + .long 0, 0, 0 .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_DISCARDABLE | \ - IMAGE_SCN_ALIGN_1BYTES # Characteristics + IMAGE_SCN_MEM_DISCARDABLE # Characteristics #ifdef CONFIG_EFI_MIXED - # - # The offset & size fields are filled in by build.c. - # .asciz ".compat" - .long 0 - .long 0x0 - .long 0 # Size of initialized data - # on disk - .long 0x0 - .long 0 # PointerToRelocations - .long 0 # PointerToLineNumbers - .word 0 # NumberOfRelocations - .word 0 # NumberOfLineNumbers + + .long pecompat_fsize # VirtualSize + .long pecompat_fstart # VirtualAddress + .long pecompat_fsize # SizeOfRawData + .long pecompat_fstart # PointerToRawData + + .long 0, 0, 0 .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_DISCARDABLE | \ - IMAGE_SCN_ALIGN_1BYTES # Characteristics + IMAGE_SCN_MEM_DISCARDABLE # Characteristics + + /* + * Put the IA-32 machine type and the associated entry point address in + * the .compat section, so loaders can figure out which other execution + * modes this image supports. + */ + .pushsection ".pecompat", "a", @progbits + .balign salign + .globl pecompat_fstart +pecompat_fstart: + .byte 0x1 # Version + .byte 8 # Size + .word IMAGE_FILE_MACHINE_I386 # PE machine type + .long setup_size + ZO_efi32_pe_entry # Entrypoint + .byte 0x0 # Sentinel + .popsection +#else + .set pecompat_fstart, setup_size #endif - - # - # The offset & size fields are filled in by build.c. - # .ascii ".text" .byte 0 .byte 0 .byte 0 - .long 0 - .long 0x0 # startup_{32,64} - .long 0 # Size of initialized data + .long ZO__data + .long setup_size + .long ZO__data # Size of initialized data # on disk - .long 0x0 # startup_{32,64} + .long setup_size .long 0 # PointerToRelocations .long 0 # PointerToLineNumbers .word 0 # NumberOfRelocations .word 0 # NumberOfLineNumbers .long IMAGE_SCN_CNT_CODE | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE | \ - IMAGE_SCN_ALIGN_16BYTES # Characteristics + IMAGE_SCN_MEM_EXECUTE # Characteristics + + .ascii ".data\0\0\0" + .long ZO__end - ZO__data # VirtualSize + .long setup_size + ZO__data # VirtualAddress + .long ZO__edata - ZO__data # SizeOfRawData + .long setup_size + ZO__data # PointerToRawData + + .long 0, 0, 0 + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE # Characteristics .set section_count, (. - section_table) / 40 #endif /* CONFIG_EFI_STUB */ @@ -285,12 +222,12 @@ sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */ .globl hdr hdr: -setup_sects: .byte 0 /* Filled in by build.c */ + .byte setup_sects - 1 root_flags: .word ROOT_RDONLY -syssize: .long 0 /* Filled in by build.c */ +syssize: .long ZO__edata / 16 ram_size: .word 0 /* Obsolete */ vid_mode: .word SVGA_MODE -root_dev: .word 0 /* Filled in by build.c */ +root_dev: .word 0 /* Default to major/minor 0/0 */ boot_flag: .word 0xAA55 # offset 512, entry point @@ -578,9 +515,25 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr # define INIT_SIZE VO_INIT_SIZE #endif + .macro __handover_offset +#ifndef CONFIG_EFI_HANDOVER_PROTOCOL + .long 0 +#elif !defined(CONFIG_X86_64) + .long ZO_efi32_stub_entry +#else + /* Yes, this is really how we defined it :( */ + .long ZO_efi64_stub_entry - 0x200 +#ifdef CONFIG_EFI_MIXED + .if ZO_efi32_stub_entry != ZO_efi64_stub_entry - 0x200 + .error "32-bit and 64-bit EFI entry points do not match" + .endif +#endif +#endif + .endm + init_size: .long INIT_SIZE # kernel initialization size -handover_offset: .long 0 # Filled in by build.c -kernel_info_offset: .long 0 # Filled in by build.c +handover_offset: __handover_offset +kernel_info_offset: .long ZO_kernel_info # End of setup header ##################################################### diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 49546c247..3a2d1360a 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -10,10 +10,11 @@ ENTRY(_start) SECTIONS { . = 0; - .bstext : { *(.bstext) } - .bsdata : { *(.bsdata) } + .bstext : { + *(.bstext) + . = 495; + } =0xffffffff - . = 495; .header : { *(.header) } .entrytext : { *(.entrytext) } .inittext : { *(.inittext) } @@ -23,6 +24,9 @@ SECTIONS .text : { *(.text .text.*) } .text32 : { *(.text32) } + .pecompat : { *(.pecompat) } + PROVIDE(pecompat_fsize = setup_size - pecompat_fstart); + . = ALIGN(16); .rodata : { *(.rodata*) } @@ -38,8 +42,10 @@ SECTIONS .signature : { setup_sig = .; LONG(0x5a5aaa55) - } + setup_size = ALIGN(ABSOLUTE(.), 4096); + setup_sects = ABSOLUTE(setup_size / 512); + } . = ALIGN(16); .bss : diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index bd247692b..10311d77c 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -40,10 +40,6 @@ typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; -#define DEFAULT_MAJOR_ROOT 0 -#define DEFAULT_MINOR_ROOT 0 -#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT) - /* Minimal number of setup sectors */ #define SETUP_SECT_MIN 5 #define SETUP_SECT_MAX 64 @@ -51,22 +47,7 @@ typedef unsigned int u32; /* This must be large enough to hold the entire setup */ u8 buf[SETUP_SECT_MAX*512]; -#define PECOFF_RELOC_RESERVE 0x20 - -#ifdef CONFIG_EFI_MIXED -#define PECOFF_COMPAT_RESERVE 0x20 -#else -#define PECOFF_COMPAT_RESERVE 0x0 -#endif - -static unsigned long efi32_stub_entry; -static unsigned long efi64_stub_entry; -static unsigned long efi_pe_entry; -static unsigned long efi32_pe_entry; -static unsigned long kernel_info; -static unsigned long startup_64; -static unsigned long _ehead; -static unsigned long _end; +static unsigned long _edata; /*----------------------------------------------------------------------*/ @@ -152,180 +133,6 @@ static void usage(void) die("Usage: build setup system zoffset.h image"); } -#ifdef CONFIG_EFI_STUB - -static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset) -{ - unsigned int pe_header; - unsigned short num_sections; - u8 *section; - - pe_header = get_unaligned_le32(&buf[0x3c]); - num_sections = get_unaligned_le16(&buf[pe_header + 6]); - -#ifdef CONFIG_X86_32 - section = &buf[pe_header + 0xa8]; -#else - section = &buf[pe_header + 0xb8]; -#endif - - while (num_sections > 0) { - if (strncmp((char*)section, section_name, 8) == 0) { - /* section header size field */ - put_unaligned_le32(size, section + 0x8); - - /* section header vma field */ - put_unaligned_le32(vma, section + 0xc); - - /* section header 'size of initialised data' field */ - put_unaligned_le32(datasz, section + 0x10); - - /* section header 'file offset' field */ - put_unaligned_le32(offset, section + 0x14); - - break; - } - section += 0x28; - num_sections--; - } -} - -static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) -{ - update_pecoff_section_header_fields(section_name, offset, size, size, offset); -} - -static void update_pecoff_setup_and_reloc(unsigned int size) -{ - u32 setup_offset = 0x200; - u32 reloc_offset = size - PECOFF_RELOC_RESERVE - PECOFF_COMPAT_RESERVE; -#ifdef CONFIG_EFI_MIXED - u32 compat_offset = reloc_offset + PECOFF_RELOC_RESERVE; -#endif - u32 setup_size = reloc_offset - setup_offset; - - update_pecoff_section_header(".setup", setup_offset, setup_size); - update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE); - - /* - * Modify .reloc section contents with a single entry. The - * relocation is applied to offset 10 of the relocation section. - */ - put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]); - put_unaligned_le32(10, &buf[reloc_offset + 4]); - -#ifdef CONFIG_EFI_MIXED - update_pecoff_section_header(".compat", compat_offset, PECOFF_COMPAT_RESERVE); - - /* - * Put the IA-32 machine type (0x14c) and the associated entry point - * address in the .compat section, so loaders can figure out which other - * execution modes this image supports. - */ - buf[compat_offset] = 0x1; - buf[compat_offset + 1] = 0x8; - put_unaligned_le16(0x14c, &buf[compat_offset + 2]); - put_unaligned_le32(efi32_pe_entry + size, &buf[compat_offset + 4]); -#endif -} - -static void update_pecoff_text(unsigned int text_start, unsigned int file_sz, - unsigned int init_sz) -{ - unsigned int pe_header; - unsigned int text_sz = file_sz - text_start; - unsigned int bss_sz = init_sz - file_sz; - - pe_header = get_unaligned_le32(&buf[0x3c]); - - /* - * The PE/COFF loader may load the image at an address which is - * misaligned with respect to the kernel_alignment field in the setup - * header. - * - * In order to avoid relocating the kernel to correct the misalignment, - * add slack to allow the buffer to be aligned within the declared size - * of the image. - */ - bss_sz += CONFIG_PHYSICAL_ALIGN; - init_sz += CONFIG_PHYSICAL_ALIGN; - - /* - * Size of code: Subtract the size of the first sector (512 bytes) - * which includes the header. - */ - put_unaligned_le32(file_sz - 512 + bss_sz, &buf[pe_header + 0x1c]); - - /* Size of image */ - put_unaligned_le32(init_sz, &buf[pe_header + 0x50]); - - /* - * Address of entry point for PE/COFF executable - */ - put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]); - - update_pecoff_section_header_fields(".text", text_start, text_sz + bss_sz, - text_sz, text_start); -} - -static int reserve_pecoff_reloc_section(int c) -{ - /* Reserve 0x20 bytes for .reloc section */ - memset(buf+c, 0, PECOFF_RELOC_RESERVE); - return PECOFF_RELOC_RESERVE; -} - -static void efi_stub_defaults(void) -{ - /* Defaults for old kernel */ -#ifdef CONFIG_X86_32 - efi_pe_entry = 0x10; -#else - efi_pe_entry = 0x210; - startup_64 = 0x200; -#endif -} - -static void efi_stub_entry_update(void) -{ - unsigned long addr = efi32_stub_entry; - -#ifdef CONFIG_EFI_HANDOVER_PROTOCOL -#ifdef CONFIG_X86_64 - /* Yes, this is really how we defined it :( */ - addr = efi64_stub_entry - 0x200; -#endif - -#ifdef CONFIG_EFI_MIXED - if (efi32_stub_entry != addr) - die("32-bit and 64-bit EFI entry points do not match\n"); -#endif -#endif - put_unaligned_le32(addr, &buf[0x264]); -} - -#else - -static inline void update_pecoff_setup_and_reloc(unsigned int size) {} -static inline void update_pecoff_text(unsigned int text_start, - unsigned int file_sz, - unsigned int init_sz) {} -static inline void efi_stub_defaults(void) {} -static inline void efi_stub_entry_update(void) {} - -static inline int reserve_pecoff_reloc_section(int c) -{ - return 0; -} -#endif /* CONFIG_EFI_STUB */ - -static int reserve_pecoff_compat_section(int c) -{ - /* Reserve 0x20 bytes for .compat section */ - memset(buf+c, 0, PECOFF_COMPAT_RESERVE); - return PECOFF_COMPAT_RESERVE; -} - /* * Parse zoffset.h and find the entry points. We could just #include zoffset.h * but that would mean tools/build would have to be rebuilt every time. It's @@ -354,14 +161,7 @@ static void parse_zoffset(char *fname) p = (char *)buf; while (p && *p) { - PARSE_ZOFS(p, efi32_stub_entry); - PARSE_ZOFS(p, efi64_stub_entry); - PARSE_ZOFS(p, efi_pe_entry); - PARSE_ZOFS(p, efi32_pe_entry); - PARSE_ZOFS(p, kernel_info); - PARSE_ZOFS(p, startup_64); - PARSE_ZOFS(p, _ehead); - PARSE_ZOFS(p, _end); + PARSE_ZOFS(p, _edata); p = strchr(p, '\n'); while (p && (*p == '\r' || *p == '\n')) @@ -371,17 +171,14 @@ static void parse_zoffset(char *fname) int main(int argc, char ** argv) { - unsigned int i, sz, setup_sectors, init_sz; + unsigned int i, sz, setup_sectors; int c; - u32 sys_size; struct stat sb; FILE *file, *dest; int fd; void *kernel; u32 crc = 0xffffffffUL; - efi_stub_defaults(); - if (argc != 5) usage(); parse_zoffset(argv[3]); @@ -403,72 +200,27 @@ int main(int argc, char ** argv) die("Boot block hasn't got boot flag (0xAA55)"); fclose(file); - c += reserve_pecoff_compat_section(c); - c += reserve_pecoff_reloc_section(c); - /* Pad unused space with zeros */ - setup_sectors = (c + 511) / 512; + setup_sectors = (c + 4095) / 4096; + setup_sectors *= 8; if (setup_sectors < SETUP_SECT_MIN) setup_sectors = SETUP_SECT_MIN; i = setup_sectors*512; memset(buf+c, 0, i-c); - update_pecoff_setup_and_reloc(i); - - /* Set the default root device */ - put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); - /* Open and stat the kernel file */ fd = open(argv[2], O_RDONLY); if (fd < 0) die("Unable to open `%s': %m", argv[2]); if (fstat(fd, &sb)) die("Unable to stat `%s': %m", argv[2]); - sz = sb.st_size; + if (_edata != sb.st_size) + die("Unexpected file size `%s': %u != %u", argv[2], _edata, + sb.st_size); + sz = _edata - 4; kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); if (kernel == MAP_FAILED) die("Unable to mmap '%s': %m", argv[2]); - /* Number of 16-byte paragraphs, including space for a 4-byte CRC */ - sys_size = (sz + 15 + 4) / 16; -#ifdef CONFIG_EFI_STUB - /* - * COFF requires minimum 32-byte alignment of sections, and - * adding a signature is problematic without that alignment. - */ - sys_size = (sys_size + 1) & ~1; -#endif - - /* Patch the setup code with the appropriate size parameters */ - buf[0x1f1] = setup_sectors-1; - put_unaligned_le32(sys_size, &buf[0x1f4]); - - init_sz = get_unaligned_le32(&buf[0x260]); -#ifdef CONFIG_EFI_STUB - /* - * The decompression buffer will start at ImageBase. When relocating - * the compressed kernel to its end, we must ensure that the head - * section does not get overwritten. The head section occupies - * [i, i + _ehead), and the destination is [init_sz - _end, init_sz). - * - * At present these should never overlap, because 'i' is at most 32k - * because of SETUP_SECT_MAX, '_ehead' is less than 1k, and the - * calculation of INIT_SIZE in boot/header.S ensures that - * 'init_sz - _end' is at least 64k. - * - * For future-proofing, increase init_sz if necessary. - */ - - if (init_sz - _end < i + _ehead) { - init_sz = (i + _ehead + _end + 4095) & ~4095; - put_unaligned_le32(init_sz, &buf[0x260]); - } -#endif - update_pecoff_text(setup_sectors * 512, i + (sys_size * 16), init_sz); - - efi_stub_entry_update(); - - /* Update kernel_info offset. */ - put_unaligned_le32(kernel_info, &buf[0x268]); crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, dest) != i) @@ -479,13 +231,6 @@ int main(int argc, char ** argv) if (fwrite(kernel, 1, sz, dest) != sz) die("Writing kernel failed"); - /* Add padding leaving 4 bytes for the checksum */ - while (sz++ < (sys_size*16) - 4) { - crc = partial_crc32_one('\0', crc); - if (fwrite("\0", 1, 1, dest) != 1) - die("Writing padding failed"); - } - /* Write the CRC */ put_unaligned_le32(crc, buf); if (fwrite(buf, 1, 4, dest) != 4) diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index b8fe74e8e..48f4095f5 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -173,9 +173,11 @@ void amd_pmu_lbr_read(void) /* * Check if a branch has been logged; if valid = 0, spec = 0 - * then no branch was recorded + * then no branch was recorded; if reserved = 1 then an + * erroneous branch was recorded (see Erratum 1452) */ - if (!entry.to.split.valid && !entry.to.split.spec) + if ((!entry.to.split.valid && !entry.to.split.spec) || + entry.to.split.reserved) continue; perf_clear_branch_entry_bitfields(br + out); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 30fb4931d..1394312b7 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) while (++i < cpuc->n_events) { cpuc->event_list[i-1] = cpuc->event_list[i]; cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + cpuc->assign[i-1] = cpuc->assign[i]; } cpuc->event_constraint[i-1] = NULL; --cpuc->n_events; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3216da707..36ceecd40 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -12,6 +12,7 @@ #include #include #include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -109,7 +110,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) static inline u32 native_apic_mem_read(u32 reg) { - return *((volatile u32 *)(APIC_BASE + reg)); + return readl((void __iomem *)(APIC_BASE + reg)); } extern void native_apic_wait_icr_idle(void); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index a38cc0afc..a3e0be047 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -81,6 +81,7 @@ #ifndef __ASSEMBLY__ extern unsigned int output_len; +extern const unsigned long kernel_text_size; extern const unsigned long kernel_total_size; unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 1f97d00ad..100a752c3 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -13,9 +13,10 @@ enum cc_vendor { }; extern enum cc_vendor cc_vendor; -extern u64 cc_mask; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM +extern u64 cc_mask; + static inline void cc_set_mask(u64 mask) { RIP_REL_REF(cc_mask) = mask; @@ -25,6 +26,8 @@ u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); void cc_random_init(void); #else +static const u64 cc_mask = 0; + static inline u64 cc_mkenc(u64 val) { return val; diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 5f1d3c421..cc9ccf61b 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_INIT_H #define _ASM_X86_INIT_H +#define __head __section(".head.text") + struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ void *context; /* context for alloc_pgt_page */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dfcdcafe3..887a17148 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -773,6 +773,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; u32 kvm_cpuid_base; + bool is_amd_compatible; u64 reserved_gpa_bits; int maxphyaddr; diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 41d06822b..853f423b1 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -46,8 +46,8 @@ void __init sme_unmap_bootdata(char *real_mode_data); void __init sme_early_init(void); void __init sev_setup_arch(void); -void __init sme_encrypt_kernel(struct boot_params *bp); -void __init sme_enable(struct boot_params *bp); +void sme_encrypt_kernel(struct boot_params *bp); +void sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); @@ -80,8 +80,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } static inline void __init sme_early_init(void) { } static inline void __init sev_setup_arch(void) { } -static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } -static inline void __init sme_enable(struct boot_params *bp) { } +static inline void sme_encrypt_kernel(struct boot_params *bp) { } +static inline void sme_enable(struct boot_params *bp) { } static inline void sev_es_init_vc_handling(void) { } diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index a506a4114..86bd4311d 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -11,20 +11,14 @@ #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) - -#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) - #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) -/* Cast *PAGE_MASK to a signed type so that it is sign-extended if +/* Cast P*D_MASK to a signed type so that it is sign-extended if virtual addresses are 32-bits but physical addresses are larger (ie, 32-bit PAE). */ #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK) #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f6116b66f..f0b9b37c4 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -127,7 +127,7 @@ */ #define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |\ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \ + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_CC | \ _PAGE_UFFD_WP) #define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT) #define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE) @@ -153,6 +153,7 @@ enum page_cache_mode { }; #endif +#define _PAGE_CC (_AT(pteval_t, cc_mask)) #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) #define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index c57dd2115..bcac2e53d 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -192,15 +192,15 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) struct snp_guest_request_ioctl; void setup_ghcb(void); -void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, - unsigned long npages); -void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, - unsigned long npages); +void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned long npages); +void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned long npages); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); -void __init __noreturn snp_abort(void); +void __noreturn snp_abort(void); void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); u64 snp_get_unsupported_features(u64 status); diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 19a0207e5..56a917df4 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -504,7 +504,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) } a = aper + iommu_size; - iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; + iommu_size -= round_up(a, PMD_SIZE) - a; if (iommu_size < 64*1024*1024) { pr_warn("PCI-DMA: Warning: Small IOMMU %luMB." diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 770557110..e1672cc77 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1760,11 +1760,11 @@ static int x2apic_state; static bool x2apic_hw_locked(void) { - u64 ia32_cap; + u64 x86_arch_cap_msr; u64 msr; - ia32_cap = x86_read_arch_cap_msr(); - if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) { + x86_arch_cap_msr = x86_read_arch_cap_msr(); + if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) { rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr); return (msr & LEGACY_XAPIC_DISABLED); } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 96bd3ee83..3f38592ec 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,6 +60,8 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); +static u64 __ro_after_init x86_arch_cap_msr; + static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; @@ -143,6 +145,8 @@ void __init cpu_select_mitigations(void) x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; } + x86_arch_cap_msr = x86_read_arch_cap_msr(); + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -300,8 +304,6 @@ static const char * const taa_strings[] = { static void __init taa_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_TAA)) { taa_mitigation = TAA_MITIGATION_OFF; return; @@ -340,9 +342,8 @@ static void __init taa_select_mitigation(void) * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode * update is required. */ - ia32_cap = x86_read_arch_cap_msr(); - if ( (ia32_cap & ARCH_CAP_MDS_NO) && - !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) && + !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)) taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; /* @@ -400,8 +401,6 @@ static const char * const mmio_strings[] = { static void __init mmio_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || cpu_mitigations_off()) { @@ -412,8 +411,6 @@ static void __init mmio_select_mitigation(void) if (mmio_mitigation == MMIO_MITIGATION_OFF) return; - ia32_cap = x86_read_arch_cap_msr(); - /* * Enable CPU buffer clear mitigation for host and VMM, if also affected * by MDS or TAA. Otherwise, enable mitigation for VMM only. @@ -436,7 +433,7 @@ static void __init mmio_select_mitigation(void) * be propagated to uncore buffers, clearing the Fill buffers on idle * is required irrespective of SMT state. */ - if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) static_branch_enable(&mds_idle_clear); /* @@ -446,10 +443,10 @@ static void __init mmio_select_mitigation(void) * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS * affected systems. */ - if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) || (boot_cpu_has(X86_FEATURE_MD_CLEAR) && boot_cpu_has(X86_FEATURE_FLUSH_L1D) && - !(ia32_cap & ARCH_CAP_MDS_NO))) + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))) mmio_mitigation = MMIO_MITIGATION_VERW; else mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; @@ -507,7 +504,7 @@ static void __init rfds_select_mitigation(void) if (rfds_mitigation == RFDS_MITIGATION_OFF) return; - if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; @@ -658,8 +655,6 @@ void update_srbds_msr(void) static void __init srbds_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_SRBDS)) return; @@ -668,8 +663,7 @@ static void __init srbds_select_mitigation(void) * are only exposed to SRBDS when TSX is enabled or when CPU is affected * by Processor MMIO Stale Data vulnerability. */ - ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) @@ -812,7 +806,7 @@ static void __init gds_select_mitigation(void) /* Will verify below that mitigation _can_ be disabled */ /* No microcode */ - if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { + if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) { if (gds_mitigation == GDS_MITIGATION_FORCE) { /* * This only needs to be done on the boot CPU so do it @@ -1521,20 +1515,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +static bool __ro_after_init rrsba_disabled; + /* Disable in-kernel use of non-RSB RET predictors */ static void __init spec_ctrl_disable_kernel_rrsba(void) { - u64 ia32_cap; + if (rrsba_disabled) + return; - if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) { + rrsba_disabled = true; return; + } - ia32_cap = x86_read_arch_cap_msr(); + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; - if (ia32_cap & ARCH_CAP_RRSBA) { - x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - update_spec_ctrl(x86_spec_ctrl_base); - } + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + rrsba_disabled = true; } static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) @@ -1603,13 +1602,10 @@ static bool __init spec_ctrl_bhi_dis(void) enum bhi_mitigations { BHI_MITIGATION_OFF, BHI_MITIGATION_ON, - BHI_MITIGATION_AUTO, }; static enum bhi_mitigations bhi_mitigation __ro_after_init = - IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : - IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF : - BHI_MITIGATION_AUTO; + IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF; static int __init spectre_bhi_parse_cmdline(char *str) { @@ -1620,8 +1616,6 @@ static int __init spectre_bhi_parse_cmdline(char *str) bhi_mitigation = BHI_MITIGATION_OFF; else if (!strcmp(str, "on")) bhi_mitigation = BHI_MITIGATION_ON; - else if (!strcmp(str, "auto")) - bhi_mitigation = BHI_MITIGATION_AUTO; else pr_err("Ignoring unknown spectre_bhi option (%s)", str); @@ -1635,9 +1629,12 @@ static void __init bhi_select_mitigation(void) return; /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && - !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) - return; + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { + spec_ctrl_disable_kernel_rrsba(); + if (rrsba_disabled) + return; + } if (spec_ctrl_bhi_dis()) return; @@ -1649,9 +1646,6 @@ static void __init bhi_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); - if (bhi_mitigation == BHI_MITIGATION_AUTO) - return; - /* Mitigate syscalls when the mitigation is forced =on */ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); @@ -1884,8 +1878,6 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { - u64 ia32_cap = x86_read_arch_cap_msr(); - /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1900,7 +1892,7 @@ static void update_mds_branch_idle(void) if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || - (ia32_cap & ARCH_CAP_FBSDP_NO)) { + (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); } } @@ -2788,21 +2780,22 @@ static char *pbrsb_eibrs_state(void) } } -static const char * const spectre_bhi_state(void) +static const char *spectre_bhi_state(void) { if (!boot_cpu_has_bug(X86_BUG_BHI)) return "; BHI: Not affected"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) return "; BHI: BHI_DIS_S"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) return "; BHI: SW loop, KVM: SW loop"; else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && - !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) && + rrsba_disabled) return "; BHI: Retpoline"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) - return "; BHI: Syscall hardening, KVM: SW loop"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Vulnerable, KVM: SW loop"; - return "; BHI: Vulnerable (Syscall hardening enabled)"; + return "; BHI: Vulnerable"; } static ssize_t spectre_v2_show_state(char *buf) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 08fe77d2a..f2bc651c0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1308,25 +1308,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi u64 x86_read_arch_cap_msr(void) { - u64 ia32_cap = 0; + u64 x86_arch_cap_msr = 0; if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr); - return ia32_cap; + return x86_arch_cap_msr; } -static bool arch_cap_mmio_immune(u64 ia32_cap) +static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr) { - return (ia32_cap & ARCH_CAP_FBSDP_NO && - ia32_cap & ARCH_CAP_PSDP_NO && - ia32_cap & ARCH_CAP_SBDR_SSDP_NO); + return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO && + x86_arch_cap_msr & ARCH_CAP_PSDP_NO && + x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO); } -static bool __init vulnerable_to_rfds(u64 ia32_cap) +static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) { /* The "immunity" bit trumps everything else: */ - if (ia32_cap & ARCH_CAP_RFDS_NO) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO) return false; /* @@ -1334,7 +1334,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) * indicate that mitigation is needed because guest is running on a * vulnerable hardware or may migrate to such hardware: */ - if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) return true; /* Only consult the blacklist when there is no enumeration: */ @@ -1343,11 +1343,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { - u64 ia32_cap = x86_read_arch_cap_msr(); + u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && - !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) @@ -1359,7 +1359,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); @@ -1367,15 +1367,15 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature * flag and protect from vendor-specific bugs via the whitelist. */ - if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { + if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && - !(ia32_cap & ARCH_CAP_MDS_NO)) { + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); @@ -1394,9 +1394,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * TSX_CTRL check alone is not sufficient for cases when the microcode * update is not present or running as guest that don't get TSX_CTRL. */ - if (!(ia32_cap & ARCH_CAP_TAA_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) && (cpu_has(c, X86_FEATURE_RTM) || - (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); /* @@ -1422,7 +1422,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ - if (!arch_cap_mmio_immune(ia32_cap)) { + if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) @@ -1430,7 +1430,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { - if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA)) setup_force_cpu_bug(X86_BUG_RETBLEED); } @@ -1443,7 +1443,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * disabling AVX2. The only way to do this in HW is to clear XCR0[2], * which means that AVX will be disabled. */ - if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) && boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); @@ -1452,11 +1452,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SRSO); } - if (vulnerable_to_rfds(ia32_cap)) + if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); /* When virtualized, eIBRS could be hidden, assume vulnerable */ - if (!(ia32_cap & ARCH_CAP_BHI_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && !cpu_matches(cpu_vuln_whitelist, NO_BHI) && (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || boot_cpu_has(X86_FEATURE_HYPERVISOR))) @@ -1466,7 +1466,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) return; /* Rogue Data Cache Load? No! */ - if (ia32_cap & ARCH_CAP_RDCL_NO) + if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index c881bcafb..9c19f40b1 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -44,7 +44,10 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, { X86_FEATURE_AES, X86_FEATURE_XMM2 }, { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_GFNI, X86_FEATURE_XMM2 }, { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_VAES, X86_FEATURE_AVX }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX }, { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, @@ -56,9 +59,6 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, - { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 84adf12a7..4fae511b2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -41,6 +41,7 @@ #include #include #include +#include /* * Manage page tables very early on. @@ -84,8 +85,6 @@ static struct desc_ptr startup_gdt_descr = { .address = 0, }; -#define __head __section(".head.text") - static void __head *fixup_pointer(void *ptr, unsigned long physaddr) { return ptr - (void *)_text + (void *)physaddr; @@ -203,7 +202,7 @@ unsigned long __head __startup_64(unsigned long physaddr, load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map); /* Is the address not 2M aligned? */ - if (load_delta & ~PMD_PAGE_MASK) + if (load_delta & ~PMD_MASK) for (;;); /* Include the SME encryption mask in the fixup value */ @@ -588,7 +587,7 @@ static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler) } /* This runs while still in the direct mapping */ -static void startup_64_load_idt(unsigned long physbase) +static void __head startup_64_load_idt(unsigned long physbase) { struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase); gate_desc *idt = fixup_pointer(bringup_idt_table, physbase); diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index b348a672f..b525fe6d6 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7f94dbbc3..a0d3059be 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -137,7 +137,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, log_lvl, d3, d6, d7); } - if (cpu_feature_enabled(X86_FEATURE_OSPKE)) + if (cr4 & X86_CR4_PKE) printk("%sPKRU: %08x\n", log_lvl, read_pkru()); } diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 271e70d57..3fe76bf17 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -86,7 +86,8 @@ static bool __init sev_es_check_cpu_features(void) return true; } -static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) +static void __head __noreturn +sev_es_terminate(unsigned int set, unsigned int reason) { u64 val = GHCB_MSR_TERM_REQ; @@ -323,13 +324,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid */ static const struct snp_cpuid_table *snp_cpuid_get_table(void) { - void *ptr; - - asm ("lea cpuid_table_copy(%%rip), %0" - : "=r" (ptr) - : "p" (&cpuid_table_copy)); - - return ptr; + return &RIP_REL_REF(cpuid_table_copy); } /* @@ -388,7 +383,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) return xsave_size; } -static bool +static bool __head snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -525,7 +520,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller. */ -static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +static int __head +snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -567,7 +563,7 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le * page yet, so it only supports the MSR based communication with the * hypervisor and only the CPUID exit-code. */ -void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) +void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) { unsigned int subfn = lower_bits(regs->cx, 32); unsigned int fn = lower_bits(regs->ax, 32); @@ -1013,7 +1009,8 @@ struct cc_setup_data { * Search for a Confidential Computing blob passed in as a setup_data entry * via the Linux Boot Protocol. */ -static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) +static __head +struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) { struct cc_setup_data *sd = NULL; struct setup_data *hdr; @@ -1040,7 +1037,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) * mapping needs to be updated in sync with all the changes to virtual memory * layout and related mapping facilities throughout the boot process. */ -static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) +static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) { const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; int i; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index e35fcc8d4..f8a8249ae 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -690,7 +691,7 @@ static void pvalidate_pages(unsigned long vaddr, unsigned long npages, bool vali } } -static void __init early_set_pages_state(unsigned long paddr, unsigned long npages, enum psc_op op) +static void __head early_set_pages_state(unsigned long paddr, unsigned long npages, enum psc_op op) { unsigned long paddr_end; u64 val; @@ -728,7 +729,7 @@ e_term: sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); } -void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, +void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { /* @@ -2085,7 +2086,7 @@ fail: * * Scan for the blob in that order. */ -static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2111,7 +2112,7 @@ found_cc_info: return cc_info; } -bool __init snp_init(struct boot_params *bp) +bool __head snp_init(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2133,7 +2134,7 @@ bool __init snp_init(struct boot_params *bp) return true; } -void __init __noreturn snp_abort(void) +void __head __noreturn snp_abort(void) { sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 62a44455c..f02961cbb 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -340,6 +340,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_update_pv_runtime(vcpu); + vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index b1658c0de..18fd2e845 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -125,6 +125,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu) return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx); } +static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.is_amd_compatible; +} + +static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu) +{ + return !guest_cpuid_is_amd_compatible(vcpu); +} + static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bfeafe485..c90fef025 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2548,7 +2548,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); - if (r && lvt_type == APIC_LVTPC) + if (r && lvt_type == APIC_LVTPC && + guest_cpuid_is_intel_compatible(apic->vcpu)) kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); return r; } diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d30325e29..13134954e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4649,7 +4649,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, context->cpu_role.base.level, is_efer_nx(context), guest_can_use_gbpages(vcpu), is_cr4_pse(context), - guest_cpuid_is_amd_or_hygon(vcpu)); + guest_cpuid_is_amd_compatible(vcpu)); } static void diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5c1590855..10aff2c9a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7742,8 +7742,28 @@ static u64 vmx_get_perf_capabilities(void) if (vmx_pebs_supported()) { perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; - if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4) - perf_cap &= ~PERF_CAP_PEBS_BASELINE; + + /* + * Disallow adaptive PEBS as it is functionally broken, can be + * used by the guest to read *host* LBRs, and can be used to + * bypass userspace event filters. To correctly and safely + * support adaptive PEBS, KVM needs to: + * + * 1. Account for the ADAPTIVE flag when (re)programming fixed + * counters. + * + * 2. Gain support from perf (or take direct control of counter + * programming) to support events without adaptive PEBS + * enabled for the hardware counter. + * + * 3. Ensure LBR MSRs cannot hold host data on VM-Entry with + * adaptive PEBS enabled and MSR_PEBS_DATA_CFG.LBRS=1. + * + * 4. Document which PMU events are effectively exposed to the + * guest via adaptive PEBS, and make adaptive PEBS mutually + * exclusive with KVM_SET_PMU_EVENT_FILTER if necessary. + */ + perf_cap &= ~PERF_CAP_PEBS_BASELINE; } return perf_cap; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f72476503..a2ea636a2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3278,7 +3278,7 @@ static bool is_mci_status_msr(u32 msr) static bool can_set_mci_status(struct kvm_vcpu *vcpu) { /* McStatusWrEn enabled? */ - if (guest_cpuid_is_amd_or_hygon(vcpu)) + if (guest_cpuid_is_amd_compatible(vcpu)) return !!(vcpu->arch.msr_hwcr & BIT_ULL(18)); return false; diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 9de3d900b..e25288ee3 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -26,7 +26,7 @@ SYM_FUNC_START(sme_encrypt_execute) * RCX - virtual address of the encryption workarea, including: * - stack page (PAGE_SIZE) * - encryption routine page (PAGE_SIZE) - * - intermediate copy buffer (PMD_PAGE_SIZE) + * - intermediate copy buffer (PMD_SIZE) * R8 - physical address of the pagetables to use for encryption */ @@ -123,7 +123,7 @@ SYM_FUNC_START(__enc_copy) wbinvd /* Invalidate any cache entries */ /* Copy/encrypt up to 2MB at a time */ - movq $PMD_PAGE_SIZE, %r12 + movq $PMD_SIZE, %r12 1: cmpq %r12, %r9 jnb 2f diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 06ccbd36e..f17609884 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -41,6 +41,7 @@ #include #include +#include #include #include #include @@ -93,12 +94,12 @@ struct sme_populate_pgd_data { * section is 2MB aligned to allow for simple pagetable setup using only * PMD entries (see vmlinux.lds.S). */ -static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch"); +static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; -static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) +static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd) { unsigned long pgd_start, pgd_end, pgd_size; pgd_t *pgd_p; @@ -113,7 +114,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) memset(pgd_p, 0, pgd_size); } -static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) +static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) { pgd_t *pgd; p4d_t *p4d; @@ -150,7 +151,7 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) return pud; } -static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) +static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -166,7 +167,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); } -static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) +static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -192,17 +193,17 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); } -static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) +static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd_large(ppd); - ppd->vaddr += PMD_PAGE_SIZE; - ppd->paddr += PMD_PAGE_SIZE; + ppd->vaddr += PMD_SIZE; + ppd->paddr += PMD_SIZE; } } -static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) +static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd(ppd); @@ -212,7 +213,7 @@ static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) } } -static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, +static void __head __sme_map_range(struct sme_populate_pgd_data *ppd, pmdval_t pmd_flags, pteval_t pte_flags) { unsigned long vaddr_end; @@ -224,11 +225,11 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, vaddr_end = ppd->vaddr_end; /* If start is not 2MB aligned, create PTE entries */ - ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); + ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_SIZE); __sme_map_range_pte(ppd); /* Create PMD entries */ - ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; + ppd->vaddr_end = vaddr_end & PMD_MASK; __sme_map_range_pmd(ppd); /* If end is not 2MB aligned, create PTE entries */ @@ -236,22 +237,22 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, __sme_map_range_pte(ppd); } -static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); } -static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); } -static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); } -static unsigned long __init sme_pgtable_calc(unsigned long len) +static unsigned long __head sme_pgtable_calc(unsigned long len) { unsigned long entries = 0, tables = 0; @@ -288,7 +289,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) return entries + tables; } -void __init sme_encrypt_kernel(struct boot_params *bp) +void __head sme_encrypt_kernel(struct boot_params *bp) { unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; @@ -323,9 +324,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * memory from being cached. */ - /* Physical addresses gives us the identity mapped virtual addresses */ - kernel_start = __pa_symbol(_text); - kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); + kernel_start = (unsigned long)RIP_REL_REF(_text); + kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE); kernel_len = kernel_end - kernel_start; initrd_start = 0; @@ -342,25 +342,17 @@ void __init sme_encrypt_kernel(struct boot_params *bp) } #endif - /* - * We're running identity mapped, so we must obtain the address to the - * SME encryption workarea using rip-relative addressing. - */ - asm ("lea sme_workarea(%%rip), %0" - : "=r" (workarea_start) - : "p" (sme_workarea)); - /* * Calculate required number of workarea bytes needed: * executable encryption area size: * stack page (PAGE_SIZE) * encryption routine page (PAGE_SIZE) - * intermediate copy buffer (PMD_PAGE_SIZE) + * intermediate copy buffer (PMD_SIZE) * pagetable structures for the encryption of the kernel * pagetable structures for workarea (in case not currently mapped) */ - execute_start = workarea_start; - execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; + execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea); + execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; execute_len = execute_end - execute_start; /* @@ -383,7 +375,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * before it is mapped. */ workarea_len = execute_len + pgtable_area_len; - workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); + workarea_end = ALIGN(workarea_start + workarea_len, PMD_SIZE); /* * Set the address to the start of where newly created pagetable @@ -502,7 +494,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) native_write_cr3(__native_read_cr3()); } -void __init sme_enable(struct boot_params *bp) +void __head sme_enable(struct boot_params *bp) { const char *cmdline_ptr, *cmdline_arg, *cmdline_on; unsigned int eax, ebx, ecx, edx; diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 5f0ce77a2..68d4f328f 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -747,11 +747,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) switch (level) { case PG_LEVEL_1G: phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PUD_PAGE_MASK; + offset = virt_addr & ~PUD_MASK; break; case PG_LEVEL_2M: phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PMD_PAGE_MASK; + offset = virt_addr & ~PMD_MASK; break; default: phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; @@ -1041,7 +1041,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, case PG_LEVEL_1G: ref_prot = pud_pgprot(*(pud_t *)kpte); ref_pfn = pud_pfn(*(pud_t *)kpte); - pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; + pfninc = PMD_SIZE >> PAGE_SHIFT; lpaddr = address & PUD_MASK; lpinc = PMD_SIZE; /* diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index ffe3b3a08..78414c6d1 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -592,7 +592,7 @@ static void pti_set_kernel_image_nonglobal(void) * of the image. */ unsigned long start = PFN_ALIGN(_text); - unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE); + unsigned long end = ALIGN((unsigned long)_end, PMD_SIZE); /* * This clears _PAGE_GLOBAL from the entire kernel image. -- cgit v1.2.3