summaryrefslogtreecommitdiffstats
path: root/arch/x86/boot/compressed/head_64.S
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
commitace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
treeb2d64bc10158fdd5497876388cd68142ca374ed3 /arch/x86/boot/compressed/head_64.S
parentInitial commit. (diff)
downloadlinux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/x86/boot/compressed/head_64.S')
-rw-r--r--arch/x86/boot/compressed/head_64.S645
1 files changed, 645 insertions, 0 deletions
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
new file mode 100644
index 0000000000..bf4a10a579
--- /dev/null
+++ b/arch/x86/boot/compressed/head_64.S
@@ -0,0 +1,645 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * linux/boot/head.S
+ *
+ * Copyright (C) 1991, 1992, 1993 Linus Torvalds
+ */
+
+/*
+ * head.S contains the 32-bit startup code.
+ *
+ * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
+ * the page directory will exist. The startup code will be overwritten by
+ * the page directory. [According to comments etc elsewhere on a compressed
+ * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
+ *
+ * Page 0 is deliberately kept safe, since System Management Mode code in
+ * laptops may need to access the BIOS data stored there. This is also
+ * useful for future device drivers that either access the BIOS via VM86
+ * mode.
+ */
+
+/*
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ */
+ .code32
+ .text
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/boot.h>
+#include <asm/msr.h>
+#include <asm/processor-flags.h>
+#include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
+#include <asm/desc_defs.h>
+#include <asm/trapnr.h>
+#include "pgtable.h"
+
+/*
+ * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
+ * in assembly errors due to trying to move .org backward due to the excessive
+ * alignment.
+ */
+#undef __ALIGN
+#define __ALIGN .balign 16, 0x90
+
+/*
+ * Locally defined symbols should be marked hidden:
+ */
+ .hidden _bss
+ .hidden _ebss
+ .hidden _end
+
+ __HEAD
+
+/*
+ * This macro gives the relative virtual address of X, i.e. the offset of X
+ * from startup_32. This is the same as the link-time virtual address of X,
+ * since startup_32 is at 0, but defining it this way tells the
+ * assembler/linker that we do not want the actual run-time address of X. This
+ * prevents the linker from trying to create unwanted run-time relocation
+ * entries for the reference when the compressed kernel is linked as PIE.
+ *
+ * A reference X(%reg) will result in the link-time VA of X being stored with
+ * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
+ * adds the 64-bit base address where the kernel is loaded.
+ *
+ * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
+ * and no run-time relocation.
+ *
+ * The macro should be used as a displacement with a base register containing
+ * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
+ * [$ rva(X)].
+ *
+ * This macro can only be used from within the .head.text section, since the
+ * expression requires startup_32 to be in the same section as the code being
+ * assembled.
+ */
+#define rva(X) ((X) - startup_32)
+
+ .code32
+SYM_FUNC_START(startup_32)
+ /*
+ * 32bit entry is 0 and it is ABI so immutable!
+ * If we come here directly from a bootloader,
+ * kernel(text+data+bss+brk) ramdisk, zero_page, command line
+ * all need to be under the 4G limit.
+ */
+ cld
+ cli
+
+/*
+ * Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at. This can only be done
+ * with a short local call on x86. Nothing else will tell us what
+ * address we are running at. The reserved chunk of the real-mode
+ * data at 0x1e4 (defined as a scratch field) are used as the stack
+ * for this calculation. Only 4 bytes are needed.
+ */
+ leal (BP_scratch+4)(%esi), %esp
+ call 1f
+1: popl %ebp
+ subl $ rva(1b), %ebp
+
+ /* Load new GDT with the 64bit segments using 32bit descriptor */
+ leal rva(gdt)(%ebp), %eax
+ movl %eax, 2(%eax)
+ lgdt (%eax)
+
+ /* Load segment registers with our descriptors */
+ movl $__BOOT_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
+
+ /* Setup a stack and load CS from current GDT */
+ leal rva(boot_stack_end)(%ebp), %esp
+
+ pushl $__KERNEL32_CS
+ leal rva(1f)(%ebp), %eax
+ pushl %eax
+ lretl
+1:
+
+ /* Setup Exception handling for SEV-ES */
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ call startup32_load_idt
+#endif
+
+ /* Make sure cpu supports long mode. */
+ call verify_cpu
+ testl %eax, %eax
+ jnz .Lno_longmode
+
+/*
+ * Compute the delta between where we were compiled to run at
+ * and where the code will actually run at.
+ *
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
+ * contains the address where we should move the kernel image temporarily
+ * for safe in-place decompression.
+ */
+
+#ifdef CONFIG_RELOCATABLE
+ movl %ebp, %ebx
+ movl BP_kernel_alignment(%esi), %eax
+ decl %eax
+ addl %eax, %ebx
+ notl %eax
+ andl %eax, %ebx
+ cmpl $LOAD_PHYSICAL_ADDR, %ebx
+ jae 1f
+#endif
+ movl $LOAD_PHYSICAL_ADDR, %ebx
+1:
+
+ /* Target address to relocate to for decompression */
+ addl BP_init_size(%esi), %ebx
+ subl $ rva(_end), %ebx
+
+/*
+ * Prepare for entering 64 bit mode
+ */
+
+ /* Enable PAE mode */
+ movl %cr4, %eax
+ orl $X86_CR4_PAE, %eax
+ movl %eax, %cr4
+
+ /*
+ * Build early 4G boot pagetable
+ */
+ /*
+ * If SEV is active then set the encryption mask in the page tables.
+ * This will ensure that when the kernel is copied and decompressed
+ * it will be done so encrypted.
+ */
+ xorl %edx, %edx
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ call get_sev_encryption_bit
+ xorl %edx, %edx
+ testl %eax, %eax
+ jz 1f
+ subl $32, %eax /* Encryption bit is always above bit 31 */
+ bts %eax, %edx /* Set encryption mask for page tables */
+ /*
+ * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
+ * startup32_check_sev_cbit() will do a check. sev_enable() will
+ * initialize sev_status with all the bits reported by
+ * MSR_AMD_SEV_STATUS later, but only MSR_AMD64_SEV_ENABLED_BIT
+ * needs to be set for now.
+ */
+ movl $1, rva(sev_status)(%ebp)
+1:
+#endif
+
+ /* Initialize Page tables to 0 */
+ leal rva(pgtable)(%ebx), %edi
+ xorl %eax, %eax
+ movl $(BOOT_INIT_PGT_SIZE/4), %ecx
+ rep stosl
+
+ /* Build Level 4 */
+ leal rva(pgtable + 0)(%ebx), %edi
+ leal 0x1007 (%edi), %eax
+ movl %eax, 0(%edi)
+ addl %edx, 4(%edi)
+
+ /* Build Level 3 */
+ leal rva(pgtable + 0x1000)(%ebx), %edi
+ leal 0x1007(%edi), %eax
+ movl $4, %ecx
+1: movl %eax, 0x00(%edi)
+ addl %edx, 0x04(%edi)
+ addl $0x00001000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Build Level 2 */
+ leal rva(pgtable + 0x2000)(%ebx), %edi
+ movl $0x00000183, %eax
+ movl $2048, %ecx
+1: movl %eax, 0(%edi)
+ addl %edx, 4(%edi)
+ addl $0x00200000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Enable the boot page tables */
+ leal rva(pgtable)(%ebx), %eax
+ movl %eax, %cr3
+
+ /* Enable Long mode in EFER (Extended Feature Enable Register) */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ /* After gdt is loaded */
+ xorl %eax, %eax
+ lldt %ax
+ movl $__BOOT_TSS, %eax
+ ltr %ax
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /* Check if the C-bit position is correct when SEV is active */
+ call startup32_check_sev_cbit
+#endif
+
+ /*
+ * Setup for the jump to 64bit mode
+ *
+ * When the jump is performed we will be in long mode but
+ * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
+ * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
+ * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+ * We place all of the values on our mini stack so lret can
+ * used to perform that far jump.
+ */
+ leal rva(startup_64)(%ebp), %eax
+#ifdef CONFIG_EFI_MIXED
+ cmpb $1, rva(efi_is64)(%ebp)
+ je 1f
+ leal rva(startup_64_mixed_mode)(%ebp), %eax
+1:
+#endif
+
+ pushl $__KERNEL_CS
+ pushl %eax
+
+ /* Enter paged protected Mode, activating Long Mode */
+ movl $CR0_STATE, %eax
+ movl %eax, %cr0
+
+ /* Jump from 32bit compatibility mode into 64bit mode. */
+ lret
+SYM_FUNC_END(startup_32)
+
+ .code64
+ .org 0x200
+SYM_CODE_START(startup_64)
+ /*
+ * 64bit entry is 0x200 and it is ABI so immutable!
+ * We come here either from startup_32 or directly from a
+ * 64bit bootloader.
+ * If we come here from a bootloader, kernel(text+data+bss+brk),
+ * ramdisk, zero_page, command line could be above 4G.
+ * We depend on an identity mapped page table being provided
+ * that maps our entire kernel(text+data+bss+brk), zero page
+ * and command line.
+ */
+
+ cld
+ cli
+
+ /* Setup data segments. */
+ xorl %eax, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+ movl %eax, %fs
+ movl %eax, %gs
+
+ /*
+ * Compute the decompressed kernel start address. It is where
+ * we were loaded at aligned to a 2M boundary. %rbp contains the
+ * decompressed kernel start address.
+ *
+ * If it is a relocatable kernel then decompress and run the kernel
+ * from load address aligned to 2MB addr, otherwise decompress and
+ * run the kernel from LOAD_PHYSICAL_ADDR
+ *
+ * We cannot rely on the calculation done in 32-bit mode, since we
+ * may have been invoked via the 64-bit entry point.
+ */
+
+ /* Start with the delta to where the kernel will run at. */
+#ifdef CONFIG_RELOCATABLE
+ leaq startup_32(%rip) /* - $startup_32 */, %rbp
+ movl BP_kernel_alignment(%rsi), %eax
+ decl %eax
+ addq %rax, %rbp
+ notq %rax
+ andq %rax, %rbp
+ cmpq $LOAD_PHYSICAL_ADDR, %rbp
+ jae 1f
+#endif
+ movq $LOAD_PHYSICAL_ADDR, %rbp
+1:
+
+ /* Target address to relocate to for decompression */
+ movl BP_init_size(%rsi), %ebx
+ subl $ rva(_end), %ebx
+ addq %rbp, %rbx
+
+ /* Set up the stack */
+ leaq rva(boot_stack_end)(%rbx), %rsp
+
+ /*
+ * At this point we are in long mode with 4-level paging enabled,
+ * but we might want to enable 5-level paging or vice versa.
+ *
+ * The problem is that we cannot do it directly. Setting or clearing
+ * CR4.LA57 in long mode would trigger #GP. So we need to switch off
+ * long mode and paging first.
+ *
+ * We also need a trampoline in lower memory to switch over from
+ * 4- to 5-level paging for cases when the bootloader puts the kernel
+ * above 4G, but didn't enable 5-level paging for us.
+ *
+ * The same trampoline can be used to switch from 5- to 4-level paging
+ * mode, like when starting 4-level paging kernel via kexec() when
+ * original kernel worked in 5-level paging mode.
+ *
+ * For the trampoline, we need the top page table to reside in lower
+ * memory as we don't have a way to load 64-bit values into CR3 in
+ * 32-bit mode.
+ */
+
+ /* Make sure we have GDT with 32-bit code segment */
+ leaq gdt64(%rip), %rax
+ addq %rax, 2(%rax)
+ lgdt (%rax)
+
+ /* Reload CS so IRET returns to a CS actually in the GDT */
+ pushq $__KERNEL_CS
+ leaq .Lon_kernel_cs(%rip), %rax
+ pushq %rax
+ lretq
+
+.Lon_kernel_cs:
+ /*
+ * RSI holds a pointer to a boot_params structure provided by the
+ * loader, and this needs to be preserved across C function calls. So
+ * move it into a callee saved register.
+ */
+ movq %rsi, %r15
+
+ call load_stage1_idt
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /*
+ * Now that the stage1 interrupt handlers are set up, #VC exceptions from
+ * CPUID instructions can be properly handled for SEV-ES guests.
+ *
+ * For SEV-SNP, the CPUID table also needs to be set up in advance of any
+ * CPUID instructions being issued, so go ahead and do that now via
+ * sev_enable(), which will also handle the rest of the SEV-related
+ * detection/setup to ensure that has been done in advance of any dependent
+ * code. Pass the boot_params pointer as the first argument.
+ */
+ movq %r15, %rdi
+ call sev_enable
+#endif
+
+ /*
+ * configure_5level_paging() updates the number of paging levels using
+ * a trampoline in 32-bit addressable memory if the current number does
+ * not match the desired number.
+ *
+ * Pass the boot_params pointer as the first argument. The second
+ * argument is the relocated address of the page table to use instead
+ * of the page table in trampoline memory (if required).
+ */
+ movq %r15, %rdi
+ leaq rva(top_pgtable)(%rbx), %rsi
+ call configure_5level_paging
+
+ /* Zero EFLAGS */
+ pushq $0
+ popfq
+
+/*
+ * Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+ leaq (_bss-8)(%rip), %rsi
+ leaq rva(_bss-8)(%rbx), %rdi
+ movl $(_bss - startup_32), %ecx
+ shrl $3, %ecx
+ std
+ rep movsq
+ cld
+
+ /*
+ * The GDT may get overwritten either during the copy we just did or
+ * during extract_kernel below. To avoid any issues, repoint the GDTR
+ * to the new copy of the GDT.
+ */
+ leaq rva(gdt64)(%rbx), %rax
+ leaq rva(gdt)(%rbx), %rdx
+ movq %rdx, 2(%rax)
+ lgdt (%rax)
+
+/*
+ * Jump to the relocated address.
+ */
+ leaq rva(.Lrelocated)(%rbx), %rax
+ jmp *%rax
+SYM_CODE_END(startup_64)
+
+ .text
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
+
+/*
+ * Clear BSS (stack is currently empty)
+ */
+ xorl %eax, %eax
+ leaq _bss(%rip), %rdi
+ leaq _ebss(%rip), %rcx
+ subq %rdi, %rcx
+ shrq $3, %rcx
+ rep stosq
+
+ call load_stage2_idt
+
+ /* Pass boot_params to initialize_identity_maps() */
+ movq %r15, %rdi
+ call initialize_identity_maps
+
+/*
+ * Do the extraction, and jump to the new kernel..
+ */
+ /* pass struct boot_params pointer and output target address */
+ movq %r15, %rdi
+ movq %rbp, %rsi
+ call extract_kernel /* returns kernel entry point in %rax */
+
+/*
+ * Jump to the decompressed kernel.
+ */
+ movq %r15, %rsi
+ jmp *%rax
+SYM_FUNC_END(.Lrelocated)
+
+/*
+ * This is the 32-bit trampoline that will be copied over to low memory. It
+ * will be called using the ordinary 64-bit calling convention from code
+ * running in 64-bit mode.
+ *
+ * Return address is at the top of the stack (might be above 4G).
+ * The first argument (EDI) contains the address of the temporary PGD level
+ * page table in 32-bit addressable memory which will be programmed into
+ * register CR3.
+ */
+ .section ".rodata", "a", @progbits
+SYM_CODE_START(trampoline_32bit_src)
+ /*
+ * Preserve callee save 64-bit registers on the stack: this is
+ * necessary because the architecture does not guarantee that GPRs will
+ * retain their full 64-bit values across a 32-bit mode switch.
+ */
+ pushq %r15
+ pushq %r14
+ pushq %r13
+ pushq %r12
+ pushq %rbp
+ pushq %rbx
+
+ /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
+ movq %rsp, %rbx
+ shrq $32, %rbx
+
+ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+ pushq $__KERNEL32_CS
+ leaq 0f(%rip), %rax
+ pushq %rax
+ lretq
+
+ /*
+ * The 32-bit code below will do a far jump back to long mode and end
+ * up here after reconfiguring the number of paging levels. First, the
+ * stack pointer needs to be restored to its full 64-bit value before
+ * the callee save register contents can be popped from the stack.
+ */
+.Lret:
+ shlq $32, %rbx
+ orq %rbx, %rsp
+
+ /* Restore the preserved 64-bit registers */
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ retq
+
+ .code32
+0:
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /* Point CR3 to the trampoline's new top level page table */
+ movl %edi, %cr3
+
+ /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ /* Avoid writing EFER if no change was made (for TDX guest) */
+ jc 1f
+ wrmsr
+1:
+ /* Toggle CR4.LA57 */
+ movl %cr4, %eax
+ btcl $X86_CR4_LA57_BIT, %eax
+ movl %eax, %cr4
+
+ /* Enable paging again. */
+ movl %cr0, %eax
+ btsl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /*
+ * Return to the 64-bit calling code using LJMP rather than LRET, to
+ * avoid the need for a 32-bit addressable stack. The destination
+ * address will be adjusted after the template code is copied into a
+ * 32-bit addressable buffer.
+ */
+.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src)
+SYM_CODE_END(trampoline_32bit_src)
+
+/*
+ * This symbol is placed right after trampoline_32bit_src() so its address can
+ * be used to infer the size of the trampoline code.
+ */
+SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src)
+
+ /*
+ * The trampoline code has a size limit.
+ * Make sure we fail to compile if the trampoline code grows
+ * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
+ */
+ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
+
+ .text
+SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
+ /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
+1:
+ hlt
+ jmp 1b
+SYM_FUNC_END(.Lno_longmode)
+
+ .globl verify_cpu
+#include "../../kernel/verify_cpu.S"
+
+ .data
+SYM_DATA_START_LOCAL(gdt64)
+ .word gdt_end - gdt - 1
+ .quad gdt - gdt64
+SYM_DATA_END(gdt64)
+ .balign 8
+SYM_DATA_START_LOCAL(gdt)
+ .word gdt_end - gdt - 1
+ .long 0
+ .word 0
+ .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
+ .quad 0x00af9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+ .quad 0x0080890000000000 /* TS descriptor */
+ .quad 0x0000000000000000 /* TS continued */
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
+
+SYM_DATA_START(boot_idt_desc)
+ .word boot_idt_end - boot_idt - 1
+ .quad 0
+SYM_DATA_END(boot_idt_desc)
+ .balign 8
+SYM_DATA_START(boot_idt)
+ .rept BOOT_IDT_ENTRIES
+ .quad 0
+ .quad 0
+ .endr
+SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
+
+/*
+ * Stack and heap for uncompression
+ */
+ .bss
+ .balign 4
+SYM_DATA_START_LOCAL(boot_stack)
+ .fill BOOT_STACK_SIZE, 1, 0
+ .balign 16
+SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
+
+/*
+ * Space for page tables (not in .bss so not zeroed)
+ */
+ .section ".pgtable","aw",@nobits
+ .balign 4096
+SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0)
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0)