summaryrefslogtreecommitdiffstats
path: root/arch/x86/boot
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:49:45 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:49:45 +0000
commit2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch)
tree848558de17fb3008cdf4d861b01ac7781903ce39 /arch/x86/boot
parentInitial commit. (diff)
downloadlinux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz
linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip
Adding upstream version 6.1.76.upstream/6.1.76upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--arch/x86/boot/.gitignore14
-rw-r--r--arch/x86/boot/Makefile159
-rw-r--r--arch/x86/boot/a20.c163
-rw-r--r--arch/x86/boot/apm.c73
-rw-r--r--arch/x86/boot/bioscall.S79
-rw-r--r--arch/x86/boot/bitops.h44
-rw-r--r--arch/x86/boot/boot.h333
-rw-r--r--arch/x86/boot/cmdline.c156
-rw-r--r--arch/x86/boot/compressed/.gitignore7
-rw-r--r--arch/x86/boot/compressed/Makefile161
-rw-r--r--arch/x86/boot/compressed/acpi.c315
-rw-r--r--arch/x86/boot/compressed/cmdline.c30
-rw-r--r--arch/x86/boot/compressed/cpuflags.c9
-rw-r--r--arch/x86/boot/compressed/early_serial_console.c6
-rw-r--r--arch/x86/boot/compressed/efi.c234
-rw-r--r--arch/x86/boot/compressed/efi.h126
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S195
-rw-r--r--arch/x86/boot/compressed/error.c24
-rw-r--r--arch/x86/boot/compressed/error.h10
-rw-r--r--arch/x86/boot/compressed/head_32.S224
-rw-r--r--arch/x86/boot/compressed/head_64.S1022
-rw-r--r--arch/x86/boot/compressed/ident_map_64.c395
-rw-r--r--arch/x86/boot/compressed/idt_64.c91
-rw-r--r--arch/x86/boot/compressed/idt_handlers_64.S77
-rw-r--r--arch/x86/boot/compressed/kaslr.c873
-rw-r--r--arch/x86/boot/compressed/kernel_info.S22
-rw-r--r--arch/x86/boot/compressed/mem_encrypt.S198
-rw-r--r--arch/x86/boot/compressed/misc.c472
-rw-r--r--arch/x86/boot/compressed/misc.h241
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c74
-rw-r--r--arch/x86/boot/compressed/pgtable.h20
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c218
-rw-r--r--arch/x86/boot/compressed/sev.c558
-rw-r--r--arch/x86/boot/compressed/string.c81
-rw-r--r--arch/x86/boot/compressed/tdcall.S3
-rw-r--r--arch/x86/boot/compressed/tdx.c77
-rw-r--r--arch/x86/boot/compressed/tdx.h13
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S117
-rw-r--r--arch/x86/boot/copy.S65
-rw-r--r--arch/x86/boot/cpu.c99
-rw-r--r--arch/x86/boot/cpucheck.c227
-rw-r--r--arch/x86/boot/cpuflags.c128
-rw-r--r--arch/x86/boot/cpuflags.h22
-rw-r--r--arch/x86/boot/ctype.h21
-rw-r--r--arch/x86/boot/early_serial_console.c154
-rw-r--r--arch/x86/boot/edd.c180
-rw-r--r--arch/x86/boot/genimage.sh272
-rw-r--r--arch/x86/boot/header.S660
-rwxr-xr-xarch/x86/boot/install.sh37
-rw-r--r--arch/x86/boot/io.h41
-rw-r--r--arch/x86/boot/main.c185
-rw-r--r--arch/x86/boot/memory.c123
-rw-r--r--arch/x86/boot/mkcpustr.c50
-rw-r--r--arch/x86/boot/msr.h26
-rw-r--r--arch/x86/boot/mtools.conf.in21
-rw-r--r--arch/x86/boot/pm.c124
-rw-r--r--arch/x86/boot/pmjump.S75
-rw-r--r--arch/x86/boot/printf.c307
-rw-r--r--arch/x86/boot/regs.c27
-rw-r--r--arch/x86/boot/setup.ld66
-rw-r--r--arch/x86/boot/string.c378
-rw-r--r--arch/x86/boot/string.h34
-rw-r--r--arch/x86/boot/tools/.gitignore2
-rw-r--r--arch/x86/boot/tools/build.c500
-rw-r--r--arch/x86/boot/tty.c137
-rw-r--r--arch/x86/boot/version.c20
-rw-r--r--arch/x86/boot/vesa.h67
-rw-r--r--arch/x86/boot/video-bios.c126
-rw-r--r--arch/x86/boot/video-mode.c171
-rw-r--r--arch/x86/boot/video-vesa.c279
-rw-r--r--arch/x86/boot/video-vga.c286
-rw-r--r--arch/x86/boot/video.c343
-rw-r--r--arch/x86/boot/video.h118
73 files changed, 12285 insertions, 0 deletions
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
new file mode 100644
index 000000000..1189be057
--- /dev/null
+++ b/arch/x86/boot/.gitignore
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
+bootsect
+bzImage
+cpustr.h
+mkcpustr
+voffset.h
+zoffset.h
+setup
+setup.bin
+setup.elf
+fdimage
+mtools.conf
+image.iso
+hdimage
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
new file mode 100644
index 000000000..9e38ffaad
--- /dev/null
+++ b/arch/x86/boot/Makefile
@@ -0,0 +1,159 @@
+#
+# arch/x86/boot/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+# Changed by many, many contributors over the years.
+#
+
+# Sanitizer runtimes are unavailable and cannot be linked for early boot code.
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+KMSAN_SANITIZE := n
+OBJECT_FILES_NON_STANDARD := y
+
+# Kernel does not boot with kcov instrumentation here.
+# One of the problems observed was insertion of __sanitizer_cov_trace_pc()
+# callback into middle of per-cpu data enabling code. Thus the callback observed
+# inconsistent state and crashed. We are interested mostly in syscall coverage,
+# so boot code is not interesting anyway.
+KCOV_INSTRUMENT := n
+
+# If you want to preset the SVGA mode, uncomment the next line and
+# set SVGA_MODE to whatever number you want.
+# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
+# The number is the same as you would ordinarily press at bootup.
+
+SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
+
+targets := vmlinux.bin setup.bin setup.elf bzImage
+targets += fdimage fdimage144 fdimage288 image.iso hdimage
+subdir- := compressed
+
+setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
+setup-y += early_serial_console.o edd.o header.o main.o memory.o
+setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o
+setup-y += video-mode.o version.o
+setup-$(CONFIG_X86_APM_BOOT) += apm.o
+
+# The link order of the video-*.o modules can matter. In particular,
+# video-vga.o *must* be listed first, followed by video-vesa.o.
+# Hardware-specific drivers should follow in the order they should be
+# probed, and video-bios.o should typically be last.
+setup-y += video-vga.o
+setup-y += video-vesa.o
+setup-y += video-bios.o
+
+targets += $(setup-y)
+hostprogs := tools/build
+hostprogs += mkcpustr
+
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include \
+ -include include/generated/autoconf.h \
+ -D__EXPORTED_HEADERS__
+
+ifdef CONFIG_X86_FEATURE_NAMES
+$(obj)/cpu.o: $(obj)/cpustr.h
+
+quiet_cmd_cpustr = CPUSTR $@
+ cmd_cpustr = $(obj)/mkcpustr > $@
+$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
+ $(call if_changed,cpustr)
+endif
+targets += cpustr.h
+
+# ---------------------------------------------------------------------------
+
+KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP
+KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+
+$(obj)/bzImage: asflags-y := $(SVGA_MODE)
+
+quiet_cmd_image = BUILD $@
+silent_redirect_image = >/dev/null
+cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
+ $(obj)/zoffset.h $@ $($(quiet)redirect_image)
+
+$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
+ $(call if_changed,image)
+ @$(kecho) 'Kernel: $@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')'
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
+$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
+ $(call if_changed,objcopy)
+
+SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
+
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|efi32_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+
+quiet_cmd_zoffset = ZOFFSET $@
+ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
+
+targets += zoffset.h
+$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
+ $(call if_changed,zoffset)
+
+
+AFLAGS_header.o += -I$(objtree)/$(obj)
+$(obj)/header.o: $(obj)/zoffset.h
+
+LDFLAGS_setup.elf := -m elf_i386 -z noexecstack -T
+$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
+ $(call if_changed,ld)
+
+OBJCOPYFLAGS_setup.bin := -O binary
+$(obj)/setup.bin: $(obj)/setup.elf FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/compressed/vmlinux: FORCE
+ $(Q)$(MAKE) $(build)=$(obj)/compressed $@
+
+# Set this if you want to pass append arguments to the
+# bzdisk/fdimage/hdimage/isoimage kernel
+FDARGS =
+# Set this if you want one or more initrds included in the image
+FDINITRD =
+
+imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh
+
+$(obj)/mtools.conf: $(src)/mtools.conf.in
+ sed -e 's|@OBJ@|$(obj)|g' < $< > $@
+
+targets += mtools.conf
+
+# genimage.sh requires bash, but it also has a bunch of other
+# external dependencies.
+quiet_cmd_genimage = GENIMAGE $3
+cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
+ $(obj)/mtools.conf '$(FDARGS)' $(FDINITRD)
+
+PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage
+
+# This requires write access to /dev/fd0
+# All images require syslinux to be installed; hdimage also requires
+# EDK2/OVMF if the kernel is compiled with the EFI stub.
+bzdisk: $(imgdeps)
+ $(call cmd,genimage,bzdisk,/dev/fd0)
+
+fdimage fdimage144: $(imgdeps)
+ $(call cmd,genimage,fdimage144,$(obj)/fdimage)
+ @$(kecho) 'Kernel: $(obj)/fdimage is ready'
+
+fdimage288: $(imgdeps)
+ $(call cmd,genimage,fdimage288,$(obj)/fdimage)
+ @$(kecho) 'Kernel: $(obj)/fdimage is ready'
+
+hdimage: $(imgdeps)
+ $(call cmd,genimage,hdimage,$(obj)/hdimage)
+ @$(kecho) 'Kernel: $(obj)/hdimage is ready'
+
+isoimage: $(imgdeps)
+ $(call cmd,genimage,isoimage,$(obj)/image.iso)
+ @$(kecho) 'Kernel: $(obj)/image.iso is ready'
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
new file mode 100644
index 000000000..a2b6b4289
--- /dev/null
+++ b/arch/x86/boot/a20.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Enable A20 gate (return -1 on failure)
+ */
+
+#include "boot.h"
+
+#define MAX_8042_LOOPS 100000
+#define MAX_8042_FF 32
+
+static int empty_8042(void)
+{
+ u8 status;
+ int loops = MAX_8042_LOOPS;
+ int ffs = MAX_8042_FF;
+
+ while (loops--) {
+ io_delay();
+
+ status = inb(0x64);
+ if (status == 0xff) {
+ /* FF is a plausible, but very unlikely status */
+ if (!--ffs)
+ return -1; /* Assume no KBC present */
+ }
+ if (status & 1) {
+ /* Read and discard input data */
+ io_delay();
+ (void)inb(0x60);
+ } else if (!(status & 2)) {
+ /* Buffers empty, finished! */
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+/* Returns nonzero if the A20 line is enabled. The memory address
+ used as a test is the int $0x80 vector, which should be safe. */
+
+#define A20_TEST_ADDR (4*0x80)
+#define A20_TEST_SHORT 32
+#define A20_TEST_LONG 2097152 /* 2^21 */
+
+static int a20_test(int loops)
+{
+ int ok = 0;
+ int saved, ctr;
+
+ set_fs(0x0000);
+ set_gs(0xffff);
+
+ saved = ctr = rdfs32(A20_TEST_ADDR);
+
+ while (loops--) {
+ wrfs32(++ctr, A20_TEST_ADDR);
+ io_delay(); /* Serialize and make delay constant */
+ ok = rdgs32(A20_TEST_ADDR+0x10) ^ ctr;
+ if (ok)
+ break;
+ }
+
+ wrfs32(saved, A20_TEST_ADDR);
+ return ok;
+}
+
+/* Quick test to see if A20 is already enabled */
+static int a20_test_short(void)
+{
+ return a20_test(A20_TEST_SHORT);
+}
+
+/* Longer test that actually waits for A20 to come on line; this
+ is useful when dealing with the KBC or other slow external circuitry. */
+static int a20_test_long(void)
+{
+ return a20_test(A20_TEST_LONG);
+}
+
+static void enable_a20_bios(void)
+{
+ struct biosregs ireg;
+
+ initregs(&ireg);
+ ireg.ax = 0x2401;
+ intcall(0x15, &ireg, NULL);
+}
+
+static void enable_a20_kbc(void)
+{
+ empty_8042();
+
+ outb(0xd1, 0x64); /* Command write */
+ empty_8042();
+
+ outb(0xdf, 0x60); /* A20 on */
+ empty_8042();
+
+ outb(0xff, 0x64); /* Null command, but UHCI wants it */
+ empty_8042();
+}
+
+static void enable_a20_fast(void)
+{
+ u8 port_a;
+
+ port_a = inb(0x92); /* Configuration port A */
+ port_a |= 0x02; /* Enable A20 */
+ port_a &= ~0x01; /* Do not reset machine */
+ outb(port_a, 0x92);
+}
+
+/*
+ * Actual routine to enable A20; return 0 on ok, -1 on failure
+ */
+
+#define A20_ENABLE_LOOPS 255 /* Number of times to try */
+
+int enable_a20(void)
+{
+ int loops = A20_ENABLE_LOOPS;
+ int kbc_err;
+
+ while (loops--) {
+ /* First, check to see if A20 is already enabled
+ (legacy free, etc.) */
+ if (a20_test_short())
+ return 0;
+
+ /* Next, try the BIOS (INT 0x15, AX=0x2401) */
+ enable_a20_bios();
+ if (a20_test_short())
+ return 0;
+
+ /* Try enabling A20 through the keyboard controller */
+ kbc_err = empty_8042();
+
+ if (a20_test_short())
+ return 0; /* BIOS worked, but with delayed reaction */
+
+ if (!kbc_err) {
+ enable_a20_kbc();
+ if (a20_test_long())
+ return 0;
+ }
+
+ /* Finally, try enabling the "fast A20 gate" */
+ enable_a20_fast();
+ if (a20_test_long())
+ return 0;
+ }
+
+ return -1;
+}
diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
new file mode 100644
index 000000000..bda15f967
--- /dev/null
+++ b/arch/x86/boot/apm.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * Original APM BIOS checking by Stephen Rothwell, May 1994
+ * (sfr@canb.auug.org.au)
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Get APM BIOS information
+ */
+
+#include "boot.h"
+
+int query_apm_bios(void)
+{
+ struct biosregs ireg, oreg;
+
+ /* APM BIOS installation check */
+ initregs(&ireg);
+ ireg.ah = 0x53;
+ intcall(0x15, &ireg, &oreg);
+
+ if (oreg.flags & X86_EFLAGS_CF)
+ return -1; /* No APM BIOS */
+
+ if (oreg.bx != 0x504d) /* "PM" signature */
+ return -1;
+
+ if (!(oreg.cx & 0x02)) /* 32 bits supported? */
+ return -1;
+
+ /* Disconnect first, just in case */
+ ireg.al = 0x04;
+ intcall(0x15, &ireg, NULL);
+
+ /* 32-bit connect */
+ ireg.al = 0x03;
+ intcall(0x15, &ireg, &oreg);
+
+ boot_params.apm_bios_info.cseg = oreg.ax;
+ boot_params.apm_bios_info.offset = oreg.ebx;
+ boot_params.apm_bios_info.cseg_16 = oreg.cx;
+ boot_params.apm_bios_info.dseg = oreg.dx;
+ boot_params.apm_bios_info.cseg_len = oreg.si;
+ boot_params.apm_bios_info.cseg_16_len = oreg.hsi;
+ boot_params.apm_bios_info.dseg_len = oreg.di;
+
+ if (oreg.flags & X86_EFLAGS_CF)
+ return -1;
+
+ /* Redo the installation check as the 32-bit connect;
+ some BIOSes return different flags this way... */
+
+ ireg.al = 0x00;
+ intcall(0x15, &ireg, &oreg);
+
+ if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) {
+ /* Failure with 32-bit connect, try to disconnect and ignore */
+ ireg.al = 0x04;
+ intcall(0x15, &ireg, NULL);
+ return -1;
+ }
+
+ boot_params.apm_bios_info.version = oreg.ax;
+ boot_params.apm_bios_info.flags = oreg.cx;
+ return 0;
+}
+
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
new file mode 100644
index 000000000..aa9b96457
--- /dev/null
+++ b/arch/x86/boot/bioscall.S
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* -----------------------------------------------------------------------
+ *
+ * Copyright 2009-2014 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * "Glove box" for BIOS calls. Avoids the constant problems with BIOSes
+ * touching registers they shouldn't be.
+ */
+
+ .code16
+ .section ".inittext","ax"
+ .globl intcall
+ .type intcall, @function
+intcall:
+ /* Self-modify the INT instruction. Ugly, but works. */
+ cmpb %al, 3f
+ je 1f
+ movb %al, 3f
+ jmp 1f /* Synchronize pipeline */
+1:
+ /* Save state */
+ pushfl
+ pushw %fs
+ pushw %gs
+ pushal
+
+ /* Copy input state to stack frame */
+ subw $44, %sp
+ movw %dx, %si
+ movw %sp, %di
+ movw $11, %cx
+ rep; movsl
+
+ /* Pop full state from the stack */
+ popal
+ popw %gs
+ popw %fs
+ popw %es
+ popw %ds
+ popfl
+
+ /* Actual INT */
+ .byte 0xcd /* INT opcode */
+3: .byte 0
+
+ /* Push full state to the stack */
+ pushfl
+ pushw %ds
+ pushw %es
+ pushw %fs
+ pushw %gs
+ pushal
+
+ /* Re-establish C environment invariants */
+ cld
+ movzwl %sp, %esp
+ movw %cs, %ax
+ movw %ax, %ds
+ movw %ax, %es
+
+ /* Copy output state from stack frame */
+ movw 68(%esp), %di /* Original %cx == 3rd argument */
+ andw %di, %di
+ jz 4f
+ movw %sp, %si
+ movw $11, %cx
+ rep; movsl
+4: addw $44, %sp
+
+ /* Restore state and return */
+ popal
+ popw %gs
+ popw %fs
+ popfl
+ retl
+ .size intcall, .-intcall
diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h
new file mode 100644
index 000000000..8518ae214
--- /dev/null
+++ b/arch/x86/boot/bitops.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Very simple bitops for the boot code.
+ */
+
+#ifndef BOOT_BITOPS_H
+#define BOOT_BITOPS_H
+#define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */
+
+#include <linux/types.h>
+#include <asm/asm.h>
+
+static inline bool constant_test_bit(int nr, const void *addr)
+{
+ const u32 *p = addr;
+ return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
+}
+static inline bool variable_test_bit(int nr, const void *addr)
+{
+ bool v;
+ const u32 *p = addr;
+
+ asm("btl %2,%1" CC_SET(c) : CC_OUT(c) (v) : "m" (*p), "Ir" (nr));
+ return v;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+static inline void set_bit(int nr, void *addr)
+{
+ asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr));
+}
+
+#endif /* BOOT_BITOPS_H */
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
new file mode 100644
index 000000000..148ba5c51
--- /dev/null
+++ b/arch/x86/boot/boot.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Header file for the real-mode kernel code
+ */
+
+#ifndef BOOT_BOOT_H
+#define BOOT_BOOT_H
+
+#define STACK_SIZE 1024 /* Minimum number of bytes for stack */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stdarg.h>
+#include <linux/types.h>
+#include <linux/edd.h>
+#include <asm/setup.h>
+#include <asm/asm.h>
+#include "bitops.h"
+#include "ctype.h"
+#include "cpuflags.h"
+#include "io.h"
+
+/* Useful macros */
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+extern struct setup_header hdr;
+extern struct boot_params boot_params;
+
+#define cpu_relax() asm volatile("rep; nop")
+
+static inline void io_delay(void)
+{
+ const u16 DELAY_PORT = 0x80;
+ outb(0, DELAY_PORT);
+}
+
+/* These functions are used to reference data in other segments. */
+
+static inline u16 ds(void)
+{
+ u16 seg;
+ asm("movw %%ds,%0" : "=rm" (seg));
+ return seg;
+}
+
+static inline void set_fs(u16 seg)
+{
+ asm volatile("movw %0,%%fs" : : "rm" (seg));
+}
+static inline u16 fs(void)
+{
+ u16 seg;
+ asm volatile("movw %%fs,%0" : "=rm" (seg));
+ return seg;
+}
+
+static inline void set_gs(u16 seg)
+{
+ asm volatile("movw %0,%%gs" : : "rm" (seg));
+}
+static inline u16 gs(void)
+{
+ u16 seg;
+ asm volatile("movw %%gs,%0" : "=rm" (seg));
+ return seg;
+}
+
+typedef unsigned int addr_t;
+
+static inline u8 rdfs8(addr_t addr)
+{
+ u8 *ptr = (u8 *)absolute_pointer(addr);
+ u8 v;
+ asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*ptr));
+ return v;
+}
+static inline u16 rdfs16(addr_t addr)
+{
+ u16 *ptr = (u16 *)absolute_pointer(addr);
+ u16 v;
+ asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
+ return v;
+}
+static inline u32 rdfs32(addr_t addr)
+{
+ u32 *ptr = (u32 *)absolute_pointer(addr);
+ u32 v;
+ asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
+ return v;
+}
+
+static inline void wrfs8(u8 v, addr_t addr)
+{
+ u8 *ptr = (u8 *)absolute_pointer(addr);
+ asm volatile("movb %1,%%fs:%0" : "+m" (*ptr) : "qi" (v));
+}
+static inline void wrfs16(u16 v, addr_t addr)
+{
+ u16 *ptr = (u16 *)absolute_pointer(addr);
+ asm volatile("movw %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
+}
+static inline void wrfs32(u32 v, addr_t addr)
+{
+ u32 *ptr = (u32 *)absolute_pointer(addr);
+ asm volatile("movl %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
+}
+
+static inline u8 rdgs8(addr_t addr)
+{
+ u8 *ptr = (u8 *)absolute_pointer(addr);
+ u8 v;
+ asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*ptr));
+ return v;
+}
+static inline u16 rdgs16(addr_t addr)
+{
+ u16 *ptr = (u16 *)absolute_pointer(addr);
+ u16 v;
+ asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
+ return v;
+}
+static inline u32 rdgs32(addr_t addr)
+{
+ u32 *ptr = (u32 *)absolute_pointer(addr);
+ u32 v;
+ asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
+ return v;
+}
+
+static inline void wrgs8(u8 v, addr_t addr)
+{
+ u8 *ptr = (u8 *)absolute_pointer(addr);
+ asm volatile("movb %1,%%gs:%0" : "+m" (*ptr) : "qi" (v));
+}
+static inline void wrgs16(u16 v, addr_t addr)
+{
+ u16 *ptr = (u16 *)absolute_pointer(addr);
+ asm volatile("movw %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
+}
+static inline void wrgs32(u32 v, addr_t addr)
+{
+ u32 *ptr = (u32 *)absolute_pointer(addr);
+ asm volatile("movl %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
+}
+
+/* Note: these only return true/false, not a signed return value! */
+static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len)
+{
+ bool diff;
+ asm volatile("fs; repe; cmpsb" CC_SET(nz)
+ : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ return diff;
+}
+static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
+{
+ bool diff;
+ asm volatile("gs; repe; cmpsb" CC_SET(nz)
+ : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ return diff;
+}
+
+/* Heap -- available for dynamic lists. */
+extern char _end[];
+extern char *HEAP;
+extern char *heap_end;
+#define RESET_HEAP() ((void *)( HEAP = _end ))
+static inline char *__get_heap(size_t s, size_t a, size_t n)
+{
+ char *tmp;
+
+ HEAP = (char *)(((size_t)HEAP+(a-1)) & ~(a-1));
+ tmp = HEAP;
+ HEAP += s*n;
+ return tmp;
+}
+#define GET_HEAP(type, n) \
+ ((type *)__get_heap(sizeof(type),__alignof__(type),(n)))
+
+static inline bool heap_free(size_t n)
+{
+ return (int)(heap_end-HEAP) >= (int)n;
+}
+
+/* copy.S */
+
+void copy_to_fs(addr_t dst, void *src, size_t len);
+void *copy_from_fs(void *dst, addr_t src, size_t len);
+void copy_to_gs(addr_t dst, void *src, size_t len);
+void *copy_from_gs(void *dst, addr_t src, size_t len);
+
+/* a20.c */
+int enable_a20(void);
+
+/* apm.c */
+int query_apm_bios(void);
+
+/* bioscall.c */
+struct biosregs {
+ union {
+ struct {
+ u32 edi;
+ u32 esi;
+ u32 ebp;
+ u32 _esp;
+ u32 ebx;
+ u32 edx;
+ u32 ecx;
+ u32 eax;
+ u32 _fsgs;
+ u32 _dses;
+ u32 eflags;
+ };
+ struct {
+ u16 di, hdi;
+ u16 si, hsi;
+ u16 bp, hbp;
+ u16 _sp, _hsp;
+ u16 bx, hbx;
+ u16 dx, hdx;
+ u16 cx, hcx;
+ u16 ax, hax;
+ u16 gs, fs;
+ u16 es, ds;
+ u16 flags, hflags;
+ };
+ struct {
+ u8 dil, dih, edi2, edi3;
+ u8 sil, sih, esi2, esi3;
+ u8 bpl, bph, ebp2, ebp3;
+ u8 _spl, _sph, _esp2, _esp3;
+ u8 bl, bh, ebx2, ebx3;
+ u8 dl, dh, edx2, edx3;
+ u8 cl, ch, ecx2, ecx3;
+ u8 al, ah, eax2, eax3;
+ };
+ };
+};
+void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
+
+/* cmdline.c */
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize);
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option);
+static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
+{
+ unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+ if (cmd_line_ptr >= 0x100000)
+ return -1; /* inaccessible */
+
+ return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize);
+}
+
+static inline int cmdline_find_option_bool(const char *option)
+{
+ unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+ if (cmd_line_ptr >= 0x100000)
+ return -1; /* inaccessible */
+
+ return __cmdline_find_option_bool(cmd_line_ptr, option);
+}
+
+/* cpu.c, cpucheck.c */
+int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
+int check_knl_erratum(void);
+int validate_cpu(void);
+
+/* early_serial_console.c */
+extern int early_serial_base;
+void console_init(void);
+
+/* edd.c */
+void query_edd(void);
+
+/* header.S */
+void __attribute__((noreturn)) die(void);
+
+/* memory.c */
+void detect_memory(void);
+
+/* pm.c */
+void __attribute__((noreturn)) go_to_protected_mode(void);
+
+/* pmjump.S */
+void __attribute__((noreturn))
+ protected_mode_jump(u32 entrypoint, u32 bootparams);
+
+/* printf.c */
+int sprintf(char *buf, const char *fmt, ...);
+int vsprintf(char *buf, const char *fmt, va_list args);
+int printf(const char *fmt, ...);
+
+/* regs.c */
+void initregs(struct biosregs *regs);
+
+/* string.c */
+int strcmp(const char *str1, const char *str2);
+int strncmp(const char *cs, const char *ct, size_t count);
+size_t strnlen(const char *s, size_t maxlen);
+unsigned int atou(const char *s);
+unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
+size_t strlen(const char *s);
+char *strchr(const char *s, int c);
+
+/* tty.c */
+void puts(const char *);
+void putchar(int);
+int getchar(void);
+void kbd_flush(void);
+int getchar_timeout(void);
+
+/* video.c */
+void set_video(void);
+
+/* video-mode.c */
+int set_mode(u16 mode);
+int mode_defined(u16 mode);
+void probe_cards(int unsafe);
+
+/* video-vesa.c */
+void vesa_store_edid(void);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* BOOT_BOOT_H */
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
new file mode 100644
index 000000000..21d56ae83
--- /dev/null
+++ b/arch/x86/boot/cmdline.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Simple command-line parser for early boot.
+ */
+
+#include "boot.h"
+
+static inline int myisspace(u8 c)
+{
+ return c <= ' '; /* Close enough approximation */
+}
+
+/*
+ * Find a non-boolean option, that is, "option=argument". In accordance
+ * with standard Linux practice, if this option is repeated, this returns
+ * the last instance on the command line.
+ *
+ * Returns the length of the argument (regardless of if it was
+ * truncated to fit in the buffer), or -1 on not found.
+ */
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize)
+{
+ addr_t cptr;
+ char c;
+ int len = -1;
+ const char *opptr = NULL;
+ char *bufptr = buffer;
+ enum {
+ st_wordstart, /* Start of word/after whitespace */
+ st_wordcmp, /* Comparing this word */
+ st_wordskip, /* Miscompare, skip */
+ st_bufcpy /* Copying this to buffer */
+ } state = st_wordstart;
+
+ if (!cmdline_ptr)
+ return -1; /* No command line */
+
+ cptr = cmdline_ptr & 0xf;
+ set_fs(cmdline_ptr >> 4);
+
+ while (cptr < 0x10000 && (c = rdfs8(cptr++))) {
+ switch (state) {
+ case st_wordstart:
+ if (myisspace(c))
+ break;
+
+ /* else */
+ state = st_wordcmp;
+ opptr = option;
+ fallthrough;
+
+ case st_wordcmp:
+ if (c == '=' && !*opptr) {
+ len = 0;
+ bufptr = buffer;
+ state = st_bufcpy;
+ } else if (myisspace(c)) {
+ state = st_wordstart;
+ } else if (c != *opptr++) {
+ state = st_wordskip;
+ }
+ break;
+
+ case st_wordskip:
+ if (myisspace(c))
+ state = st_wordstart;
+ break;
+
+ case st_bufcpy:
+ if (myisspace(c)) {
+ state = st_wordstart;
+ } else {
+ if (len < bufsize-1)
+ *bufptr++ = c;
+ len++;
+ }
+ break;
+ }
+ }
+
+ if (bufsize)
+ *bufptr = '\0';
+
+ return len;
+}
+
+/*
+ * Find a boolean option (like quiet,noapic,nosmp....)
+ *
+ * Returns the position of that option (starts counting with 1)
+ * or 0 on not found
+ */
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
+{
+ addr_t cptr;
+ char c;
+ int pos = 0, wstart = 0;
+ const char *opptr = NULL;
+ enum {
+ st_wordstart, /* Start of word/after whitespace */
+ st_wordcmp, /* Comparing this word */
+ st_wordskip, /* Miscompare, skip */
+ } state = st_wordstart;
+
+ if (!cmdline_ptr)
+ return -1; /* No command line */
+
+ cptr = cmdline_ptr & 0xf;
+ set_fs(cmdline_ptr >> 4);
+
+ while (cptr < 0x10000) {
+ c = rdfs8(cptr++);
+ pos++;
+
+ switch (state) {
+ case st_wordstart:
+ if (!c)
+ return 0;
+ else if (myisspace(c))
+ break;
+
+ state = st_wordcmp;
+ opptr = option;
+ wstart = pos;
+ fallthrough;
+
+ case st_wordcmp:
+ if (!*opptr)
+ if (!c || myisspace(c))
+ return wstart;
+ else
+ state = st_wordskip;
+ else if (!c)
+ return 0;
+ else if (c != *opptr++)
+ state = st_wordskip;
+ break;
+
+ case st_wordskip:
+ if (!c)
+ return 0;
+ else if (myisspace(c))
+ state = st_wordstart;
+ break;
+ }
+ }
+
+ return 0; /* Buffer overrun */
+}
diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore
new file mode 100644
index 000000000..25805199a
--- /dev/null
+++ b/arch/x86/boot/compressed/.gitignore
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+relocs
+vmlinux.bin.all
+vmlinux.relocs
+vmlinux.lds
+mkpiggy
+piggy.S
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
new file mode 100644
index 000000000..15b7b403a
--- /dev/null
+++ b/arch/x86/boot/compressed/Makefile
@@ -0,0 +1,161 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# linux/arch/x86/boot/compressed/Makefile
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+# vmlinuz is:
+# decompression code (*.o)
+# asm globals (piggy.S), including:
+# vmlinux.bin.(gz|bz2|lzma|...)
+#
+# vmlinux.bin is:
+# vmlinux stripped of debugging and comments
+# vmlinux.bin.all is:
+# vmlinux.bin + vmlinux.relocs
+# vmlinux.bin.(gz|bz2|lzma|...) is:
+# (see scripts/Makefile.lib size_append)
+# compressed vmlinux.bin.all + u32 size of vmlinux.bin.all
+
+# Sanitizer runtimes are unavailable and cannot be linked for early boot code.
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+KMSAN_SANITIZE := n
+OBJECT_FILES_NON_STANDARD := y
+
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
+targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
+ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst
+
+# CLANG_FLAGS must come before any cc-disable-warning or cc-option calls in
+# case of cross compiling, as it has the '--target=' flag, which is needed to
+# avoid errors with '-march=i386', and future flags may depend on the target to
+# be valid.
+KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS)
+KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
+KBUILD_CFLAGS += -Wundef
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+cflags-$(CONFIG_X86_32) := -march=i386
+cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone
+KBUILD_CFLAGS += $(cflags-y)
+KBUILD_CFLAGS += -mno-mmx -mno-sse
+KBUILD_CFLAGS += -ffreestanding -fshort-wchar
+KBUILD_CFLAGS += -fno-stack-protector
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += -Wno-pointer-sign
+KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS += -D__DISABLE_EXPORTS
+# Disable relocation relaxation in case the link is not PIE.
+KBUILD_CFLAGS += $(call cc-option,-Wa$(comma)-mrelax-relocations=no)
+KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
+
+# sev.c indirectly inludes inat-table.h which is generated during
+# compilation and stored in $(objtree). Add the directory to the includes so
+# that the compiler finds it even with out-of-tree builds (make O=/some/path).
+CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/
+
+KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+GCOV_PROFILE := n
+UBSAN_SANITIZE :=n
+
+KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
+KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info)
+# Compressed kernel should be built as PIE since it may be loaded at any
+# address by the bootloader.
+LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker)
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
+endif
+LDFLAGS_vmlinux += -z noexecstack
+ifeq ($(CONFIG_LD_IS_BFD),y)
+LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments)
+endif
+LDFLAGS_vmlinux += -T
+
+hostprogs := mkpiggy
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
+
+quiet_cmd_voffset = VOFFSET $@
+ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
+
+targets += ../voffset.h
+
+$(obj)/../voffset.h: vmlinux FORCE
+ $(call if_changed,voffset)
+
+$(obj)/misc.o: $(obj)/../voffset.h
+
+vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/kernel_info.o $(obj)/head_$(BITS).o \
+ $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
+ $(obj)/piggy.o $(obj)/cpuflags.o
+
+vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
+vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
+ifdef CONFIG_X86_64
+ vmlinux-objs-y += $(obj)/ident_map_64.o
+ vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
+ vmlinux-objs-y += $(obj)/mem_encrypt.o
+ vmlinux-objs-y += $(obj)/pgtable_64.o
+ vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
+endif
+
+vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
+vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
+
+vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
+vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
+efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
+
+$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE
+ $(call if_changed,ld)
+
+OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
+$(obj)/vmlinux.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs
+
+CMD_RELOCS = arch/x86/tools/relocs
+quiet_cmd_relocs = RELOCS $@
+ cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
+$(obj)/vmlinux.relocs: vmlinux FORCE
+ $(call if_changed,relocs)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
+
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,bzip2_with_size)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,lzma_with_size)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,xzkern_with_size)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,lzo_with_size)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,lz4_with_size)
+$(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,zstd22_with_size)
+
+suffix-$(CONFIG_KERNEL_GZIP) := gz
+suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZMA) := lzma
+suffix-$(CONFIG_KERNEL_XZ) := xz
+suffix-$(CONFIG_KERNEL_LZO) := lzo
+suffix-$(CONFIG_KERNEL_LZ4) := lz4
+suffix-$(CONFIG_KERNEL_ZSTD) := zst
+
+quiet_cmd_mkpiggy = MKPIGGY $@
+ cmd_mkpiggy = $(obj)/mkpiggy $< > $@
+
+targets += piggy.S
+$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
+ $(call if_changed,mkpiggy)
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
new file mode 100644
index 000000000..9caf89063
--- /dev/null
+++ b/arch/x86/boot/compressed/acpi.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BOOT_CTYPE_H
+#include "misc.h"
+#include "error.h"
+#include "../string.h"
+#include "efi.h"
+
+#include <linux/numa.h>
+
+/*
+ * Longest parameter of 'acpi=' is 'copy_dsdt', plus an extra '\0'
+ * for termination.
+ */
+#define MAX_ACPI_ARG_LENGTH 10
+
+/*
+ * Immovable memory regions representation. Max amount of memory regions is
+ * MAX_NUMNODES*2.
+ */
+struct mem_vector immovable_mem[MAX_NUMNODES*2];
+
+static acpi_physical_address
+__efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len)
+{
+#ifdef CONFIG_EFI
+ unsigned long rsdp_addr;
+ int ret;
+
+ /*
+ * Search EFI system tables for RSDP. Preferred is ACPI_20_TABLE_GUID to
+ * ACPI_TABLE_GUID because it has more features.
+ */
+ rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+ ACPI_20_TABLE_GUID);
+ if (rsdp_addr)
+ return (acpi_physical_address)rsdp_addr;
+
+ /* No ACPI_20_TABLE_GUID found, fallback to ACPI_TABLE_GUID. */
+ rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+ ACPI_TABLE_GUID);
+ if (rsdp_addr)
+ return (acpi_physical_address)rsdp_addr;
+
+ debug_putstr("Error getting RSDP address.\n");
+#endif
+ return 0;
+}
+
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
+#ifdef CONFIG_EFI
+ unsigned long cfg_tbl_pa = 0;
+ unsigned int cfg_tbl_len;
+ unsigned long systab_pa;
+ unsigned int nr_tables;
+ enum efi_type et;
+ int ret;
+
+ et = efi_get_type(boot_params);
+ if (et == EFI_TYPE_NONE)
+ return 0;
+
+ systab_pa = efi_get_system_table(boot_params);
+ if (!systab_pa)
+ error("EFI support advertised, but unable to locate system table.");
+
+ ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len);
+ if (ret || !cfg_tbl_pa)
+ error("EFI config table not found.");
+
+ return __efi_get_rsdp_addr(cfg_tbl_pa, cfg_tbl_len);
+#else
+ return 0;
+#endif
+}
+
+static u8 compute_checksum(u8 *buffer, u32 length)
+{
+ u8 *end = buffer + length;
+ u8 sum = 0;
+
+ while (buffer < end)
+ sum += *(buffer++);
+
+ return sum;
+}
+
+/* Search a block of memory for the RSDP signature. */
+static u8 *scan_mem_for_rsdp(u8 *start, u32 length)
+{
+ struct acpi_table_rsdp *rsdp;
+ u8 *address, *end;
+
+ end = start + length;
+
+ /* Search from given start address for the requested length */
+ for (address = start; address < end; address += ACPI_RSDP_SCAN_STEP) {
+ /*
+ * Both RSDP signature and checksum must be correct.
+ * Note: Sometimes there exists more than one RSDP in memory;
+ * the valid RSDP has a valid checksum, all others have an
+ * invalid checksum.
+ */
+ rsdp = (struct acpi_table_rsdp *)address;
+
+ /* BAD Signature */
+ if (!ACPI_VALIDATE_RSDP_SIG(rsdp->signature))
+ continue;
+
+ /* Check the standard checksum */
+ if (compute_checksum((u8 *)rsdp, ACPI_RSDP_CHECKSUM_LENGTH))
+ continue;
+
+ /* Check extended checksum if table version >= 2 */
+ if ((rsdp->revision >= 2) &&
+ (compute_checksum((u8 *)rsdp, ACPI_RSDP_XCHECKSUM_LENGTH)))
+ continue;
+
+ /* Signature and checksum valid, we have found a real RSDP */
+ return address;
+ }
+ return NULL;
+}
+
+/* Search RSDP address in EBDA. */
+static acpi_physical_address bios_get_rsdp_addr(void)
+{
+ unsigned long address;
+ u8 *rsdp;
+
+ /* Get the location of the Extended BIOS Data Area (EBDA) */
+ address = *(u16 *)ACPI_EBDA_PTR_LOCATION;
+ address <<= 4;
+
+ /*
+ * Search EBDA paragraphs (EBDA is required to be a minimum of
+ * 1K length)
+ */
+ if (address > 0x400) {
+ rsdp = scan_mem_for_rsdp((u8 *)address, ACPI_EBDA_WINDOW_SIZE);
+ if (rsdp)
+ return (acpi_physical_address)(unsigned long)rsdp;
+ }
+
+ /* Search upper memory: 16-byte boundaries in E0000h-FFFFFh */
+ rsdp = scan_mem_for_rsdp((u8 *) ACPI_HI_RSDP_WINDOW_BASE,
+ ACPI_HI_RSDP_WINDOW_SIZE);
+ if (rsdp)
+ return (acpi_physical_address)(unsigned long)rsdp;
+
+ return 0;
+}
+
+/* Return RSDP address on success, otherwise 0. */
+acpi_physical_address get_rsdp_addr(void)
+{
+ acpi_physical_address pa;
+
+ pa = boot_params->acpi_rsdp_addr;
+
+ if (!pa)
+ pa = efi_get_rsdp_addr();
+
+ if (!pa)
+ pa = bios_get_rsdp_addr();
+
+ return pa;
+}
+
+#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE)
+/*
+ * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex
+ * digits, and '\0' for termination.
+ */
+#define MAX_ADDR_LEN 19
+
+static unsigned long get_cmdline_acpi_rsdp(void)
+{
+ unsigned long addr = 0;
+
+#ifdef CONFIG_KEXEC
+ char val[MAX_ADDR_LEN] = { };
+ int ret;
+
+ ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN);
+ if (ret < 0)
+ return 0;
+
+ if (boot_kstrtoul(val, 16, &addr))
+ return 0;
+#endif
+ return addr;
+}
+
+/* Compute SRAT address from RSDP. */
+static unsigned long get_acpi_srat_table(void)
+{
+ unsigned long root_table, acpi_table;
+ struct acpi_table_header *header;
+ struct acpi_table_rsdp *rsdp;
+ u32 num_entries, size, len;
+ char arg[10];
+ u8 *entry;
+
+ /*
+ * Check whether we were given an RSDP on the command line. We don't
+ * stash this in boot params because the kernel itself may have
+ * different ideas about whether to trust a command-line parameter.
+ */
+ rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp();
+ if (!rsdp)
+ rsdp = (struct acpi_table_rsdp *)(long)
+ boot_params->acpi_rsdp_addr;
+
+ if (!rsdp)
+ return 0;
+
+ /* Get ACPI root table from RSDP.*/
+ if (!(cmdline_find_option("acpi", arg, sizeof(arg)) == 4 &&
+ !strncmp(arg, "rsdt", 4)) &&
+ rsdp->xsdt_physical_address &&
+ rsdp->revision > 1) {
+ root_table = rsdp->xsdt_physical_address;
+ size = ACPI_XSDT_ENTRY_SIZE;
+ } else {
+ root_table = rsdp->rsdt_physical_address;
+ size = ACPI_RSDT_ENTRY_SIZE;
+ }
+
+ if (!root_table)
+ return 0;
+
+ header = (struct acpi_table_header *)root_table;
+ len = header->length;
+ if (len < sizeof(struct acpi_table_header) + size)
+ return 0;
+
+ num_entries = (len - sizeof(struct acpi_table_header)) / size;
+ entry = (u8 *)(root_table + sizeof(struct acpi_table_header));
+
+ while (num_entries--) {
+ if (size == ACPI_RSDT_ENTRY_SIZE)
+ acpi_table = *(u32 *)entry;
+ else
+ acpi_table = *(u64 *)entry;
+
+ if (acpi_table) {
+ header = (struct acpi_table_header *)acpi_table;
+
+ if (ACPI_COMPARE_NAMESEG(header->signature, ACPI_SIG_SRAT))
+ return acpi_table;
+ }
+ entry += size;
+ }
+ return 0;
+}
+
+/**
+ * count_immovable_mem_regions - Parse SRAT and cache the immovable
+ * memory regions into the immovable_mem array.
+ *
+ * Return the number of immovable memory regions on success, 0 on failure:
+ *
+ * - Too many immovable memory regions
+ * - ACPI off or no SRAT found
+ * - No immovable memory region found.
+ */
+int count_immovable_mem_regions(void)
+{
+ unsigned long table_addr, table_end, table;
+ struct acpi_subtable_header *sub_table;
+ struct acpi_table_header *table_header;
+ char arg[MAX_ACPI_ARG_LENGTH];
+ int num = 0;
+
+ if (cmdline_find_option("acpi", arg, sizeof(arg)) == 3 &&
+ !strncmp(arg, "off", 3))
+ return 0;
+
+ table_addr = get_acpi_srat_table();
+ if (!table_addr)
+ return 0;
+
+ table_header = (struct acpi_table_header *)table_addr;
+ table_end = table_addr + table_header->length;
+ table = table_addr + sizeof(struct acpi_table_srat);
+
+ while (table + sizeof(struct acpi_subtable_header) < table_end) {
+
+ sub_table = (struct acpi_subtable_header *)table;
+ if (!sub_table->length) {
+ debug_putstr("Invalid zero length SRAT subtable.\n");
+ return 0;
+ }
+
+ if (sub_table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) {
+ struct acpi_srat_mem_affinity *ma;
+
+ ma = (struct acpi_srat_mem_affinity *)sub_table;
+ if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && ma->length) {
+ immovable_mem[num].start = ma->base_address;
+ immovable_mem[num].size = ma->length;
+ num++;
+ }
+
+ if (num >= MAX_NUMNODES*2) {
+ debug_putstr("Too many immovable memory regions, aborting.\n");
+ return 0;
+ }
+ }
+ table += sub_table->length;
+ }
+ return num;
+}
+#endif /* CONFIG_RANDOMIZE_BASE && CONFIG_MEMORY_HOTREMOVE */
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
new file mode 100644
index 000000000..f1add5d85
--- /dev/null
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "misc.h"
+
+static unsigned long fs;
+static inline void set_fs(unsigned long seg)
+{
+ fs = seg << 4; /* shift it back */
+}
+typedef unsigned long addr_t;
+static inline char rdfs8(addr_t addr)
+{
+ return *((char *)(fs + addr));
+}
+#include "../cmdline.c"
+unsigned long get_cmd_line_ptr(void)
+{
+ unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr;
+
+ cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32;
+
+ return cmd_line_ptr;
+}
+int cmdline_find_option(const char *option, char *buffer, int bufsize)
+{
+ return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize);
+}
+int cmdline_find_option_bool(const char *option)
+{
+ return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
+}
diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c
new file mode 100644
index 000000000..0cc132389
--- /dev/null
+++ b/arch/x86/boot/compressed/cpuflags.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../cpuflags.c"
+
+bool has_cpuflag(int flag)
+{
+ get_cpuflags();
+
+ return test_bit(flag, cpu.flags);
+}
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c
new file mode 100644
index 000000000..70a8d1706
--- /dev/null
+++ b/arch/x86/boot/compressed/early_serial_console.c
@@ -0,0 +1,6 @@
+#include "misc.h"
+
+/* This might be accessed before .bss is cleared, so use .data instead. */
+int early_serial_base __section(".data");
+
+#include "../early_serial_console.c"
diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c
new file mode 100644
index 000000000..6edd034b0
--- /dev/null
+++ b/arch/x86/boot/compressed/efi.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helpers for early access to EFI configuration table.
+ *
+ * Originally derived from arch/x86/boot/compressed/acpi.c
+ */
+
+#include "misc.h"
+
+/**
+ * efi_get_type - Given a pointer to boot_params, determine the type of EFI environment.
+ *
+ * @bp: pointer to boot_params
+ *
+ * Return: EFI_TYPE_{32,64} for valid EFI environments, EFI_TYPE_NONE otherwise.
+ */
+enum efi_type efi_get_type(struct boot_params *bp)
+{
+ struct efi_info *ei;
+ enum efi_type et;
+ const char *sig;
+
+ ei = &bp->efi_info;
+ sig = (char *)&ei->efi_loader_signature;
+
+ if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+ et = EFI_TYPE_64;
+ } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) {
+ et = EFI_TYPE_32;
+ } else {
+ debug_putstr("No EFI environment detected.\n");
+ et = EFI_TYPE_NONE;
+ }
+
+#ifndef CONFIG_X86_64
+ /*
+ * Existing callers like acpi.c treat this case as an indicator to
+ * fall-through to non-EFI, rather than an error, so maintain that
+ * functionality here as well.
+ */
+ if (ei->efi_systab_hi || ei->efi_memmap_hi) {
+ debug_putstr("EFI system table is located above 4GB and cannot be accessed.\n");
+ et = EFI_TYPE_NONE;
+ }
+#endif
+
+ return et;
+}
+
+/**
+ * efi_get_system_table - Given a pointer to boot_params, retrieve the physical address
+ * of the EFI system table.
+ *
+ * @bp: pointer to boot_params
+ *
+ * Return: EFI system table address on success. On error, return 0.
+ */
+unsigned long efi_get_system_table(struct boot_params *bp)
+{
+ unsigned long sys_tbl_pa;
+ struct efi_info *ei;
+ enum efi_type et;
+
+ /* Get systab from boot params. */
+ ei = &bp->efi_info;
+#ifdef CONFIG_X86_64
+ sys_tbl_pa = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32);
+#else
+ sys_tbl_pa = ei->efi_systab;
+#endif
+ if (!sys_tbl_pa) {
+ debug_putstr("EFI system table not found.");
+ return 0;
+ }
+
+ return sys_tbl_pa;
+}
+
+/*
+ * EFI config table address changes to virtual address after boot, which may
+ * not be accessible for the kexec'd kernel. To address this, kexec provides
+ * the initial physical address via a struct setup_data entry, which is
+ * checked for here, along with some sanity checks.
+ */
+static struct efi_setup_data *get_kexec_setup_data(struct boot_params *bp,
+ enum efi_type et)
+{
+#ifdef CONFIG_X86_64
+ struct efi_setup_data *esd = NULL;
+ struct setup_data *data;
+ u64 pa_data;
+
+ pa_data = bp->hdr.setup_data;
+ while (pa_data) {
+ data = (struct setup_data *)pa_data;
+ if (data->type == SETUP_EFI) {
+ esd = (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+ break;
+ }
+
+ pa_data = data->next;
+ }
+
+ /*
+ * Original ACPI code falls back to attempting normal EFI boot in these
+ * cases, so maintain existing behavior by indicating non-kexec
+ * environment to the caller, but print them for debugging.
+ */
+ if (esd && !esd->tables) {
+ debug_putstr("kexec EFI environment missing valid configuration table.\n");
+ return NULL;
+ }
+
+ return esd;
+#endif
+ return NULL;
+}
+
+/**
+ * efi_get_conf_table - Given a pointer to boot_params, locate and return the physical
+ * address of EFI configuration table.
+ *
+ * @bp: pointer to boot_params
+ * @cfg_tbl_pa: location to store physical address of config table
+ * @cfg_tbl_len: location to store number of config table entries
+ *
+ * Return: 0 on success. On error, return params are left unchanged.
+ */
+int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa,
+ unsigned int *cfg_tbl_len)
+{
+ unsigned long sys_tbl_pa;
+ enum efi_type et;
+ int ret;
+
+ if (!cfg_tbl_pa || !cfg_tbl_len)
+ return -EINVAL;
+
+ sys_tbl_pa = efi_get_system_table(bp);
+ if (!sys_tbl_pa)
+ return -EINVAL;
+
+ /* Handle EFI bitness properly */
+ et = efi_get_type(bp);
+ if (et == EFI_TYPE_64) {
+ efi_system_table_64_t *stbl = (efi_system_table_64_t *)sys_tbl_pa;
+ struct efi_setup_data *esd;
+
+ /* kexec provides an alternative EFI conf table, check for it. */
+ esd = get_kexec_setup_data(bp, et);
+
+ *cfg_tbl_pa = esd ? esd->tables : stbl->tables;
+ *cfg_tbl_len = stbl->nr_tables;
+ } else if (et == EFI_TYPE_32) {
+ efi_system_table_32_t *stbl = (efi_system_table_32_t *)sys_tbl_pa;
+
+ *cfg_tbl_pa = stbl->tables;
+ *cfg_tbl_len = stbl->nr_tables;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Get vendor table address/guid from EFI config table at the given index */
+static int get_vendor_table(void *cfg_tbl, unsigned int idx,
+ unsigned long *vendor_tbl_pa,
+ efi_guid_t *vendor_tbl_guid,
+ enum efi_type et)
+{
+ if (et == EFI_TYPE_64) {
+ efi_config_table_64_t *tbl_entry = (efi_config_table_64_t *)cfg_tbl + idx;
+
+ if (!IS_ENABLED(CONFIG_X86_64) && tbl_entry->table >> 32) {
+ debug_putstr("Error: EFI config table entry located above 4GB.\n");
+ return -EINVAL;
+ }
+
+ *vendor_tbl_pa = tbl_entry->table;
+ *vendor_tbl_guid = tbl_entry->guid;
+
+ } else if (et == EFI_TYPE_32) {
+ efi_config_table_32_t *tbl_entry = (efi_config_table_32_t *)cfg_tbl + idx;
+
+ *vendor_tbl_pa = tbl_entry->table;
+ *vendor_tbl_guid = tbl_entry->guid;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * efi_find_vendor_table - Given EFI config table, search it for the physical
+ * address of the vendor table associated with GUID.
+ *
+ * @bp: pointer to boot_params
+ * @cfg_tbl_pa: pointer to EFI configuration table
+ * @cfg_tbl_len: number of entries in EFI configuration table
+ * @guid: GUID of vendor table
+ *
+ * Return: vendor table address on success. On error, return 0.
+ */
+unsigned long efi_find_vendor_table(struct boot_params *bp,
+ unsigned long cfg_tbl_pa,
+ unsigned int cfg_tbl_len,
+ efi_guid_t guid)
+{
+ enum efi_type et;
+ unsigned int i;
+
+ et = efi_get_type(bp);
+ if (et == EFI_TYPE_NONE)
+ return 0;
+
+ for (i = 0; i < cfg_tbl_len; i++) {
+ unsigned long vendor_tbl_pa;
+ efi_guid_t vendor_tbl_guid;
+ int ret;
+
+ ret = get_vendor_table((void *)cfg_tbl_pa, i,
+ &vendor_tbl_pa,
+ &vendor_tbl_guid, et);
+ if (ret)
+ return 0;
+
+ if (!efi_guidcmp(guid, vendor_tbl_guid))
+ return vendor_tbl_pa;
+ }
+
+ return 0;
+}
diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
new file mode 100644
index 000000000..7db2f41b5
--- /dev/null
+++ b/arch/x86/boot/compressed/efi.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_EFI_H
+#define BOOT_COMPRESSED_EFI_H
+
+#if defined(_LINUX_EFI_H) || defined(_ASM_X86_EFI_H)
+#error Please do not include kernel proper namespace headers
+#endif
+
+typedef guid_t efi_guid_t __aligned(__alignof__(u32));
+
+#define EFI_GUID(a, b, c, d...) (efi_guid_t){ { \
+ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
+ (b) & 0xff, ((b) >> 8) & 0xff, \
+ (c) & 0xff, ((c) >> 8) & 0xff, d } }
+
+#define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
+#define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42)
+
+#define EFI32_LOADER_SIGNATURE "EL32"
+#define EFI64_LOADER_SIGNATURE "EL64"
+
+/*
+ * Generic EFI table header
+ */
+typedef struct {
+ u64 signature;
+ u32 revision;
+ u32 headersize;
+ u32 crc32;
+ u32 reserved;
+} efi_table_hdr_t;
+
+#define EFI_CONVENTIONAL_MEMORY 7
+
+#define EFI_MEMORY_MORE_RELIABLE \
+ ((u64)0x0000000000010000ULL) /* higher reliability */
+#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */
+
+#define EFI_PAGE_SHIFT 12
+
+typedef struct {
+ u32 type;
+ u32 pad;
+ u64 phys_addr;
+ u64 virt_addr;
+ u64 num_pages;
+ u64 attribute;
+} efi_memory_desc_t;
+
+#define efi_early_memdesc_ptr(map, desc_size, n) \
+ (efi_memory_desc_t *)((void *)(map) + ((n) * (desc_size)))
+
+typedef struct {
+ efi_guid_t guid;
+ u64 table;
+} efi_config_table_64_t;
+
+typedef struct {
+ efi_guid_t guid;
+ u32 table;
+} efi_config_table_32_t;
+
+typedef struct {
+ efi_table_hdr_t hdr;
+ u64 fw_vendor; /* physical addr of CHAR16 vendor string */
+ u32 fw_revision;
+ u32 __pad1;
+ u64 con_in_handle;
+ u64 con_in;
+ u64 con_out_handle;
+ u64 con_out;
+ u64 stderr_handle;
+ u64 stderr;
+ u64 runtime;
+ u64 boottime;
+ u32 nr_tables;
+ u32 __pad2;
+ u64 tables;
+} efi_system_table_64_t;
+
+typedef struct {
+ efi_table_hdr_t hdr;
+ u32 fw_vendor; /* physical addr of CHAR16 vendor string */
+ u32 fw_revision;
+ u32 con_in_handle;
+ u32 con_in;
+ u32 con_out_handle;
+ u32 con_out;
+ u32 stderr_handle;
+ u32 stderr;
+ u32 runtime;
+ u32 boottime;
+ u32 nr_tables;
+ u32 tables;
+} efi_system_table_32_t;
+
+/* kexec external ABI */
+struct efi_setup_data {
+ u64 fw_vendor;
+ u64 __unused;
+ u64 tables;
+ u64 smbios;
+ u64 reserved[8];
+};
+
+static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right)
+{
+ return memcmp(&left, &right, sizeof (efi_guid_t));
+}
+
+#ifdef CONFIG_EFI
+bool __pure __efi_soft_reserve_enabled(void);
+
+static inline bool __pure efi_soft_reserve_enabled(void)
+{
+ return IS_ENABLED(CONFIG_EFI_SOFT_RESERVE)
+ && __efi_soft_reserve_enabled();
+}
+#else
+static inline bool efi_soft_reserve_enabled(void)
+{
+ return false;
+}
+#endif /* CONFIG_EFI */
+#endif /* BOOT_COMPRESSED_EFI_H */
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
new file mode 100644
index 000000000..67e7edcdf
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
+ *
+ * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+ *
+ * Because this thunking occurs before ExitBootServices() we have to
+ * restore the firmware's 32-bit GDT and IDT before we make EFI service
+ * calls.
+ *
+ * On the plus side, we don't have to worry about mangling 64-bit
+ * addresses into 32-bits because we're executing with an identity
+ * mapped pagetable and haven't transitioned to 64-bit virtual addresses
+ * yet.
+ */
+
+#include <linux/linkage.h>
+#include <asm/msr.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+ .code64
+ .text
+SYM_FUNC_START(__efi64_thunk)
+ push %rbp
+ push %rbx
+
+ movl %ds, %eax
+ push %rax
+ movl %es, %eax
+ push %rax
+ movl %ss, %eax
+ push %rax
+
+ /* Copy args passed on stack */
+ movq 0x30(%rsp), %rbp
+ movq 0x38(%rsp), %rbx
+ movq 0x40(%rsp), %rax
+
+ /*
+ * Convert x86-64 ABI params to i386 ABI
+ */
+ subq $64, %rsp
+ movl %esi, 0x0(%rsp)
+ movl %edx, 0x4(%rsp)
+ movl %ecx, 0x8(%rsp)
+ movl %r8d, 0xc(%rsp)
+ movl %r9d, 0x10(%rsp)
+ movl %ebp, 0x14(%rsp)
+ movl %ebx, 0x18(%rsp)
+ movl %eax, 0x1c(%rsp)
+
+ leaq 0x20(%rsp), %rbx
+ sgdt (%rbx)
+
+ addq $16, %rbx
+ sidt (%rbx)
+
+ leaq 1f(%rip), %rbp
+
+ /*
+ * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT
+ * and IDT that was installed when the kernel started executing. The
+ * pointers were saved at the EFI stub entry point in head_64.S.
+ *
+ * Pass the saved DS selector to the 32-bit code, and use far return to
+ * restore the saved CS selector.
+ */
+ leaq efi32_boot_idt(%rip), %rax
+ lidt (%rax)
+ leaq efi32_boot_gdt(%rip), %rax
+ lgdt (%rax)
+
+ movzwl efi32_boot_ds(%rip), %edx
+ movzwq efi32_boot_cs(%rip), %rax
+ pushq %rax
+ leaq efi_enter32(%rip), %rax
+ pushq %rax
+ lretq
+
+1: addq $64, %rsp
+ movq %rdi, %rax
+
+ pop %rbx
+ movl %ebx, %ss
+ pop %rbx
+ movl %ebx, %es
+ pop %rbx
+ movl %ebx, %ds
+ /* Clear out 32-bit selector from FS and GS */
+ xorl %ebx, %ebx
+ movl %ebx, %fs
+ movl %ebx, %gs
+
+ /*
+ * Convert 32-bit status code into 64-bit.
+ */
+ roll $1, %eax
+ rorq $1, %rax
+
+ pop %rbx
+ pop %rbp
+ RET
+SYM_FUNC_END(__efi64_thunk)
+
+ .code32
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+SYM_FUNC_START_LOCAL(efi_enter32)
+ /* Load firmware selector into data and stack segment registers */
+ movl %edx, %ds
+ movl %edx, %es
+ movl %edx, %fs
+ movl %edx, %gs
+ movl %edx, %ss
+
+ /* Reload pgtables */
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /* Disable long mode via EFER */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btrl $_EFER_LME, %eax
+ wrmsr
+
+ call *%edi
+
+ /* We must preserve return value */
+ movl %eax, %edi
+
+ /*
+ * Some firmware will return with interrupts enabled. Be sure to
+ * disable them before we switch GDTs and IDTs.
+ */
+ cli
+
+ lidtl (%ebx)
+ subl $16, %ebx
+
+ lgdtl (%ebx)
+
+ movl %cr4, %eax
+ btsl $(X86_CR4_PAE_BIT), %eax
+ movl %eax, %cr4
+
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ xorl %eax, %eax
+ lldt %ax
+
+ pushl $__KERNEL_CS
+ pushl %ebp
+
+ /* Enable paging */
+ movl %cr0, %eax
+ btsl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+ lret
+SYM_FUNC_END(efi_enter32)
+
+ .data
+ .balign 8
+SYM_DATA_START(efi32_boot_gdt)
+ .word 0
+ .quad 0
+SYM_DATA_END(efi32_boot_gdt)
+
+SYM_DATA_START(efi32_boot_idt)
+ .word 0
+ .quad 0
+SYM_DATA_END(efi32_boot_idt)
+
+SYM_DATA_START(efi32_boot_cs)
+ .word 0
+SYM_DATA_END(efi32_boot_cs)
+
+SYM_DATA_START(efi32_boot_ds)
+ .word 0
+SYM_DATA_END(efi32_boot_ds)
diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c
new file mode 100644
index 000000000..c881878e5
--- /dev/null
+++ b/arch/x86/boot/compressed/error.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Callers outside of misc.c need access to the error reporting routines,
+ * but the *_putstr() functions need to stay in misc.c because of how
+ * memcpy() and memmove() are defined for the compressed boot environment.
+ */
+#include "misc.h"
+#include "error.h"
+
+void warn(char *m)
+{
+ error_putstr("\n\n");
+ error_putstr(m);
+ error_putstr("\n\n");
+}
+
+void error(char *m)
+{
+ warn(m);
+ error_putstr(" -- System halted");
+
+ while (1)
+ asm("hlt");
+}
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
new file mode 100644
index 000000000..1de582118
--- /dev/null
+++ b/arch/x86/boot/compressed/error.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_ERROR_H
+#define BOOT_COMPRESSED_ERROR_H
+
+#include <linux/compiler.h>
+
+void warn(char *m);
+void error(char *m) __noreturn;
+
+#endif /* BOOT_COMPRESSED_ERROR_H */
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
new file mode 100644
index 000000000..3b354eb95
--- /dev/null
+++ b/arch/x86/boot/compressed/head_32.S
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * linux/boot/head.S
+ *
+ * Copyright (C) 1991, 1992, 1993 Linus Torvalds
+ */
+
+/*
+ * head.S contains the 32-bit startup code.
+ *
+ * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
+ * the page directory will exist. The startup code will be overwritten by
+ * the page directory. [According to comments etc elsewhere on a compressed
+ * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
+ *
+ * Page 0 is deliberately kept safe, since System Management Mode code in
+ * laptops may need to access the BIOS data stored there. This is also
+ * useful for future device drivers that either access the BIOS via VM86
+ * mode.
+ */
+
+/*
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ */
+ .text
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page_types.h>
+#include <asm/boot.h>
+#include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
+
+/*
+ * These symbols needed to be marked as .hidden to prevent the BFD linker from
+ * generating R_386_32 (rather than R_386_RELATIVE) relocations for them when
+ * the 32-bit compressed kernel is linked as PIE. This is no longer necessary,
+ * but it doesn't hurt to keep them .hidden.
+ */
+ .hidden _bss
+ .hidden _ebss
+ .hidden _end
+
+ __HEAD
+SYM_FUNC_START(startup_32)
+ cld
+ cli
+
+/*
+ * Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at. This can only be done
+ * with a short local call on x86. Nothing else will tell us what
+ * address we are running at. The reserved chunk of the real-mode
+ * data at 0x1e4 (defined as a scratch field) are used as the stack
+ * for this calculation. Only 4 bytes are needed.
+ */
+ leal (BP_scratch+4)(%esi), %esp
+ call 1f
+1: popl %edx
+ addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %edx
+
+ /* Load new GDT */
+ leal gdt@GOTOFF(%edx), %eax
+ movl %eax, 2(%eax)
+ lgdt (%eax)
+
+ /* Load segment registers with our descriptors */
+ movl $__BOOT_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
+
+/*
+ * %edx contains the address we are loaded at by the boot loader (plus the
+ * offset to the GOT). The below code calculates %ebx to be the address where
+ * we should move the kernel image temporarily for safe in-place decompression
+ * (again, plus the offset to the GOT).
+ *
+ * %ebp is calculated to be the address that the kernel will be decompressed to.
+ */
+
+#ifdef CONFIG_RELOCATABLE
+ leal startup_32@GOTOFF(%edx), %ebx
+
+#ifdef CONFIG_EFI_STUB
+/*
+ * If we were loaded via the EFI LoadImage service, startup_32() will be at an
+ * offset to the start of the space allocated for the image. efi_pe_entry() will
+ * set up image_offset to tell us where the image actually starts, so that we
+ * can use the full available buffer.
+ * image_offset = startup_32 - image_base
+ * Otherwise image_offset will be zero and has no effect on the calculations.
+ */
+ subl image_offset@GOTOFF(%edx), %ebx
+#endif
+
+ movl BP_kernel_alignment(%esi), %eax
+ decl %eax
+ addl %eax, %ebx
+ notl %eax
+ andl %eax, %ebx
+ cmpl $LOAD_PHYSICAL_ADDR, %ebx
+ jae 1f
+#endif
+ movl $LOAD_PHYSICAL_ADDR, %ebx
+1:
+
+ movl %ebx, %ebp // Save the output address for later
+ /* Target address to relocate to for decompression */
+ addl BP_init_size(%esi), %ebx
+ subl $_end@GOTOFF, %ebx
+
+ /* Set up the stack */
+ leal boot_stack_end@GOTOFF(%ebx), %esp
+
+ /* Zero EFLAGS */
+ pushl $0
+ popfl
+
+/*
+ * Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+ pushl %esi
+ leal (_bss@GOTOFF-4)(%edx), %esi
+ leal (_bss@GOTOFF-4)(%ebx), %edi
+ movl $(_bss - startup_32), %ecx
+ shrl $2, %ecx
+ std
+ rep movsl
+ cld
+ popl %esi
+
+ /*
+ * The GDT may get overwritten either during the copy we just did or
+ * during extract_kernel below. To avoid any issues, repoint the GDTR
+ * to the new copy of the GDT.
+ */
+ leal gdt@GOTOFF(%ebx), %eax
+ movl %eax, 2(%eax)
+ lgdt (%eax)
+
+/*
+ * Jump to the relocated address.
+ */
+ leal .Lrelocated@GOTOFF(%ebx), %eax
+ jmp *%eax
+SYM_FUNC_END(startup_32)
+
+#ifdef CONFIG_EFI_STUB
+SYM_FUNC_START(efi32_stub_entry)
+ add $0x4, %esp
+ movl 8(%esp), %esi /* save boot_params pointer */
+ call efi_main
+ /* efi_main returns the possibly relocated address of startup_32 */
+ jmp *%eax
+SYM_FUNC_END(efi32_stub_entry)
+SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
+#endif
+
+ .text
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
+
+/*
+ * Clear BSS (stack is currently empty)
+ */
+ xorl %eax, %eax
+ leal _bss@GOTOFF(%ebx), %edi
+ leal _ebss@GOTOFF(%ebx), %ecx
+ subl %edi, %ecx
+ shrl $2, %ecx
+ rep stosl
+
+/*
+ * Do the extraction, and jump to the new kernel..
+ */
+ /* push arguments for extract_kernel: */
+
+ pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */
+ pushl %ebp /* output address */
+ pushl input_len@GOTOFF(%ebx) /* input_len */
+ leal input_data@GOTOFF(%ebx), %eax
+ pushl %eax /* input_data */
+ leal boot_heap@GOTOFF(%ebx), %eax
+ pushl %eax /* heap area */
+ pushl %esi /* real mode pointer */
+ call extract_kernel /* returns kernel location in %eax */
+ addl $24, %esp
+
+/*
+ * Jump to the extracted kernel.
+ */
+ xorl %ebx, %ebx
+ jmp *%eax
+SYM_FUNC_END(.Lrelocated)
+
+ .data
+ .balign 8
+SYM_DATA_START_LOCAL(gdt)
+ .word gdt_end - gdt - 1
+ .long 0
+ .word 0
+ .quad 0x0000000000000000 /* Reserved */
+ .quad 0x00cf9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
+
+#ifdef CONFIG_EFI_STUB
+SYM_DATA(image_offset, .long 0)
+#endif
+
+/*
+ * Stack and heap for uncompression
+ */
+ .bss
+ .balign 4
+boot_heap:
+ .fill BOOT_HEAP_SIZE, 1, 0
+boot_stack:
+ .fill BOOT_STACK_SIZE, 1, 0
+boot_stack_end:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
new file mode 100644
index 000000000..b4bd6df29
--- /dev/null
+++ b/arch/x86/boot/compressed/head_64.S
@@ -0,0 +1,1022 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * linux/boot/head.S
+ *
+ * Copyright (C) 1991, 1992, 1993 Linus Torvalds
+ */
+
+/*
+ * head.S contains the 32-bit startup code.
+ *
+ * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
+ * the page directory will exist. The startup code will be overwritten by
+ * the page directory. [According to comments etc elsewhere on a compressed
+ * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
+ *
+ * Page 0 is deliberately kept safe, since System Management Mode code in
+ * laptops may need to access the BIOS data stored there. This is also
+ * useful for future device drivers that either access the BIOS via VM86
+ * mode.
+ */
+
+/*
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ */
+ .code32
+ .text
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/boot.h>
+#include <asm/msr.h>
+#include <asm/processor-flags.h>
+#include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
+#include <asm/desc_defs.h>
+#include <asm/trapnr.h>
+#include "pgtable.h"
+
+/*
+ * Locally defined symbols should be marked hidden:
+ */
+ .hidden _bss
+ .hidden _ebss
+ .hidden _end
+
+ __HEAD
+
+/*
+ * This macro gives the relative virtual address of X, i.e. the offset of X
+ * from startup_32. This is the same as the link-time virtual address of X,
+ * since startup_32 is at 0, but defining it this way tells the
+ * assembler/linker that we do not want the actual run-time address of X. This
+ * prevents the linker from trying to create unwanted run-time relocation
+ * entries for the reference when the compressed kernel is linked as PIE.
+ *
+ * A reference X(%reg) will result in the link-time VA of X being stored with
+ * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
+ * adds the 64-bit base address where the kernel is loaded.
+ *
+ * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
+ * and no run-time relocation.
+ *
+ * The macro should be used as a displacement with a base register containing
+ * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
+ * [$ rva(X)].
+ *
+ * This macro can only be used from within the .head.text section, since the
+ * expression requires startup_32 to be in the same section as the code being
+ * assembled.
+ */
+#define rva(X) ((X) - startup_32)
+
+ .code32
+SYM_FUNC_START(startup_32)
+ /*
+ * 32bit entry is 0 and it is ABI so immutable!
+ * If we come here directly from a bootloader,
+ * kernel(text+data+bss+brk) ramdisk, zero_page, command line
+ * all need to be under the 4G limit.
+ */
+ cld
+ cli
+
+/*
+ * Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at. This can only be done
+ * with a short local call on x86. Nothing else will tell us what
+ * address we are running at. The reserved chunk of the real-mode
+ * data at 0x1e4 (defined as a scratch field) are used as the stack
+ * for this calculation. Only 4 bytes are needed.
+ */
+ leal (BP_scratch+4)(%esi), %esp
+ call 1f
+1: popl %ebp
+ subl $ rva(1b), %ebp
+
+ /* Load new GDT with the 64bit segments using 32bit descriptor */
+ leal rva(gdt)(%ebp), %eax
+ movl %eax, 2(%eax)
+ lgdt (%eax)
+
+ /* Load segment registers with our descriptors */
+ movl $__BOOT_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
+
+ /* Setup a stack and load CS from current GDT */
+ leal rva(boot_stack_end)(%ebp), %esp
+
+ pushl $__KERNEL32_CS
+ leal rva(1f)(%ebp), %eax
+ pushl %eax
+ lretl
+1:
+
+ /* Setup Exception handling for SEV-ES */
+ call startup32_load_idt
+
+ /* Make sure cpu supports long mode. */
+ call verify_cpu
+ testl %eax, %eax
+ jnz .Lno_longmode
+
+/*
+ * Compute the delta between where we were compiled to run at
+ * and where the code will actually run at.
+ *
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
+ * contains the address where we should move the kernel image temporarily
+ * for safe in-place decompression.
+ */
+
+#ifdef CONFIG_RELOCATABLE
+ movl %ebp, %ebx
+
+#ifdef CONFIG_EFI_STUB
+/*
+ * If we were loaded via the EFI LoadImage service, startup_32 will be at an
+ * offset to the start of the space allocated for the image. efi_pe_entry will
+ * set up image_offset to tell us where the image actually starts, so that we
+ * can use the full available buffer.
+ * image_offset = startup_32 - image_base
+ * Otherwise image_offset will be zero and has no effect on the calculations.
+ */
+ subl rva(image_offset)(%ebp), %ebx
+#endif
+
+ movl BP_kernel_alignment(%esi), %eax
+ decl %eax
+ addl %eax, %ebx
+ notl %eax
+ andl %eax, %ebx
+ cmpl $LOAD_PHYSICAL_ADDR, %ebx
+ jae 1f
+#endif
+ movl $LOAD_PHYSICAL_ADDR, %ebx
+1:
+
+ /* Target address to relocate to for decompression */
+ addl BP_init_size(%esi), %ebx
+ subl $ rva(_end), %ebx
+
+/*
+ * Prepare for entering 64 bit mode
+ */
+
+ /* Enable PAE mode */
+ movl %cr4, %eax
+ orl $X86_CR4_PAE, %eax
+ movl %eax, %cr4
+
+ /*
+ * Build early 4G boot pagetable
+ */
+ /*
+ * If SEV is active then set the encryption mask in the page tables.
+ * This will insure that when the kernel is copied and decompressed
+ * it will be done so encrypted.
+ */
+ call get_sev_encryption_bit
+ xorl %edx, %edx
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ testl %eax, %eax
+ jz 1f
+ subl $32, %eax /* Encryption bit is always above bit 31 */
+ bts %eax, %edx /* Set encryption mask for page tables */
+ /*
+ * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
+ * startup32_check_sev_cbit() will do a check. sev_enable() will
+ * initialize sev_status with all the bits reported by
+ * MSR_AMD_SEV_STATUS later, but only MSR_AMD64_SEV_ENABLED_BIT
+ * needs to be set for now.
+ */
+ movl $1, rva(sev_status)(%ebp)
+1:
+#endif
+
+ /* Initialize Page tables to 0 */
+ leal rva(pgtable)(%ebx), %edi
+ xorl %eax, %eax
+ movl $(BOOT_INIT_PGT_SIZE/4), %ecx
+ rep stosl
+
+ /* Build Level 4 */
+ leal rva(pgtable + 0)(%ebx), %edi
+ leal 0x1007 (%edi), %eax
+ movl %eax, 0(%edi)
+ addl %edx, 4(%edi)
+
+ /* Build Level 3 */
+ leal rva(pgtable + 0x1000)(%ebx), %edi
+ leal 0x1007(%edi), %eax
+ movl $4, %ecx
+1: movl %eax, 0x00(%edi)
+ addl %edx, 0x04(%edi)
+ addl $0x00001000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Build Level 2 */
+ leal rva(pgtable + 0x2000)(%ebx), %edi
+ movl $0x00000183, %eax
+ movl $2048, %ecx
+1: movl %eax, 0(%edi)
+ addl %edx, 4(%edi)
+ addl $0x00200000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Enable the boot page tables */
+ leal rva(pgtable)(%ebx), %eax
+ movl %eax, %cr3
+
+ /* Enable Long mode in EFER (Extended Feature Enable Register) */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ /* After gdt is loaded */
+ xorl %eax, %eax
+ lldt %ax
+ movl $__BOOT_TSS, %eax
+ ltr %ax
+
+ /*
+ * Setup for the jump to 64bit mode
+ *
+ * When the jump is performed we will be in long mode but
+ * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
+ * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
+ * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+ * We place all of the values on our mini stack so lret can
+ * used to perform that far jump.
+ */
+ leal rva(startup_64)(%ebp), %eax
+#ifdef CONFIG_EFI_MIXED
+ movl rva(efi32_boot_args)(%ebp), %edi
+ testl %edi, %edi
+ jz 1f
+ leal rva(efi64_stub_entry)(%ebp), %eax
+ movl rva(efi32_boot_args+4)(%ebp), %esi
+ movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer
+ testl %edx, %edx
+ jnz 1f
+ /*
+ * efi_pe_entry uses MS calling convention, which requires 32 bytes of
+ * shadow space on the stack even if all arguments are passed in
+ * registers. We also need an additional 8 bytes for the space that
+ * would be occupied by the return address, and this also results in
+ * the correct stack alignment for entry.
+ */
+ subl $40, %esp
+ leal rva(efi_pe_entry)(%ebp), %eax
+ movl %edi, %ecx // MS calling convention
+ movl %esi, %edx
+1:
+#endif
+ /* Check if the C-bit position is correct when SEV is active */
+ call startup32_check_sev_cbit
+
+ pushl $__KERNEL_CS
+ pushl %eax
+
+ /* Enter paged protected Mode, activating Long Mode */
+ movl $CR0_STATE, %eax
+ movl %eax, %cr0
+
+ /* Jump from 32bit compatibility mode into 64bit mode. */
+ lret
+SYM_FUNC_END(startup_32)
+
+#ifdef CONFIG_EFI_MIXED
+ .org 0x190
+SYM_FUNC_START(efi32_stub_entry)
+ add $0x4, %esp /* Discard return address */
+ popl %ecx
+ popl %edx
+ popl %esi
+
+ call 1f
+1: pop %ebp
+ subl $ rva(1b), %ebp
+
+ movl %esi, rva(efi32_boot_args+8)(%ebp)
+SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL)
+ movl %ecx, rva(efi32_boot_args)(%ebp)
+ movl %edx, rva(efi32_boot_args+4)(%ebp)
+ movb $0, rva(efi_is64)(%ebp)
+
+ /* Save firmware GDTR and code/data selectors */
+ sgdtl rva(efi32_boot_gdt)(%ebp)
+ movw %cs, rva(efi32_boot_cs)(%ebp)
+ movw %ds, rva(efi32_boot_ds)(%ebp)
+
+ /* Store firmware IDT descriptor */
+ sidtl rva(efi32_boot_idt)(%ebp)
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ jmp startup_32
+SYM_FUNC_END(efi32_stub_entry)
+#endif
+
+ .code64
+ .org 0x200
+SYM_CODE_START(startup_64)
+ /*
+ * 64bit entry is 0x200 and it is ABI so immutable!
+ * We come here either from startup_32 or directly from a
+ * 64bit bootloader.
+ * If we come here from a bootloader, kernel(text+data+bss+brk),
+ * ramdisk, zero_page, command line could be above 4G.
+ * We depend on an identity mapped page table being provided
+ * that maps our entire kernel(text+data+bss+brk), zero page
+ * and command line.
+ */
+
+ cld
+ cli
+
+ /* Setup data segments. */
+ xorl %eax, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+ movl %eax, %fs
+ movl %eax, %gs
+
+ /*
+ * Compute the decompressed kernel start address. It is where
+ * we were loaded at aligned to a 2M boundary. %rbp contains the
+ * decompressed kernel start address.
+ *
+ * If it is a relocatable kernel then decompress and run the kernel
+ * from load address aligned to 2MB addr, otherwise decompress and
+ * run the kernel from LOAD_PHYSICAL_ADDR
+ *
+ * We cannot rely on the calculation done in 32-bit mode, since we
+ * may have been invoked via the 64-bit entry point.
+ */
+
+ /* Start with the delta to where the kernel will run at. */
+#ifdef CONFIG_RELOCATABLE
+ leaq startup_32(%rip) /* - $startup_32 */, %rbp
+
+#ifdef CONFIG_EFI_STUB
+/*
+ * If we were loaded via the EFI LoadImage service, startup_32 will be at an
+ * offset to the start of the space allocated for the image. efi_pe_entry will
+ * set up image_offset to tell us where the image actually starts, so that we
+ * can use the full available buffer.
+ * image_offset = startup_32 - image_base
+ * Otherwise image_offset will be zero and has no effect on the calculations.
+ */
+ movl image_offset(%rip), %eax
+ subq %rax, %rbp
+#endif
+
+ movl BP_kernel_alignment(%rsi), %eax
+ decl %eax
+ addq %rax, %rbp
+ notq %rax
+ andq %rax, %rbp
+ cmpq $LOAD_PHYSICAL_ADDR, %rbp
+ jae 1f
+#endif
+ movq $LOAD_PHYSICAL_ADDR, %rbp
+1:
+
+ /* Target address to relocate to for decompression */
+ movl BP_init_size(%rsi), %ebx
+ subl $ rva(_end), %ebx
+ addq %rbp, %rbx
+
+ /* Set up the stack */
+ leaq rva(boot_stack_end)(%rbx), %rsp
+
+ /*
+ * At this point we are in long mode with 4-level paging enabled,
+ * but we might want to enable 5-level paging or vice versa.
+ *
+ * The problem is that we cannot do it directly. Setting or clearing
+ * CR4.LA57 in long mode would trigger #GP. So we need to switch off
+ * long mode and paging first.
+ *
+ * We also need a trampoline in lower memory to switch over from
+ * 4- to 5-level paging for cases when the bootloader puts the kernel
+ * above 4G, but didn't enable 5-level paging for us.
+ *
+ * The same trampoline can be used to switch from 5- to 4-level paging
+ * mode, like when starting 4-level paging kernel via kexec() when
+ * original kernel worked in 5-level paging mode.
+ *
+ * For the trampoline, we need the top page table to reside in lower
+ * memory as we don't have a way to load 64-bit values into CR3 in
+ * 32-bit mode.
+ *
+ * We go though the trampoline even if we don't have to: if we're
+ * already in a desired paging mode. This way the trampoline code gets
+ * tested on every boot.
+ */
+
+ /* Make sure we have GDT with 32-bit code segment */
+ leaq gdt64(%rip), %rax
+ addq %rax, 2(%rax)
+ lgdt (%rax)
+
+ /* Reload CS so IRET returns to a CS actually in the GDT */
+ pushq $__KERNEL_CS
+ leaq .Lon_kernel_cs(%rip), %rax
+ pushq %rax
+ lretq
+
+.Lon_kernel_cs:
+
+ pushq %rsi
+ call load_stage1_idt
+ popq %rsi
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /*
+ * Now that the stage1 interrupt handlers are set up, #VC exceptions from
+ * CPUID instructions can be properly handled for SEV-ES guests.
+ *
+ * For SEV-SNP, the CPUID table also needs to be set up in advance of any
+ * CPUID instructions being issued, so go ahead and do that now via
+ * sev_enable(), which will also handle the rest of the SEV-related
+ * detection/setup to ensure that has been done in advance of any dependent
+ * code.
+ */
+ pushq %rsi
+ movq %rsi, %rdi /* real mode address */
+ call sev_enable
+ popq %rsi
+#endif
+
+ /*
+ * paging_prepare() sets up the trampoline and checks if we need to
+ * enable 5-level paging.
+ *
+ * paging_prepare() returns a two-quadword structure which lands
+ * into RDX:RAX:
+ * - Address of the trampoline is returned in RAX.
+ * - Non zero RDX means trampoline needs to enable 5-level
+ * paging.
+ *
+ * RSI holds real mode data and needs to be preserved across
+ * this function call.
+ */
+ pushq %rsi
+ movq %rsi, %rdi /* real mode address */
+ call paging_prepare
+ popq %rsi
+
+ /* Save the trampoline address in RCX */
+ movq %rax, %rcx
+
+ /* Set up 32-bit addressable stack */
+ leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp
+
+ /*
+ * Preserve live 64-bit registers on the stack: this is necessary
+ * because the architecture does not guarantee that GPRs will retain
+ * their full 64-bit values across a 32-bit mode switch.
+ */
+ pushq %rbp
+ pushq %rbx
+ pushq %rsi
+
+ /*
+ * Push the 64-bit address of trampoline_return() onto the new stack.
+ * It will be used by the trampoline to return to the main code. Due to
+ * the 32-bit mode switch, it cannot be kept it in a register either.
+ */
+ leaq trampoline_return(%rip), %rdi
+ pushq %rdi
+
+ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+ pushq $__KERNEL32_CS
+ leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
+ pushq %rax
+ lretq
+trampoline_return:
+ /* Restore live 64-bit registers */
+ popq %rsi
+ popq %rbx
+ popq %rbp
+
+ /* Restore the stack, the 32-bit trampoline uses its own stack */
+ leaq rva(boot_stack_end)(%rbx), %rsp
+
+ /*
+ * cleanup_trampoline() would restore trampoline memory.
+ *
+ * RDI is address of the page table to use instead of page table
+ * in trampoline memory (if required).
+ *
+ * RSI holds real mode data and needs to be preserved across
+ * this function call.
+ */
+ pushq %rsi
+ leaq rva(top_pgtable)(%rbx), %rdi
+ call cleanup_trampoline
+ popq %rsi
+
+ /* Zero EFLAGS */
+ pushq $0
+ popfq
+
+/*
+ * Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+ pushq %rsi
+ leaq (_bss-8)(%rip), %rsi
+ leaq rva(_bss-8)(%rbx), %rdi
+ movl $(_bss - startup_32), %ecx
+ shrl $3, %ecx
+ std
+ rep movsq
+ cld
+ popq %rsi
+
+ /*
+ * The GDT may get overwritten either during the copy we just did or
+ * during extract_kernel below. To avoid any issues, repoint the GDTR
+ * to the new copy of the GDT.
+ */
+ leaq rva(gdt64)(%rbx), %rax
+ leaq rva(gdt)(%rbx), %rdx
+ movq %rdx, 2(%rax)
+ lgdt (%rax)
+
+/*
+ * Jump to the relocated address.
+ */
+ leaq rva(.Lrelocated)(%rbx), %rax
+ jmp *%rax
+SYM_CODE_END(startup_64)
+
+#ifdef CONFIG_EFI_STUB
+ .org 0x390
+SYM_FUNC_START(efi64_stub_entry)
+ and $~0xf, %rsp /* realign the stack */
+ movq %rdx, %rbx /* save boot_params pointer */
+ call efi_main
+ movq %rbx,%rsi
+ leaq rva(startup_64)(%rax), %rax
+ jmp *%rax
+SYM_FUNC_END(efi64_stub_entry)
+SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
+#endif
+
+ .text
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
+
+/*
+ * Clear BSS (stack is currently empty)
+ */
+ xorl %eax, %eax
+ leaq _bss(%rip), %rdi
+ leaq _ebss(%rip), %rcx
+ subq %rdi, %rcx
+ shrq $3, %rcx
+ rep stosq
+
+ pushq %rsi
+ call load_stage2_idt
+
+ /* Pass boot_params to initialize_identity_maps() */
+ movq (%rsp), %rdi
+ call initialize_identity_maps
+ popq %rsi
+
+/*
+ * Do the extraction, and jump to the new kernel..
+ */
+ pushq %rsi /* Save the real mode argument */
+ movq %rsi, %rdi /* real mode address */
+ leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
+ leaq input_data(%rip), %rdx /* input_data */
+ movl input_len(%rip), %ecx /* input_len */
+ movq %rbp, %r8 /* output target address */
+ movl output_len(%rip), %r9d /* decompressed length, end of relocs */
+ call extract_kernel /* returns kernel location in %rax */
+ popq %rsi
+
+/*
+ * Jump to the decompressed kernel.
+ */
+ jmp *%rax
+SYM_FUNC_END(.Lrelocated)
+
+ .code32
+/*
+ * This is the 32-bit trampoline that will be copied over to low memory.
+ *
+ * Return address is at the top of the stack (might be above 4G).
+ * ECX contains the base address of the trampoline memory.
+ * Non zero RDX means trampoline needs to enable 5-level paging.
+ */
+SYM_CODE_START(trampoline_32bit_src)
+ /* Set up data and stack segments */
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
+ movl %eax, %ss
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /* Check what paging mode we want to be in after the trampoline */
+ testl %edx, %edx
+ jz 1f
+
+ /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
+ movl %cr4, %eax
+ testl $X86_CR4_LA57, %eax
+ jnz 3f
+ jmp 2f
+1:
+ /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
+ movl %cr4, %eax
+ testl $X86_CR4_LA57, %eax
+ jz 3f
+2:
+ /* Point CR3 to the trampoline's new top level page table */
+ leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
+ movl %eax, %cr3
+3:
+ /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
+ pushl %ecx
+ pushl %edx
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ /* Avoid writing EFER if no change was made (for TDX guest) */
+ jc 1f
+ wrmsr
+1: popl %edx
+ popl %ecx
+
+#ifdef CONFIG_X86_MCE
+ /*
+ * Preserve CR4.MCE if the kernel will enable #MC support.
+ * Clearing MCE may fault in some environments (that also force #MC
+ * support). Any machine check that occurs before #MC support is fully
+ * configured will crash the system regardless of the CR4.MCE value set
+ * here.
+ */
+ movl %cr4, %eax
+ andl $X86_CR4_MCE, %eax
+#else
+ movl $0, %eax
+#endif
+
+ /* Enable PAE and LA57 (if required) paging modes */
+ orl $X86_CR4_PAE, %eax
+ testl %edx, %edx
+ jz 1f
+ orl $X86_CR4_LA57, %eax
+1:
+ movl %eax, %cr4
+
+ /* Calculate address of paging_enabled() once we are executing in the trampoline */
+ leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
+
+ /* Prepare the stack for far return to Long Mode */
+ pushl $__KERNEL_CS
+ pushl %eax
+
+ /* Enable paging again. */
+ movl %cr0, %eax
+ btsl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ lret
+SYM_CODE_END(trampoline_32bit_src)
+
+ .code64
+SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
+ /* Return from the trampoline */
+ retq
+SYM_FUNC_END(.Lpaging_enabled)
+
+ /*
+ * The trampoline code has a size limit.
+ * Make sure we fail to compile if the trampoline code grows
+ * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
+ */
+ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
+
+ .code32
+SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
+ /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
+1:
+ hlt
+ jmp 1b
+SYM_FUNC_END(.Lno_longmode)
+
+#include "../../kernel/verify_cpu.S"
+
+ .data
+SYM_DATA_START_LOCAL(gdt64)
+ .word gdt_end - gdt - 1
+ .quad gdt - gdt64
+SYM_DATA_END(gdt64)
+ .balign 8
+SYM_DATA_START_LOCAL(gdt)
+ .word gdt_end - gdt - 1
+ .long 0
+ .word 0
+ .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
+ .quad 0x00af9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+ .quad 0x0080890000000000 /* TS descriptor */
+ .quad 0x0000000000000000 /* TS continued */
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
+
+SYM_DATA_START(boot_idt_desc)
+ .word boot_idt_end - boot_idt - 1
+ .quad 0
+SYM_DATA_END(boot_idt_desc)
+ .balign 8
+SYM_DATA_START(boot_idt)
+ .rept BOOT_IDT_ENTRIES
+ .quad 0
+ .quad 0
+ .endr
+SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+SYM_DATA_START(boot32_idt_desc)
+ .word boot32_idt_end - boot32_idt - 1
+ .long 0
+SYM_DATA_END(boot32_idt_desc)
+ .balign 8
+SYM_DATA_START(boot32_idt)
+ .rept 32
+ .quad 0
+ .endr
+SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end)
+#endif
+
+#ifdef CONFIG_EFI_STUB
+SYM_DATA(image_offset, .long 0)
+#endif
+#ifdef CONFIG_EFI_MIXED
+SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
+SYM_DATA(efi_is64, .byte 1)
+
+#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime)
+#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol)
+#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base)
+
+ __HEAD
+ .code32
+SYM_FUNC_START(efi32_pe_entry)
+/*
+ * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
+ * efi_system_table_32_t *sys_table)
+ */
+
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %eax // dummy push to allocate loaded_image
+
+ pushl %ebx // save callee-save registers
+ pushl %edi
+
+ call verify_cpu // check for long mode support
+ testl %eax, %eax
+ movl $0x80000003, %eax // EFI_UNSUPPORTED
+ jnz 2f
+
+ call 1f
+1: pop %ebx
+ subl $ rva(1b), %ebx
+
+ /* Get the loaded image protocol pointer from the image handle */
+ leal -4(%ebp), %eax
+ pushl %eax // &loaded_image
+ leal rva(loaded_image_proto)(%ebx), %eax
+ pushl %eax // pass the GUID address
+ pushl 8(%ebp) // pass the image handle
+
+ /*
+ * Note the alignment of the stack frame.
+ * sys_table
+ * handle <-- 16-byte aligned on entry by ABI
+ * return address
+ * frame pointer
+ * loaded_image <-- local variable
+ * saved %ebx <-- 16-byte aligned here
+ * saved %edi
+ * &loaded_image
+ * &loaded_image_proto
+ * handle <-- 16-byte aligned for call to handle_protocol
+ */
+
+ movl 12(%ebp), %eax // sys_table
+ movl ST32_boottime(%eax), %eax // sys_table->boottime
+ call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol
+ addl $12, %esp // restore argument space
+ testl %eax, %eax
+ jnz 2f
+
+ movl 8(%ebp), %ecx // image_handle
+ movl 12(%ebp), %edx // sys_table
+ movl -4(%ebp), %esi // loaded_image
+ movl LI32_image_base(%esi), %esi // loaded_image->image_base
+ movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry
+ /*
+ * We need to set the image_offset variable here since startup_32() will
+ * use it before we get to the 64-bit efi_pe_entry() in C code.
+ */
+ subl %esi, %ebx
+ movl %ebx, rva(image_offset)(%ebp) // save image_offset
+ jmp efi32_pe_stub_entry
+
+2: popl %edi // restore callee-save registers
+ popl %ebx
+ leave
+ RET
+SYM_FUNC_END(efi32_pe_entry)
+
+ .section ".rodata"
+ /* EFI loaded image protocol GUID */
+ .balign 4
+SYM_DATA_START_LOCAL(loaded_image_proto)
+ .long 0x5b1b31a1
+ .word 0x9562, 0x11d2
+ .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
+SYM_DATA_END(loaded_image_proto)
+#endif
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ __HEAD
+ .code32
+/*
+ * Write an IDT entry into boot32_idt
+ *
+ * Parameters:
+ *
+ * %eax: Handler address
+ * %edx: Vector number
+ *
+ * Physical offset is expected in %ebp
+ */
+SYM_FUNC_START(startup32_set_idt_entry)
+ push %ebx
+ push %ecx
+
+ /* IDT entry address to %ebx */
+ leal rva(boot32_idt)(%ebp), %ebx
+ shl $3, %edx
+ addl %edx, %ebx
+
+ /* Build IDT entry, lower 4 bytes */
+ movl %eax, %edx
+ andl $0x0000ffff, %edx # Target code segment offset [15:0]
+ movl $__KERNEL32_CS, %ecx # Target code segment selector
+ shl $16, %ecx
+ orl %ecx, %edx
+
+ /* Store lower 4 bytes to IDT */
+ movl %edx, (%ebx)
+
+ /* Build IDT entry, upper 4 bytes */
+ movl %eax, %edx
+ andl $0xffff0000, %edx # Target code segment offset [31:16]
+ orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate
+
+ /* Store upper 4 bytes to IDT */
+ movl %edx, 4(%ebx)
+
+ pop %ecx
+ pop %ebx
+ RET
+SYM_FUNC_END(startup32_set_idt_entry)
+#endif
+
+SYM_FUNC_START(startup32_load_idt)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /* #VC handler */
+ leal rva(startup32_vc_handler)(%ebp), %eax
+ movl $X86_TRAP_VC, %edx
+ call startup32_set_idt_entry
+
+ /* Load IDT */
+ leal rva(boot32_idt)(%ebp), %eax
+ movl %eax, rva(boot32_idt_desc+2)(%ebp)
+ lidt rva(boot32_idt_desc)(%ebp)
+#endif
+ RET
+SYM_FUNC_END(startup32_load_idt)
+
+/*
+ * Check for the correct C-bit position when the startup_32 boot-path is used.
+ *
+ * The check makes use of the fact that all memory is encrypted when paging is
+ * disabled. The function creates 64 bits of random data using the RDRAND
+ * instruction. RDRAND is mandatory for SEV guests, so always available. If the
+ * hypervisor violates that the kernel will crash right here.
+ *
+ * The 64 bits of random data are stored to a memory location and at the same
+ * time kept in the %eax and %ebx registers. Since encryption is always active
+ * when paging is off the random data will be stored encrypted in main memory.
+ *
+ * Then paging is enabled. When the C-bit position is correct all memory is
+ * still mapped encrypted and comparing the register values with memory will
+ * succeed. An incorrect C-bit position will map all memory unencrypted, so that
+ * the compare will use the encrypted random data and fail.
+ */
+SYM_FUNC_START(startup32_check_sev_cbit)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+
+ /* Check for non-zero sev_status */
+ movl rva(sev_status)(%ebp), %eax
+ testl %eax, %eax
+ jz 4f
+
+ /*
+ * Get two 32-bit random values - Don't bail out if RDRAND fails
+ * because it is better to prevent forward progress if no random value
+ * can be gathered.
+ */
+1: rdrand %eax
+ jnc 1b
+2: rdrand %ebx
+ jnc 2b
+
+ /* Store to memory and keep it in the registers */
+ movl %eax, rva(sev_check_data)(%ebp)
+ movl %ebx, rva(sev_check_data+4)(%ebp)
+
+ /* Enable paging to see if encryption is active */
+ movl %cr0, %edx /* Backup %cr0 in %edx */
+ movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
+ movl %ecx, %cr0
+
+ cmpl %eax, rva(sev_check_data)(%ebp)
+ jne 3f
+ cmpl %ebx, rva(sev_check_data+4)(%ebp)
+ jne 3f
+
+ movl %edx, %cr0 /* Restore previous %cr0 */
+
+ jmp 4f
+
+3: /* Check failed - hlt the machine */
+ hlt
+ jmp 3b
+
+4:
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %eax
+#endif
+ RET
+SYM_FUNC_END(startup32_check_sev_cbit)
+
+/*
+ * Stack and heap for uncompression
+ */
+ .bss
+ .balign 4
+SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0)
+
+SYM_DATA_START_LOCAL(boot_stack)
+ .fill BOOT_STACK_SIZE, 1, 0
+ .balign 16
+SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
+
+/*
+ * Space for page tables (not in .bss so not zeroed)
+ */
+ .section ".pgtable","aw",@nobits
+ .balign 4096
+SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0)
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0)
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
new file mode 100644
index 000000000..b4155273d
--- /dev/null
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code is used on x86_64 to create page table identity mappings on
+ * demand by building up a new set of page tables (or appending to the
+ * existing ones), and then switching over to them when ready.
+ *
+ * Copyright (C) 2015-2016 Yinghai Lu
+ * Copyright (C) 2016 Kees Cook
+ */
+
+/*
+ * Since we're dealing with identity mappings, physical and virtual
+ * addresses are the same, so override these defines which are ultimately
+ * used by the headers in misc.h.
+ */
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x)))
+
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
+#include "error.h"
+#include "misc.h"
+
+/* These actually do the work of building the kernel identity maps. */
+#include <linux/pgtable.h>
+#include <asm/cmpxchg.h>
+#include <asm/trap_pf.h>
+#include <asm/trapnr.h>
+#include <asm/init.h>
+/* Use the static base for this part of the boot process */
+#undef __PAGE_OFFSET
+#define __PAGE_OFFSET __PAGE_OFFSET_BASE
+#include "../../mm/ident_map.c"
+
+#define _SETUP
+#include <asm/setup.h> /* For COMMAND_LINE_SIZE */
+#undef _SETUP
+
+extern unsigned long get_cmd_line_ptr(void);
+
+/* Used by PAGE_KERN* macros: */
+pteval_t __default_kernel_pte_mask __read_mostly = ~0;
+
+/* Used to track our page table allocation area. */
+struct alloc_pgt_data {
+ unsigned char *pgt_buf;
+ unsigned long pgt_buf_size;
+ unsigned long pgt_buf_offset;
+};
+
+/*
+ * Allocates space for a page table entry, using struct alloc_pgt_data
+ * above. Besides the local callers, this is used as the allocation
+ * callback in mapping_info below.
+ */
+static void *alloc_pgt_page(void *context)
+{
+ struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
+ unsigned char *entry;
+
+ /* Validate there is space available for a new page. */
+ if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
+ debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
+ debug_putaddr(pages->pgt_buf_offset);
+ debug_putaddr(pages->pgt_buf_size);
+ return NULL;
+ }
+
+ /* Consumed more tables than expected? */
+ if (pages->pgt_buf_offset == BOOT_PGT_SIZE_WARN) {
+ debug_putstr("pgt_buf running low in " __FILE__ "\n");
+ debug_putstr("Need to raise BOOT_PGT_SIZE?\n");
+ debug_putaddr(pages->pgt_buf_offset);
+ debug_putaddr(pages->pgt_buf_size);
+ }
+
+ entry = pages->pgt_buf + pages->pgt_buf_offset;
+ pages->pgt_buf_offset += PAGE_SIZE;
+
+ return entry;
+}
+
+/* Used to track our allocated page tables. */
+static struct alloc_pgt_data pgt_data;
+
+/* The top level page table entry pointer. */
+static unsigned long top_level_pgt;
+
+phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
+
+/*
+ * Mapping information structure passed to kernel_ident_mapping_init().
+ * Due to relocation, pointers must be assigned at run time not build time.
+ */
+static struct x86_mapping_info mapping_info;
+
+/*
+ * Adds the specified range to the identity mappings.
+ */
+void kernel_add_identity_map(unsigned long start, unsigned long end)
+{
+ int ret;
+
+ /* Align boundary to 2M. */
+ start = round_down(start, PMD_SIZE);
+ end = round_up(end, PMD_SIZE);
+ if (start >= end)
+ return;
+
+ /* Build the mapping. */
+ ret = kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end);
+ if (ret)
+ error("Error: kernel_ident_mapping_init() failed\n");
+}
+
+/* Locates and clears a region for a new top level page table. */
+void initialize_identity_maps(void *rmode)
+{
+ unsigned long cmdline;
+ struct setup_data *sd;
+
+ /* Exclude the encryption mask from __PHYSICAL_MASK */
+ physical_mask &= ~sme_me_mask;
+
+ /* Init mapping_info with run-time function/buffer pointers. */
+ mapping_info.alloc_pgt_page = alloc_pgt_page;
+ mapping_info.context = &pgt_data;
+ mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
+ mapping_info.kernpg_flag = _KERNPG_TABLE;
+
+ /*
+ * It should be impossible for this not to already be true,
+ * but since calling this a second time would rewind the other
+ * counters, let's just make sure this is reset too.
+ */
+ pgt_data.pgt_buf_offset = 0;
+
+ /*
+ * If we came here via startup_32(), cr3 will be _pgtable already
+ * and we must append to the existing area instead of entirely
+ * overwriting it.
+ *
+ * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
+ * the top-level page table is allocated separately.
+ *
+ * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
+ * cases. On 4-level paging it's equal to 'top_level_pgt'.
+ */
+ top_level_pgt = read_cr3_pa();
+ if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
+ pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
+ pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
+ memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+ } else {
+ pgt_data.pgt_buf = _pgtable;
+ pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
+ memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+ top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
+ }
+
+ /*
+ * New page-table is set up - map the kernel image, boot_params and the
+ * command line. The uncompressed kernel requires boot_params and the
+ * command line to be mapped in the identity mapping. Map them
+ * explicitly here in case the compressed kernel does not touch them,
+ * or does not touch all the pages covering them.
+ */
+ kernel_add_identity_map((unsigned long)_head, (unsigned long)_end);
+ boot_params = rmode;
+ kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
+ cmdline = get_cmd_line_ptr();
+ kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
+
+ /*
+ * Also map the setup_data entries passed via boot_params in case they
+ * need to be accessed by uncompressed kernel via the identity mapping.
+ */
+ sd = (struct setup_data *)boot_params->hdr.setup_data;
+ while (sd) {
+ unsigned long sd_addr = (unsigned long)sd;
+
+ kernel_add_identity_map(sd_addr, sd_addr + sizeof(*sd) + sd->len);
+ sd = (struct setup_data *)sd->next;
+ }
+
+ sev_prep_identity_maps(top_level_pgt);
+
+ /* Load the new page-table. */
+ write_cr3(top_level_pgt);
+
+ /*
+ * Now that the required page table mappings are established and a
+ * GHCB can be used, check for SNP guest/HV feature compatibility.
+ */
+ snp_check_features();
+}
+
+static pte_t *split_large_pmd(struct x86_mapping_info *info,
+ pmd_t *pmdp, unsigned long __address)
+{
+ unsigned long page_flags;
+ unsigned long address;
+ pte_t *pte;
+ pmd_t pmd;
+ int i;
+
+ pte = (pte_t *)info->alloc_pgt_page(info->context);
+ if (!pte)
+ return NULL;
+
+ address = __address & PMD_MASK;
+ /* No large page - clear PSE flag */
+ page_flags = info->page_flag & ~_PAGE_PSE;
+
+ /* Populate the PTEs */
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ set_pte(&pte[i], __pte(address | page_flags));
+ address += PAGE_SIZE;
+ }
+
+ /*
+ * Ideally we need to clear the large PMD first and do a TLB
+ * flush before we write the new PMD. But the 2M range of the
+ * PMD might contain the code we execute and/or the stack
+ * we are on, so we can't do that. But that should be safe here
+ * because we are going from large to small mappings and we are
+ * also the only user of the page-table, so there is no chance
+ * of a TLB multihit.
+ */
+ pmd = __pmd((unsigned long)pte | info->kernpg_flag);
+ set_pmd(pmdp, pmd);
+ /* Flush TLB to establish the new PMD */
+ write_cr3(top_level_pgt);
+
+ return pte + pte_index(__address);
+}
+
+static void clflush_page(unsigned long address)
+{
+ unsigned int flush_size;
+ char *cl, *start, *end;
+
+ /*
+ * Hardcode cl-size to 64 - CPUID can't be used here because that might
+ * cause another #VC exception and the GHCB is not ready to use yet.
+ */
+ flush_size = 64;
+ start = (char *)(address & PAGE_MASK);
+ end = start + PAGE_SIZE;
+
+ /*
+ * First make sure there are no pending writes on the cache-lines to
+ * flush.
+ */
+ asm volatile("mfence" : : : "memory");
+
+ for (cl = start; cl != end; cl += flush_size)
+ clflush(cl);
+}
+
+static int set_clr_page_flags(struct x86_mapping_info *info,
+ unsigned long address,
+ pteval_t set, pteval_t clr)
+{
+ pgd_t *pgdp = (pgd_t *)top_level_pgt;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep, pte;
+
+ /*
+ * First make sure there is a PMD mapping for 'address'.
+ * It should already exist, but keep things generic.
+ *
+ * To map the page just read from it and fault it in if there is no
+ * mapping yet. kernel_add_identity_map() can't be called here because
+ * that would unconditionally map the address on PMD level, destroying
+ * any PTE-level mappings that might already exist. Use assembly here
+ * so the access won't be optimized away.
+ */
+ asm volatile("mov %[address], %%r9"
+ :: [address] "g" (*(unsigned long *)address)
+ : "r9", "memory");
+
+ /*
+ * The page is mapped at least with PMD size - so skip checks and walk
+ * directly to the PMD.
+ */
+ p4dp = p4d_offset(pgdp, address);
+ pudp = pud_offset(p4dp, address);
+ pmdp = pmd_offset(pudp, address);
+
+ if (pmd_large(*pmdp))
+ ptep = split_large_pmd(info, pmdp, address);
+ else
+ ptep = pte_offset_kernel(pmdp, address);
+
+ if (!ptep)
+ return -ENOMEM;
+
+ /*
+ * Changing encryption attributes of a page requires to flush it from
+ * the caches.
+ */
+ if ((set | clr) & _PAGE_ENC) {
+ clflush_page(address);
+
+ /*
+ * If the encryption attribute is being cleared, change the page state
+ * to shared in the RMP table.
+ */
+ if (clr)
+ snp_set_page_shared(__pa(address & PAGE_MASK));
+ }
+
+ /* Update PTE */
+ pte = *ptep;
+ pte = pte_set_flags(pte, set);
+ pte = pte_clear_flags(pte, clr);
+ set_pte(ptep, pte);
+
+ /*
+ * If the encryption attribute is being set, then change the page state to
+ * private in the RMP entry. The page state change must be done after the PTE
+ * is updated.
+ */
+ if (set & _PAGE_ENC)
+ snp_set_page_private(__pa(address & PAGE_MASK));
+
+ /* Flush TLB after changing encryption attribute */
+ write_cr3(top_level_pgt);
+
+ return 0;
+}
+
+int set_page_decrypted(unsigned long address)
+{
+ return set_clr_page_flags(&mapping_info, address, 0, _PAGE_ENC);
+}
+
+int set_page_encrypted(unsigned long address)
+{
+ return set_clr_page_flags(&mapping_info, address, _PAGE_ENC, 0);
+}
+
+int set_page_non_present(unsigned long address)
+{
+ return set_clr_page_flags(&mapping_info, address, 0, _PAGE_PRESENT);
+}
+
+static void do_pf_error(const char *msg, unsigned long error_code,
+ unsigned long address, unsigned long ip)
+{
+ error_putstr(msg);
+
+ error_putstr("\nError Code: ");
+ error_puthex(error_code);
+ error_putstr("\nCR2: 0x");
+ error_puthex(address);
+ error_putstr("\nRIP relative to _head: 0x");
+ error_puthex(ip - (unsigned long)_head);
+ error_putstr("\n");
+
+ error("Stopping.\n");
+}
+
+void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+ unsigned long address = native_read_cr2();
+ unsigned long end;
+ bool ghcb_fault;
+
+ ghcb_fault = sev_es_check_ghcb_fault(address);
+
+ address &= PMD_MASK;
+ end = address + PMD_SIZE;
+
+ /*
+ * Check for unexpected error codes. Unexpected are:
+ * - Faults on present pages
+ * - User faults
+ * - Reserved bits set
+ */
+ if (error_code & (X86_PF_PROT | X86_PF_USER | X86_PF_RSVD))
+ do_pf_error("Unexpected page-fault:", error_code, address, regs->ip);
+ else if (ghcb_fault)
+ do_pf_error("Page-fault on GHCB page:", error_code, address, regs->ip);
+
+ /*
+ * Error code is sane - now identity map the 2M region around
+ * the faulting address.
+ */
+ kernel_add_identity_map(address, end);
+}
diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c
new file mode 100644
index 000000000..3cdf94b41
--- /dev/null
+++ b/arch/x86/boot/compressed/idt_64.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <asm/trap_pf.h>
+#include <asm/segment.h>
+#include <asm/trapnr.h>
+#include "misc.h"
+
+static void set_idt_entry(int vector, void (*handler)(void))
+{
+ unsigned long address = (unsigned long)handler;
+ gate_desc entry;
+
+ memset(&entry, 0, sizeof(entry));
+
+ entry.offset_low = (u16)(address & 0xffff);
+ entry.segment = __KERNEL_CS;
+ entry.bits.type = GATE_TRAP;
+ entry.bits.p = 1;
+ entry.offset_middle = (u16)((address >> 16) & 0xffff);
+ entry.offset_high = (u32)(address >> 32);
+
+ memcpy(&boot_idt[vector], &entry, sizeof(entry));
+}
+
+/* Have this here so we don't need to include <asm/desc.h> */
+static void load_boot_idt(const struct desc_ptr *dtr)
+{
+ asm volatile("lidt %0"::"m" (*dtr));
+}
+
+/* Setup IDT before kernel jumping to .Lrelocated */
+void load_stage1_idt(void)
+{
+ boot_idt_desc.address = (unsigned long)boot_idt;
+
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+ set_idt_entry(X86_TRAP_VC, boot_stage1_vc);
+
+ load_boot_idt(&boot_idt_desc);
+}
+
+/*
+ * Setup IDT after kernel jumping to .Lrelocated.
+ *
+ * initialize_identity_maps() needs a #PF handler to be setup
+ * in order to be able to fault-in identity mapping ranges; see
+ * do_boot_page_fault().
+ *
+ * This #PF handler setup needs to happen in load_stage2_idt() where the
+ * IDT is loaded and there the #VC IDT entry gets setup too.
+ *
+ * In order to be able to handle #VCs, one needs a GHCB which
+ * gets setup with an already set up pagetable, which is done in
+ * initialize_identity_maps(). And there's the catch 22: the boot #VC
+ * handler do_boot_stage2_vc() needs to call early_setup_ghcb() itself
+ * (and, especially set_page_decrypted()) because the SEV-ES setup code
+ * cannot initialize a GHCB as there's no #PF handler yet...
+ */
+void load_stage2_idt(void)
+{
+ boot_idt_desc.address = (unsigned long)boot_idt;
+
+ set_idt_entry(X86_TRAP_PF, boot_page_fault);
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /*
+ * Clear the second stage #VC handler in case guest types
+ * needing #VC have not been detected.
+ */
+ if (sev_status & BIT(1))
+ set_idt_entry(X86_TRAP_VC, boot_stage2_vc);
+ else
+ set_idt_entry(X86_TRAP_VC, NULL);
+#endif
+
+ load_boot_idt(&boot_idt_desc);
+}
+
+void cleanup_exception_handling(void)
+{
+ /*
+ * Flush GHCB from cache and map it encrypted again when running as
+ * SEV-ES guest.
+ */
+ sev_es_shutdown_ghcb();
+
+ /* Set a null-idt, disabling #PF and #VC handling */
+ boot_idt_desc.size = 0;
+ boot_idt_desc.address = 0;
+ load_boot_idt(&boot_idt_desc);
+}
diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S
new file mode 100644
index 000000000..22890e199
--- /dev/null
+++ b/arch/x86/boot/compressed/idt_handlers_64.S
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Early IDT handler entry points
+ *
+ * Copyright (C) 2019 SUSE
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#include <asm/segment.h>
+
+/* For ORIG_RAX */
+#include "../../entry/calling.h"
+
+.macro EXCEPTION_HANDLER name function error_code=0
+SYM_FUNC_START(\name)
+
+ /* Build pt_regs */
+ .if \error_code == 0
+ pushq $0
+ .endif
+
+ pushq %rdi
+ pushq %rsi
+ pushq %rdx
+ pushq %rcx
+ pushq %rax
+ pushq %r8
+ pushq %r9
+ pushq %r10
+ pushq %r11
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+ /* Call handler with pt_regs */
+ movq %rsp, %rdi
+ /* Error code is second parameter */
+ movq ORIG_RAX(%rsp), %rsi
+ call \function
+
+ /* Restore regs */
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+
+ /* Remove error code and return */
+ addq $8, %rsp
+
+ iretq
+SYM_FUNC_END(\name)
+ .endm
+
+ .text
+ .code64
+
+EXCEPTION_HANDLER boot_page_fault do_boot_page_fault error_code=1
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+EXCEPTION_HANDLER boot_stage1_vc do_vc_no_ghcb error_code=1
+EXCEPTION_HANDLER boot_stage2_vc do_boot_stage2_vc error_code=1
+#endif
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
new file mode 100644
index 000000000..e476bcbd9
--- /dev/null
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -0,0 +1,873 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * kaslr.c
+ *
+ * This contains the routines needed to generate a reasonable level of
+ * entropy to choose a randomized kernel base address offset in support
+ * of Kernel Address Space Layout Randomization (KASLR). Additionally
+ * handles walking the physical memory maps (and tracking memory regions
+ * to avoid) in order to select a physical memory location that can
+ * contain the entire properly aligned running kernel image.
+ *
+ */
+
+/*
+ * isspace() in linux/ctype.h is expected by next_args() to filter
+ * out "space/lf/tab". While boot/ctype.h conflicts with linux/ctype.h,
+ * since isdigit() is implemented in both of them. Hence disable it
+ * here.
+ */
+#define BOOT_CTYPE_H
+
+#include "misc.h"
+#include "error.h"
+#include "../string.h"
+#include "efi.h"
+
+#include <generated/compile.h>
+#include <linux/module.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+#include <linux/ctype.h>
+#include <generated/utsversion.h>
+#include <generated/utsrelease.h>
+
+#define _SETUP
+#include <asm/setup.h> /* For COMMAND_LINE_SIZE */
+#undef _SETUP
+
+extern unsigned long get_cmd_line_ptr(void);
+
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+ LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+static unsigned long rotate_xor(unsigned long hash, const void *area,
+ size_t size)
+{
+ size_t i;
+ unsigned long *ptr = (unsigned long *)area;
+
+ for (i = 0; i < size / sizeof(hash); i++) {
+ /* Rotate by odd number of bits and XOR. */
+ hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+ hash ^= ptr[i];
+ }
+
+ return hash;
+}
+
+/* Attempt to create a simple but unpredictable starting entropy. */
+static unsigned long get_boot_seed(void)
+{
+ unsigned long hash = 0;
+
+ hash = rotate_xor(hash, build_str, sizeof(build_str));
+ hash = rotate_xor(hash, boot_params, sizeof(*boot_params));
+
+ return hash;
+}
+
+#define KASLR_COMPRESSED_BOOT
+#include "../../lib/kaslr.c"
+
+
+/* Only supporting at most 4 unusable memmap regions with kaslr */
+#define MAX_MEMMAP_REGIONS 4
+
+static bool memmap_too_large;
+
+
+/*
+ * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit.
+ * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options.
+ */
+static u64 mem_limit;
+
+/* Number of immovable memory regions */
+static int num_immovable_mem;
+
+enum mem_avoid_index {
+ MEM_AVOID_ZO_RANGE = 0,
+ MEM_AVOID_INITRD,
+ MEM_AVOID_CMDLINE,
+ MEM_AVOID_BOOTPARAMS,
+ MEM_AVOID_MEMMAP_BEGIN,
+ MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
+ MEM_AVOID_MAX,
+};
+
+static struct mem_vector mem_avoid[MEM_AVOID_MAX];
+
+static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
+{
+ /* Item one is entirely before item two. */
+ if (one->start + one->size <= two->start)
+ return false;
+ /* Item one is entirely after item two. */
+ if (one->start >= two->start + two->size)
+ return false;
+ return true;
+}
+
+char *skip_spaces(const char *str)
+{
+ while (isspace(*str))
+ ++str;
+ return (char *)str;
+}
+#include "../../../../lib/ctype.c"
+#include "../../../../lib/cmdline.c"
+
+enum parse_mode {
+ PARSE_MEMMAP,
+ PARSE_EFI,
+};
+
+static int
+parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
+{
+ char *oldp;
+
+ if (!p)
+ return -EINVAL;
+
+ /* We don't care about this option here */
+ if (!strncmp(p, "exactmap", 8))
+ return -EINVAL;
+
+ oldp = p;
+ *size = memparse(p, &p);
+ if (p == oldp)
+ return -EINVAL;
+
+ switch (*p) {
+ case '#':
+ case '$':
+ case '!':
+ *start = memparse(p + 1, &p);
+ return 0;
+ case '@':
+ if (mode == PARSE_MEMMAP) {
+ /*
+ * memmap=nn@ss specifies usable region, should
+ * be skipped
+ */
+ *size = 0;
+ } else {
+ u64 flags;
+
+ /*
+ * efi_fake_mem=nn@ss:attr the attr specifies
+ * flags that might imply a soft-reservation.
+ */
+ *start = memparse(p + 1, &p);
+ if (p && *p == ':') {
+ p++;
+ if (kstrtoull(p, 0, &flags) < 0)
+ *size = 0;
+ else if (flags & EFI_MEMORY_SP)
+ return 0;
+ }
+ *size = 0;
+ }
+ fallthrough;
+ default:
+ /*
+ * If w/o offset, only size specified, memmap=nn[KMG] has the
+ * same behaviour as mem=nn[KMG]. It limits the max address
+ * system can use. Region above the limit should be avoided.
+ */
+ *start = 0;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void mem_avoid_memmap(enum parse_mode mode, char *str)
+{
+ static int i;
+
+ if (i >= MAX_MEMMAP_REGIONS)
+ return;
+
+ while (str && (i < MAX_MEMMAP_REGIONS)) {
+ int rc;
+ u64 start, size;
+ char *k = strchr(str, ',');
+
+ if (k)
+ *k++ = 0;
+
+ rc = parse_memmap(str, &start, &size, mode);
+ if (rc < 0)
+ break;
+ str = k;
+
+ if (start == 0) {
+ /* Store the specified memory limit if size > 0 */
+ if (size > 0 && size < mem_limit)
+ mem_limit = size;
+
+ continue;
+ }
+
+ mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
+ mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
+ i++;
+ }
+
+ /* More than 4 memmaps, fail kaslr */
+ if ((i >= MAX_MEMMAP_REGIONS) && str)
+ memmap_too_large = true;
+}
+
+/* Store the number of 1GB huge pages which users specified: */
+static unsigned long max_gb_huge_pages;
+
+static void parse_gb_huge_pages(char *param, char *val)
+{
+ static bool gbpage_sz;
+ char *p;
+
+ if (!strcmp(param, "hugepagesz")) {
+ p = val;
+ if (memparse(p, &p) != PUD_SIZE) {
+ gbpage_sz = false;
+ return;
+ }
+
+ if (gbpage_sz)
+ warn("Repeatedly set hugeTLB page size of 1G!\n");
+ gbpage_sz = true;
+ return;
+ }
+
+ if (!strcmp(param, "hugepages") && gbpage_sz) {
+ p = val;
+ max_gb_huge_pages = simple_strtoull(p, &p, 0);
+ return;
+ }
+}
+
+static void handle_mem_options(void)
+{
+ char *args = (char *)get_cmd_line_ptr();
+ size_t len;
+ char *tmp_cmdline;
+ char *param, *val;
+ u64 mem_size;
+
+ if (!args)
+ return;
+
+ len = strnlen(args, COMMAND_LINE_SIZE-1);
+ tmp_cmdline = malloc(len + 1);
+ if (!tmp_cmdline)
+ error("Failed to allocate space for tmp_cmdline");
+
+ memcpy(tmp_cmdline, args, len);
+ tmp_cmdline[len] = 0;
+ args = tmp_cmdline;
+
+ /* Chew leading spaces */
+ args = skip_spaces(args);
+
+ while (*args) {
+ args = next_arg(args, &param, &val);
+ /* Stop at -- */
+ if (!val && strcmp(param, "--") == 0)
+ break;
+
+ if (!strcmp(param, "memmap")) {
+ mem_avoid_memmap(PARSE_MEMMAP, val);
+ } else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) {
+ parse_gb_huge_pages(param, val);
+ } else if (!strcmp(param, "mem")) {
+ char *p = val;
+
+ if (!strcmp(p, "nopentium"))
+ continue;
+ mem_size = memparse(p, &p);
+ if (mem_size == 0)
+ break;
+
+ if (mem_size < mem_limit)
+ mem_limit = mem_size;
+ } else if (!strcmp(param, "efi_fake_mem")) {
+ mem_avoid_memmap(PARSE_EFI, val);
+ }
+ }
+
+ free(tmp_cmdline);
+ return;
+}
+
+/*
+ * In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM)
+ * on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit.
+ *
+ * The mem_avoid array is used to store the ranges that need to be avoided
+ * when KASLR searches for an appropriate random address. We must avoid any
+ * regions that are unsafe to overlap with during decompression, and other
+ * things like the initrd, cmdline and boot_params. This comment seeks to
+ * explain mem_avoid as clearly as possible since incorrect mem_avoid
+ * memory ranges lead to really hard to debug boot failures.
+ *
+ * The initrd, cmdline, and boot_params are trivial to identify for
+ * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and
+ * MEM_AVOID_BOOTPARAMS respectively below.
+ *
+ * What is not obvious how to avoid is the range of memory that is used
+ * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover
+ * the compressed kernel (ZO) and its run space, which is used to extract
+ * the uncompressed kernel (VO) and relocs.
+ *
+ * ZO's full run size sits against the end of the decompression buffer, so
+ * we can calculate where text, data, bss, etc of ZO are positioned more
+ * easily.
+ *
+ * For additional background, the decompression calculations can be found
+ * in header.S, and the memory diagram is based on the one found in misc.c.
+ *
+ * The following conditions are already enforced by the image layouts and
+ * associated code:
+ * - input + input_size >= output + output_size
+ * - kernel_total_size <= init_size
+ * - kernel_total_size <= output_size (see Note below)
+ * - output + init_size >= output + output_size
+ *
+ * (Note that kernel_total_size and output_size have no fundamental
+ * relationship, but output_size is passed to choose_random_location
+ * as a maximum of the two. The diagram is showing a case where
+ * kernel_total_size is larger than output_size, but this case is
+ * handled by bumping output_size.)
+ *
+ * The above conditions can be illustrated by a diagram:
+ *
+ * 0 output input input+input_size output+init_size
+ * | | | | |
+ * | | | | |
+ * |-----|--------|--------|--------------|-----------|--|-------------|
+ * | | |
+ * | | |
+ * output+init_size-ZO_INIT_SIZE output+output_size output+kernel_total_size
+ *
+ * [output, output+init_size) is the entire memory range used for
+ * extracting the compressed image.
+ *
+ * [output, output+kernel_total_size) is the range needed for the
+ * uncompressed kernel (VO) and its run size (bss, brk, etc).
+ *
+ * [output, output+output_size) is VO plus relocs (i.e. the entire
+ * uncompressed payload contained by ZO). This is the area of the buffer
+ * written to during decompression.
+ *
+ * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case
+ * range of the copied ZO and decompression code. (i.e. the range
+ * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.)
+ *
+ * [input, input+input_size) is the original copied compressed image (ZO)
+ * (i.e. it does not include its run size). This range must be avoided
+ * because it contains the data used for decompression.
+ *
+ * [input+input_size, output+init_size) is [_text, _end) for ZO. This
+ * range includes ZO's heap and stack, and must be avoided since it
+ * performs the decompression.
+ *
+ * Since the above two ranges need to be avoided and they are adjacent,
+ * they can be merged, resulting in: [input, output+init_size) which
+ * becomes the MEM_AVOID_ZO_RANGE below.
+ */
+static void mem_avoid_init(unsigned long input, unsigned long input_size,
+ unsigned long output)
+{
+ unsigned long init_size = boot_params->hdr.init_size;
+ u64 initrd_start, initrd_size;
+ unsigned long cmd_line, cmd_line_size;
+
+ /*
+ * Avoid the region that is unsafe to overlap during
+ * decompression.
+ */
+ mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
+ mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
+
+ /* Avoid initrd. */
+ initrd_start = (u64)boot_params->ext_ramdisk_image << 32;
+ initrd_start |= boot_params->hdr.ramdisk_image;
+ initrd_size = (u64)boot_params->ext_ramdisk_size << 32;
+ initrd_size |= boot_params->hdr.ramdisk_size;
+ mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
+ mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
+ /* No need to set mapping for initrd, it will be handled in VO. */
+
+ /* Avoid kernel command line. */
+ cmd_line = get_cmd_line_ptr();
+ /* Calculate size of cmd_line. */
+ if (cmd_line) {
+ cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE-1) + 1;
+ mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
+ mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
+ }
+
+ /* Avoid boot parameters. */
+ mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
+ mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
+
+ /* We don't need to set a mapping for setup_data. */
+
+ /* Mark the memmap regions we need to avoid */
+ handle_mem_options();
+
+ /* Enumerate the immovable memory regions */
+ num_immovable_mem = count_immovable_mem_regions();
+}
+
+/*
+ * Does this memory vector overlap a known avoided area? If so, record the
+ * overlap region with the lowest address.
+ */
+static bool mem_avoid_overlap(struct mem_vector *img,
+ struct mem_vector *overlap)
+{
+ int i;
+ struct setup_data *ptr;
+ u64 earliest = img->start + img->size;
+ bool is_overlapping = false;
+
+ for (i = 0; i < MEM_AVOID_MAX; i++) {
+ if (mem_overlaps(img, &mem_avoid[i]) &&
+ mem_avoid[i].start < earliest) {
+ *overlap = mem_avoid[i];
+ earliest = overlap->start;
+ is_overlapping = true;
+ }
+ }
+
+ /* Avoid all entries in the setup_data linked list. */
+ ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
+ while (ptr) {
+ struct mem_vector avoid;
+
+ avoid.start = (unsigned long)ptr;
+ avoid.size = sizeof(*ptr) + ptr->len;
+
+ if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
+ *overlap = avoid;
+ earliest = overlap->start;
+ is_overlapping = true;
+ }
+
+ if (ptr->type == SETUP_INDIRECT &&
+ ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
+ avoid.start = ((struct setup_indirect *)ptr->data)->addr;
+ avoid.size = ((struct setup_indirect *)ptr->data)->len;
+
+ if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
+ *overlap = avoid;
+ earliest = overlap->start;
+ is_overlapping = true;
+ }
+ }
+
+ ptr = (struct setup_data *)(unsigned long)ptr->next;
+ }
+
+ return is_overlapping;
+}
+
+struct slot_area {
+ u64 addr;
+ unsigned long num;
+};
+
+#define MAX_SLOT_AREA 100
+
+static struct slot_area slot_areas[MAX_SLOT_AREA];
+static unsigned int slot_area_index;
+static unsigned long slot_max;
+
+static void store_slot_info(struct mem_vector *region, unsigned long image_size)
+{
+ struct slot_area slot_area;
+
+ if (slot_area_index == MAX_SLOT_AREA)
+ return;
+
+ slot_area.addr = region->start;
+ slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN;
+
+ slot_areas[slot_area_index++] = slot_area;
+ slot_max += slot_area.num;
+}
+
+/*
+ * Skip as many 1GB huge pages as possible in the passed region
+ * according to the number which users specified:
+ */
+static void
+process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
+{
+ u64 pud_start, pud_end;
+ unsigned long gb_huge_pages;
+ struct mem_vector tmp;
+
+ if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) {
+ store_slot_info(region, image_size);
+ return;
+ }
+
+ /* Are there any 1GB pages in the region? */
+ pud_start = ALIGN(region->start, PUD_SIZE);
+ pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE);
+
+ /* No good 1GB huge pages found: */
+ if (pud_start >= pud_end) {
+ store_slot_info(region, image_size);
+ return;
+ }
+
+ /* Check if the head part of the region is usable. */
+ if (pud_start >= region->start + image_size) {
+ tmp.start = region->start;
+ tmp.size = pud_start - region->start;
+ store_slot_info(&tmp, image_size);
+ }
+
+ /* Skip the good 1GB pages. */
+ gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT;
+ if (gb_huge_pages > max_gb_huge_pages) {
+ pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT);
+ max_gb_huge_pages = 0;
+ } else {
+ max_gb_huge_pages -= gb_huge_pages;
+ }
+
+ /* Check if the tail part of the region is usable. */
+ if (region->start + region->size >= pud_end + image_size) {
+ tmp.start = pud_end;
+ tmp.size = region->start + region->size - pud_end;
+ store_slot_info(&tmp, image_size);
+ }
+}
+
+static u64 slots_fetch_random(void)
+{
+ unsigned long slot;
+ unsigned int i;
+
+ /* Handle case of no slots stored. */
+ if (slot_max == 0)
+ return 0;
+
+ slot = kaslr_get_random_long("Physical") % slot_max;
+
+ for (i = 0; i < slot_area_index; i++) {
+ if (slot >= slot_areas[i].num) {
+ slot -= slot_areas[i].num;
+ continue;
+ }
+ return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN);
+ }
+
+ if (i == slot_area_index)
+ debug_putstr("slots_fetch_random() failed!?\n");
+ return 0;
+}
+
+static void __process_mem_region(struct mem_vector *entry,
+ unsigned long minimum,
+ unsigned long image_size)
+{
+ struct mem_vector region, overlap;
+ u64 region_end;
+
+ /* Enforce minimum and memory limit. */
+ region.start = max_t(u64, entry->start, minimum);
+ region_end = min(entry->start + entry->size, mem_limit);
+
+ /* Give up if slot area array is full. */
+ while (slot_area_index < MAX_SLOT_AREA) {
+ /* Potentially raise address to meet alignment needs. */
+ region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
+
+ /* Did we raise the address above the passed in memory entry? */
+ if (region.start > region_end)
+ return;
+
+ /* Reduce size by any delta from the original address. */
+ region.size = region_end - region.start;
+
+ /* Return if region can't contain decompressed kernel */
+ if (region.size < image_size)
+ return;
+
+ /* If nothing overlaps, store the region and return. */
+ if (!mem_avoid_overlap(&region, &overlap)) {
+ process_gb_huge_pages(&region, image_size);
+ return;
+ }
+
+ /* Store beginning of region if holds at least image_size. */
+ if (overlap.start >= region.start + image_size) {
+ region.size = overlap.start - region.start;
+ process_gb_huge_pages(&region, image_size);
+ }
+
+ /* Clip off the overlapping region and start over. */
+ region.start = overlap.start + overlap.size;
+ }
+}
+
+static bool process_mem_region(struct mem_vector *region,
+ unsigned long minimum,
+ unsigned long image_size)
+{
+ int i;
+ /*
+ * If no immovable memory found, or MEMORY_HOTREMOVE disabled,
+ * use @region directly.
+ */
+ if (!num_immovable_mem) {
+ __process_mem_region(region, minimum, image_size);
+
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n");
+ return true;
+ }
+ return false;
+ }
+
+#if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
+ /*
+ * If immovable memory found, filter the intersection between
+ * immovable memory and @region.
+ */
+ for (i = 0; i < num_immovable_mem; i++) {
+ u64 start, end, entry_end, region_end;
+ struct mem_vector entry;
+
+ if (!mem_overlaps(region, &immovable_mem[i]))
+ continue;
+
+ start = immovable_mem[i].start;
+ end = start + immovable_mem[i].size;
+ region_end = region->start + region->size;
+
+ entry.start = clamp(region->start, start, end);
+ entry_end = clamp(region_end, start, end);
+ entry.size = entry_end - entry.start;
+
+ __process_mem_region(&entry, minimum, image_size);
+
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n");
+ return true;
+ }
+ }
+#endif
+ return 0;
+}
+
+#ifdef CONFIG_EFI
+/*
+ * Returns true if we processed the EFI memmap, which we prefer over the E820
+ * table if it is available.
+ */
+static bool
+process_efi_entries(unsigned long minimum, unsigned long image_size)
+{
+ struct efi_info *e = &boot_params->efi_info;
+ bool efi_mirror_found = false;
+ struct mem_vector region;
+ efi_memory_desc_t *md;
+ unsigned long pmap;
+ char *signature;
+ u32 nr_desc;
+ int i;
+
+ signature = (char *)&e->efi_loader_signature;
+ if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
+ strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
+ return false;
+
+#ifdef CONFIG_X86_32
+ /* Can't handle data above 4GB at this time */
+ if (e->efi_memmap_hi) {
+ warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
+ return false;
+ }
+ pmap = e->efi_memmap;
+#else
+ pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
+#endif
+
+ nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
+ for (i = 0; i < nr_desc; i++) {
+ md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
+ if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
+ efi_mirror_found = true;
+ break;
+ }
+ }
+
+ for (i = 0; i < nr_desc; i++) {
+ md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
+
+ /*
+ * Here we are more conservative in picking free memory than
+ * the EFI spec allows:
+ *
+ * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
+ * free memory and thus available to place the kernel image into,
+ * but in practice there's firmware where using that memory leads
+ * to crashes.
+ *
+ * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
+ */
+ if (md->type != EFI_CONVENTIONAL_MEMORY)
+ continue;
+
+ if (efi_soft_reserve_enabled() &&
+ (md->attribute & EFI_MEMORY_SP))
+ continue;
+
+ if (efi_mirror_found &&
+ !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
+ continue;
+
+ region.start = md->phys_addr;
+ region.size = md->num_pages << EFI_PAGE_SHIFT;
+ if (process_mem_region(&region, minimum, image_size))
+ break;
+ }
+ return true;
+}
+#else
+static inline bool
+process_efi_entries(unsigned long minimum, unsigned long image_size)
+{
+ return false;
+}
+#endif
+
+static void process_e820_entries(unsigned long minimum,
+ unsigned long image_size)
+{
+ int i;
+ struct mem_vector region;
+ struct boot_e820_entry *entry;
+
+ /* Verify potential e820 positions, appending to slots list. */
+ for (i = 0; i < boot_params->e820_entries; i++) {
+ entry = &boot_params->e820_table[i];
+ /* Skip non-RAM entries. */
+ if (entry->type != E820_TYPE_RAM)
+ continue;
+ region.start = entry->addr;
+ region.size = entry->size;
+ if (process_mem_region(&region, minimum, image_size))
+ break;
+ }
+}
+
+static unsigned long find_random_phys_addr(unsigned long minimum,
+ unsigned long image_size)
+{
+ u64 phys_addr;
+
+ /* Bail out early if it's impossible to succeed. */
+ if (minimum + image_size > mem_limit)
+ return 0;
+
+ /* Check if we had too many memmaps. */
+ if (memmap_too_large) {
+ debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
+ return 0;
+ }
+
+ if (!process_efi_entries(minimum, image_size))
+ process_e820_entries(minimum, image_size);
+
+ phys_addr = slots_fetch_random();
+
+ /* Perform a final check to make sure the address is in range. */
+ if (phys_addr < minimum || phys_addr + image_size > mem_limit) {
+ warn("Invalid physical address chosen!\n");
+ return 0;
+ }
+
+ return (unsigned long)phys_addr;
+}
+
+static unsigned long find_random_virt_addr(unsigned long minimum,
+ unsigned long image_size)
+{
+ unsigned long slots, random_addr;
+
+ /*
+ * There are how many CONFIG_PHYSICAL_ALIGN-sized slots
+ * that can hold image_size within the range of minimum to
+ * KERNEL_IMAGE_SIZE?
+ */
+ slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN;
+
+ random_addr = kaslr_get_random_long("Virtual") % slots;
+
+ return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
+}
+
+/*
+ * Since this function examines addresses much more numerically,
+ * it takes the input and output pointers as 'unsigned long'.
+ */
+void choose_random_location(unsigned long input,
+ unsigned long input_size,
+ unsigned long *output,
+ unsigned long output_size,
+ unsigned long *virt_addr)
+{
+ unsigned long random_addr, min_addr;
+
+ if (cmdline_find_option_bool("nokaslr")) {
+ warn("KASLR disabled: 'nokaslr' on cmdline.");
+ return;
+ }
+
+ boot_params->hdr.loadflags |= KASLR_FLAG;
+
+ if (IS_ENABLED(CONFIG_X86_32))
+ mem_limit = KERNEL_IMAGE_SIZE;
+ else
+ mem_limit = MAXMEM;
+
+ /* Record the various known unsafe memory ranges. */
+ mem_avoid_init(input, input_size, *output);
+
+ /*
+ * Low end of the randomization range should be the
+ * smaller of 512M or the initial kernel image
+ * location:
+ */
+ min_addr = min(*output, 512UL << 20);
+ /* Make sure minimum is aligned. */
+ min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN);
+
+ /* Walk available memory entries to find a random address. */
+ random_addr = find_random_phys_addr(min_addr, output_size);
+ if (!random_addr) {
+ warn("Physical KASLR disabled: no suitable memory region!");
+ } else {
+ /* Update the new physical address location. */
+ if (*output != random_addr)
+ *output = random_addr;
+ }
+
+
+ /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
+ if (IS_ENABLED(CONFIG_X86_64))
+ random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
+ *virt_addr = random_addr;
+}
diff --git a/arch/x86/boot/compressed/kernel_info.S b/arch/x86/boot/compressed/kernel_info.S
new file mode 100644
index 000000000..f818ee8fb
--- /dev/null
+++ b/arch/x86/boot/compressed/kernel_info.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/bootparam.h>
+
+ .section ".rodata.kernel_info", "a"
+
+ .global kernel_info
+
+kernel_info:
+ /* Header, Linux top (structure). */
+ .ascii "LToP"
+ /* Size. */
+ .long kernel_info_var_len_data - kernel_info
+ /* Size total. */
+ .long kernel_info_end - kernel_info
+
+ /* Maximal allowed type for setup_data and setup_indirect structs. */
+ .long SETUP_TYPE_MAX
+
+kernel_info_var_len_data:
+ /* Empty for time being... */
+kernel_info_end:
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
new file mode 100644
index 000000000..a73e4d783
--- /dev/null
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/processor-flags.h>
+#include <asm/msr.h>
+#include <asm/asm-offsets.h>
+
+ .text
+ .code32
+SYM_FUNC_START(get_sev_encryption_bit)
+ xor %eax, %eax
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ push %ebx
+ push %ecx
+ push %edx
+
+ movl $0x80000000, %eax /* CPUID to check the highest leaf */
+ cpuid
+ cmpl $0x8000001f, %eax /* See if 0x8000001f is available */
+ jb .Lno_sev
+
+ /*
+ * Check for the SEV feature:
+ * CPUID Fn8000_001F[EAX] - Bit 1
+ * CPUID Fn8000_001F[EBX] - Bits 5:0
+ * Pagetable bit position used to indicate encryption
+ */
+ movl $0x8000001f, %eax
+ cpuid
+ bt $1, %eax /* Check if SEV is available */
+ jnc .Lno_sev
+
+ movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */
+ rdmsr
+ bt $MSR_AMD64_SEV_ENABLED_BIT, %eax /* Check if SEV is active */
+ jnc .Lno_sev
+
+ movl %ebx, %eax
+ andl $0x3f, %eax /* Return the encryption bit location */
+ jmp .Lsev_exit
+
+.Lno_sev:
+ xor %eax, %eax
+
+.Lsev_exit:
+ pop %edx
+ pop %ecx
+ pop %ebx
+
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
+
+ RET
+SYM_FUNC_END(get_sev_encryption_bit)
+
+/**
+ * sev_es_req_cpuid - Request a CPUID value from the Hypervisor using
+ * the GHCB MSR protocol
+ *
+ * @%eax: Register to request (0=EAX, 1=EBX, 2=ECX, 3=EDX)
+ * @%edx: CPUID Function
+ *
+ * Returns 0 in %eax on success, non-zero on failure
+ * %edx returns CPUID value on success
+ */
+SYM_CODE_START_LOCAL(sev_es_req_cpuid)
+ shll $30, %eax
+ orl $0x00000004, %eax
+ movl $MSR_AMD64_SEV_ES_GHCB, %ecx
+ wrmsr
+ rep; vmmcall # VMGEXIT
+ rdmsr
+
+ /* Check response */
+ movl %eax, %ecx
+ andl $0x3ffff000, %ecx # Bits [12-29] MBZ
+ jnz 2f
+
+ /* Check return code */
+ andl $0xfff, %eax
+ cmpl $5, %eax
+ jne 2f
+
+ /* All good - return success */
+ xorl %eax, %eax
+1:
+ RET
+2:
+ movl $-1, %eax
+ jmp 1b
+SYM_CODE_END(sev_es_req_cpuid)
+
+SYM_CODE_START(startup32_vc_handler)
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+
+ /* Keep CPUID function in %ebx */
+ movl %eax, %ebx
+
+ /* Check if error-code == SVM_EXIT_CPUID */
+ cmpl $0x72, 16(%esp)
+ jne .Lfail
+
+ movl $0, %eax # Request CPUID[fn].EAX
+ movl %ebx, %edx # CPUID fn
+ call sev_es_req_cpuid # Call helper
+ testl %eax, %eax # Check return code
+ jnz .Lfail
+ movl %edx, 12(%esp) # Store result
+
+ movl $1, %eax # Request CPUID[fn].EBX
+ movl %ebx, %edx # CPUID fn
+ call sev_es_req_cpuid # Call helper
+ testl %eax, %eax # Check return code
+ jnz .Lfail
+ movl %edx, 8(%esp) # Store result
+
+ movl $2, %eax # Request CPUID[fn].ECX
+ movl %ebx, %edx # CPUID fn
+ call sev_es_req_cpuid # Call helper
+ testl %eax, %eax # Check return code
+ jnz .Lfail
+ movl %edx, 4(%esp) # Store result
+
+ movl $3, %eax # Request CPUID[fn].EDX
+ movl %ebx, %edx # CPUID fn
+ call sev_es_req_cpuid # Call helper
+ testl %eax, %eax # Check return code
+ jnz .Lfail
+ movl %edx, 0(%esp) # Store result
+
+ /*
+ * Sanity check CPUID results from the Hypervisor. See comment in
+ * do_vc_no_ghcb() for more details on why this is necessary.
+ */
+
+ /* Fail if SEV leaf not available in CPUID[0x80000000].EAX */
+ cmpl $0x80000000, %ebx
+ jne .Lcheck_sev
+ cmpl $0x8000001f, 12(%esp)
+ jb .Lfail
+ jmp .Ldone
+
+.Lcheck_sev:
+ /* Fail if SEV bit not set in CPUID[0x8000001f].EAX[1] */
+ cmpl $0x8000001f, %ebx
+ jne .Ldone
+ btl $1, 12(%esp)
+ jnc .Lfail
+
+.Ldone:
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ /* Remove error code */
+ addl $4, %esp
+
+ /* Jump over CPUID instruction */
+ addl $2, (%esp)
+
+ iret
+.Lfail:
+ /* Send terminate request to Hypervisor */
+ movl $0x100, %eax
+ xorl %edx, %edx
+ movl $MSR_AMD64_SEV_ES_GHCB, %ecx
+ wrmsr
+ rep; vmmcall
+
+ /* If request fails, go to hlt loop */
+ hlt
+ jmp .Lfail
+SYM_CODE_END(startup32_vc_handler)
+
+ .code64
+
+#include "../../kernel/sev_verify_cbit.S"
+
+ .data
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ .balign 8
+SYM_DATA(sme_me_mask, .quad 0)
+SYM_DATA(sev_status, .quad 0)
+SYM_DATA(sev_check_data, .quad 0)
+#endif
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
new file mode 100644
index 000000000..cf690d871
--- /dev/null
+++ b/arch/x86/boot/compressed/misc.c
@@ -0,0 +1,472 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * misc.c
+ *
+ * This is a collection of several routines used to extract the kernel
+ * which includes KASLR relocation, decompression, ELF parsing, and
+ * relocation processing. Additionally included are the screen and serial
+ * output functions and related debugging support functions.
+ *
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ * puts by Nick Holloway 1993, better puts by Martin Mares 1995
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ */
+
+#include "misc.h"
+#include "error.h"
+#include "pgtable.h"
+#include "../string.h"
+#include "../voffset.h"
+#include <asm/bootparam_utils.h>
+
+/*
+ * WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically at
+ * run time, but no relocation processing is performed. This means that
+ * it is not safe to place pointers in static structures.
+ */
+
+/* Macros used by the included decompressor code below. */
+#define STATIC static
+/* Define an externally visible malloc()/free(). */
+#define MALLOC_VISIBLE
+#include <linux/decompress/mm.h>
+
+/*
+ * Provide definitions of memzero and memmove as some of the decompressors will
+ * try to define their own functions if these are not defined as macros.
+ */
+#define memzero(s, n) memset((s), 0, (n))
+#ifndef memmove
+#define memmove memmove
+/* Functions used by the included decompressor code below. */
+void *memmove(void *dest, const void *src, size_t n);
+#endif
+
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+struct boot_params *boot_params;
+
+struct port_io_ops pio_ops;
+
+memptr free_mem_ptr;
+memptr free_mem_end_ptr;
+
+static char *vidmem;
+static int vidport;
+
+/* These might be accessed before .bss is cleared, so use .data instead. */
+static int lines __section(".data");
+static int cols __section(".data");
+
+#ifdef CONFIG_KERNEL_GZIP
+#include "../../../../lib/decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_BZIP2
+#include "../../../../lib/decompress_bunzip2.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZMA
+#include "../../../../lib/decompress_unlzma.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
+#ifdef CONFIG_KERNEL_ZSTD
+#include "../../../../lib/decompress_unzstd.c"
+#endif
+/*
+ * NOTE: When adding a new decompressor, please update the analysis in
+ * ../header.S.
+ */
+
+static void scroll(void)
+{
+ int i;
+
+ memmove(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
+ for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2)
+ vidmem[i] = ' ';
+}
+
+#define XMTRDY 0x20
+
+#define TXR 0 /* Transmit register (WRITE) */
+#define LSR 5 /* Line Status */
+static void serial_putchar(int ch)
+{
+ unsigned timeout = 0xffff;
+
+ while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
+ cpu_relax();
+
+ outb(ch, early_serial_base + TXR);
+}
+
+void __putstr(const char *s)
+{
+ int x, y, pos;
+ char c;
+
+ if (early_serial_base) {
+ const char *str = s;
+ while (*str) {
+ if (*str == '\n')
+ serial_putchar('\r');
+ serial_putchar(*str++);
+ }
+ }
+
+ if (lines == 0 || cols == 0)
+ return;
+
+ x = boot_params->screen_info.orig_x;
+ y = boot_params->screen_info.orig_y;
+
+ while ((c = *s++) != '\0') {
+ if (c == '\n') {
+ x = 0;
+ if (++y >= lines) {
+ scroll();
+ y--;
+ }
+ } else {
+ vidmem[(x + cols * y) * 2] = c;
+ if (++x >= cols) {
+ x = 0;
+ if (++y >= lines) {
+ scroll();
+ y--;
+ }
+ }
+ }
+ }
+
+ boot_params->screen_info.orig_x = x;
+ boot_params->screen_info.orig_y = y;
+
+ pos = (x + cols * y) * 2; /* Update cursor position */
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
+}
+
+void __puthex(unsigned long value)
+{
+ char alpha[2] = "0";
+ int bits;
+
+ for (bits = sizeof(value) * 8 - 4; bits >= 0; bits -= 4) {
+ unsigned long digit = (value >> bits) & 0xf;
+
+ if (digit < 0xA)
+ alpha[0] = '0' + digit;
+ else
+ alpha[0] = 'a' + (digit - 0xA);
+
+ __putstr(alpha);
+ }
+}
+
+#ifdef CONFIG_X86_NEED_RELOCS
+static void handle_relocations(void *output, unsigned long output_len,
+ unsigned long virt_addr)
+{
+ int *reloc;
+ unsigned long delta, map, ptr;
+ unsigned long min_addr = (unsigned long)output;
+ unsigned long max_addr = min_addr + (VO___bss_start - VO__text);
+
+ /*
+ * Calculate the delta between where vmlinux was linked to load
+ * and where it was actually loaded.
+ */
+ delta = min_addr - LOAD_PHYSICAL_ADDR;
+
+ /*
+ * The kernel contains a table of relocation addresses. Those
+ * addresses have the final load address of the kernel in virtual
+ * memory. We are currently working in the self map. So we need to
+ * create an adjustment for kernel memory addresses to the self map.
+ * This will involve subtracting out the base address of the kernel.
+ */
+ map = delta - __START_KERNEL_map;
+
+ /*
+ * 32-bit always performs relocations. 64-bit relocations are only
+ * needed if KASLR has chosen a different starting address offset
+ * from __START_KERNEL_map.
+ */
+ if (IS_ENABLED(CONFIG_X86_64))
+ delta = virt_addr - LOAD_PHYSICAL_ADDR;
+
+ if (!delta) {
+ debug_putstr("No relocation needed... ");
+ return;
+ }
+ debug_putstr("Performing relocations... ");
+
+ /*
+ * Process relocations: 32 bit relocations first then 64 bit after.
+ * Three sets of binary relocations are added to the end of the kernel
+ * before compression. Each relocation table entry is the kernel
+ * address of the location which needs to be updated stored as a
+ * 32-bit value which is sign extended to 64 bits.
+ *
+ * Format is:
+ *
+ * kernel bits...
+ * 0 - zero terminator for 64 bit relocations
+ * 64 bit relocation repeated
+ * 0 - zero terminator for inverse 32 bit relocations
+ * 32 bit inverse relocation repeated
+ * 0 - zero terminator for 32 bit relocations
+ * 32 bit relocation repeated
+ *
+ * So we work backwards from the end of the decompressed image.
+ */
+ for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) {
+ long extended = *reloc;
+ extended += map;
+
+ ptr = (unsigned long)extended;
+ if (ptr < min_addr || ptr > max_addr)
+ error("32-bit relocation outside of kernel!\n");
+
+ *(uint32_t *)ptr += delta;
+ }
+#ifdef CONFIG_X86_64
+ while (*--reloc) {
+ long extended = *reloc;
+ extended += map;
+
+ ptr = (unsigned long)extended;
+ if (ptr < min_addr || ptr > max_addr)
+ error("inverse 32-bit relocation outside of kernel!\n");
+
+ *(int32_t *)ptr -= delta;
+ }
+ for (reloc--; *reloc; reloc--) {
+ long extended = *reloc;
+ extended += map;
+
+ ptr = (unsigned long)extended;
+ if (ptr < min_addr || ptr > max_addr)
+ error("64-bit relocation outside of kernel!\n");
+
+ *(uint64_t *)ptr += delta;
+ }
+#endif
+}
+#else
+static inline void handle_relocations(void *output, unsigned long output_len,
+ unsigned long virt_addr)
+{ }
+#endif
+
+static void parse_elf(void *output)
+{
+#ifdef CONFIG_X86_64
+ Elf64_Ehdr ehdr;
+ Elf64_Phdr *phdrs, *phdr;
+#else
+ Elf32_Ehdr ehdr;
+ Elf32_Phdr *phdrs, *phdr;
+#endif
+ void *dest;
+ int i;
+
+ memcpy(&ehdr, output, sizeof(ehdr));
+ if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
+ ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
+ ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
+ ehdr.e_ident[EI_MAG3] != ELFMAG3) {
+ error("Kernel is not a valid ELF file");
+ return;
+ }
+
+ debug_putstr("Parsing ELF... ");
+
+ phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
+ if (!phdrs)
+ error("Failed to allocate space for phdrs");
+
+ memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
+
+ for (i = 0; i < ehdr.e_phnum; i++) {
+ phdr = &phdrs[i];
+
+ switch (phdr->p_type) {
+ case PT_LOAD:
+#ifdef CONFIG_X86_64
+ if ((phdr->p_align % 0x200000) != 0)
+ error("Alignment of LOAD segment isn't multiple of 2MB");
+#endif
+#ifdef CONFIG_RELOCATABLE
+ dest = output;
+ dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
+#else
+ dest = (void *)(phdr->p_paddr);
+#endif
+ memmove(dest, output + phdr->p_offset, phdr->p_filesz);
+ break;
+ default: /* Ignore other PT_* */ break;
+ }
+ }
+
+ free(phdrs);
+}
+
+/*
+ * The compressed kernel image (ZO), has been moved so that its position
+ * is against the end of the buffer used to hold the uncompressed kernel
+ * image (VO) and the execution environment (.bss, .brk), which makes sure
+ * there is room to do the in-place decompression. (See header.S for the
+ * calculations.)
+ *
+ * |-----compressed kernel image------|
+ * V V
+ * 0 extract_offset +INIT_SIZE
+ * |-----------|---------------|-------------------------|--------|
+ * | | | |
+ * VO__text startup_32 of ZO VO__end ZO__end
+ * ^ ^
+ * |-------uncompressed kernel image---------|
+ *
+ */
+asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
+ unsigned char *input_data,
+ unsigned long input_len,
+ unsigned char *output,
+ unsigned long output_len)
+{
+ const unsigned long kernel_total_size = VO__end - VO__text;
+ unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+ unsigned long needed_size;
+
+ /* Retain x86 boot parameters pointer passed from startup_32/64. */
+ boot_params = rmode;
+
+ /* Clear flags intended for solely in-kernel use. */
+ boot_params->hdr.loadflags &= ~KASLR_FLAG;
+
+ sanitize_boot_params(boot_params);
+
+ if (boot_params->screen_info.orig_video_mode == 7) {
+ vidmem = (char *) 0xb0000;
+ vidport = 0x3b4;
+ } else {
+ vidmem = (char *) 0xb8000;
+ vidport = 0x3d4;
+ }
+
+ lines = boot_params->screen_info.orig_video_lines;
+ cols = boot_params->screen_info.orig_video_cols;
+
+ init_default_io_ops();
+
+ /*
+ * Detect TDX guest environment.
+ *
+ * It has to be done before console_init() in order to use
+ * paravirtualized port I/O operations if needed.
+ */
+ early_tdx_detect();
+
+ console_init();
+
+ /*
+ * Save RSDP address for later use. Have this after console_init()
+ * so that early debugging output from the RSDP parsing code can be
+ * collected.
+ */
+ boot_params->acpi_rsdp_addr = get_rsdp_addr();
+
+ debug_putstr("early console in extract_kernel\n");
+
+ free_mem_ptr = heap; /* Heap */
+ free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
+
+ /*
+ * The memory hole needed for the kernel is the larger of either
+ * the entire decompressed kernel plus relocation table, or the
+ * entire decompressed kernel plus .bss and .brk sections.
+ *
+ * On X86_64, the memory is mapped with PMD pages. Round the
+ * size up so that the full extent of PMD pages mapped is
+ * included in the check against the valid memory table
+ * entries. This ensures the full mapped area is usable RAM
+ * and doesn't include any reserved areas.
+ */
+ needed_size = max(output_len, kernel_total_size);
+#ifdef CONFIG_X86_64
+ needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
+#endif
+
+ /* Report initial kernel position details. */
+ debug_putaddr(input_data);
+ debug_putaddr(input_len);
+ debug_putaddr(output);
+ debug_putaddr(output_len);
+ debug_putaddr(kernel_total_size);
+ debug_putaddr(needed_size);
+
+#ifdef CONFIG_X86_64
+ /* Report address of 32-bit trampoline */
+ debug_putaddr(trampoline_32bit);
+#endif
+
+ choose_random_location((unsigned long)input_data, input_len,
+ (unsigned long *)&output,
+ needed_size,
+ &virt_addr);
+
+ /* Validate memory location choices. */
+ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
+ error("Destination physical address inappropriately aligned");
+ if (virt_addr & (MIN_KERNEL_ALIGN - 1))
+ error("Destination virtual address inappropriately aligned");
+#ifdef CONFIG_X86_64
+ if (heap > 0x3fffffffffffUL)
+ error("Destination address too large");
+ if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+ error("Destination virtual address is beyond the kernel mapping area");
+#else
+ if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
+ error("Destination address too large");
+#endif
+#ifndef CONFIG_RELOCATABLE
+ if (virt_addr != LOAD_PHYSICAL_ADDR)
+ error("Destination virtual address changed when not relocatable");
+#endif
+
+ debug_putstr("\nDecompressing Linux... ");
+ __decompress(input_data, input_len, NULL, NULL, output, output_len,
+ NULL, error);
+ parse_elf(output);
+ handle_relocations(output, output_len, virt_addr);
+ debug_putstr("done.\nBooting the kernel.\n");
+
+ /* Disable exception handling before booting the kernel */
+ cleanup_exception_handling();
+
+ return output;
+}
+
+void fortify_panic(const char *name)
+{
+ error("detected buffer overflow");
+}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
new file mode 100644
index 000000000..20118fb7c
--- /dev/null
+++ b/arch/x86/boot/compressed/misc.h
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_MISC_H
+#define BOOT_COMPRESSED_MISC_H
+
+/*
+ * Special hack: we have to be careful, because no indirections are allowed here,
+ * and paravirt_ops is a kind of one. As it will only run in baremetal anyway,
+ * we just keep it from happening. (This list needs to be extended when new
+ * paravirt and debugging variants are added.)
+ */
+#undef CONFIG_PARAVIRT
+#undef CONFIG_PARAVIRT_XXL
+#undef CONFIG_PARAVIRT_SPINLOCKS
+#undef CONFIG_KASAN
+#undef CONFIG_KASAN_GENERIC
+
+#define __NO_FORTIFY
+
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
+
+#include <linux/linkage.h>
+#include <linux/screen_info.h>
+#include <linux/elf.h>
+#include <asm/page.h>
+#include <asm/boot.h>
+#include <asm/bootparam.h>
+#include <asm/desc_defs.h>
+
+#include "tdx.h"
+
+#define BOOT_CTYPE_H
+#include <linux/acpi.h>
+
+#define BOOT_BOOT_H
+#include "../ctype.h"
+#include "../io.h"
+
+#include "efi.h"
+
+#ifdef CONFIG_X86_64
+#define memptr long
+#else
+#define memptr unsigned
+#endif
+
+/* boot/compressed/vmlinux start and end markers */
+extern char _head[], _end[];
+
+/* misc.c */
+extern memptr free_mem_ptr;
+extern memptr free_mem_end_ptr;
+void *malloc(int size);
+void free(void *where);
+extern struct boot_params *boot_params;
+void __putstr(const char *s);
+void __puthex(unsigned long value);
+#define error_putstr(__x) __putstr(__x)
+#define error_puthex(__x) __puthex(__x)
+
+#ifdef CONFIG_X86_VERBOSE_BOOTUP
+
+#define debug_putstr(__x) __putstr(__x)
+#define debug_puthex(__x) __puthex(__x)
+#define debug_putaddr(__x) { \
+ debug_putstr(#__x ": 0x"); \
+ debug_puthex((unsigned long)(__x)); \
+ debug_putstr("\n"); \
+ }
+
+#else
+
+static inline void debug_putstr(const char *s)
+{ }
+static inline void debug_puthex(unsigned long value)
+{ }
+#define debug_putaddr(x) /* */
+
+#endif
+
+/* cmdline.c */
+int cmdline_find_option(const char *option, char *buffer, int bufsize);
+int cmdline_find_option_bool(const char *option);
+
+struct mem_vector {
+ u64 start;
+ u64 size;
+};
+
+#ifdef CONFIG_RANDOMIZE_BASE
+/* kaslr.c */
+void choose_random_location(unsigned long input,
+ unsigned long input_size,
+ unsigned long *output,
+ unsigned long output_size,
+ unsigned long *virt_addr);
+#else
+static inline void choose_random_location(unsigned long input,
+ unsigned long input_size,
+ unsigned long *output,
+ unsigned long output_size,
+ unsigned long *virt_addr)
+{
+}
+#endif
+
+/* cpuflags.c */
+bool has_cpuflag(int flag);
+
+#ifdef CONFIG_X86_64
+extern int set_page_decrypted(unsigned long address);
+extern int set_page_encrypted(unsigned long address);
+extern int set_page_non_present(unsigned long address);
+extern unsigned char _pgtable[];
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
+/* early_serial_console.c */
+extern int early_serial_base;
+void console_init(void);
+#else
+static const int early_serial_base;
+static inline void console_init(void)
+{ }
+#endif
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+void sev_enable(struct boot_params *bp);
+void snp_check_features(void);
+void sev_es_shutdown_ghcb(void);
+extern bool sev_es_check_ghcb_fault(unsigned long address);
+void snp_set_page_private(unsigned long paddr);
+void snp_set_page_shared(unsigned long paddr);
+void sev_prep_identity_maps(unsigned long top_level_pgt);
+#else
+static inline void sev_enable(struct boot_params *bp)
+{
+ /*
+ * bp->cc_blob_address should only be set by boot/compressed kernel.
+ * Initialize it to 0 unconditionally (thus here in this stub too) to
+ * ensure that uninitialized values from buggy bootloaders aren't
+ * propagated.
+ */
+ if (bp)
+ bp->cc_blob_address = 0;
+}
+static inline void snp_check_features(void) { }
+static inline void sev_es_shutdown_ghcb(void) { }
+static inline bool sev_es_check_ghcb_fault(unsigned long address)
+{
+ return false;
+}
+static inline void snp_set_page_private(unsigned long paddr) { }
+static inline void snp_set_page_shared(unsigned long paddr) { }
+static inline void sev_prep_identity_maps(unsigned long top_level_pgt) { }
+#endif
+
+/* acpi.c */
+#ifdef CONFIG_ACPI
+acpi_physical_address get_rsdp_addr(void);
+#else
+static inline acpi_physical_address get_rsdp_addr(void) { return 0; }
+#endif
+
+#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
+extern struct mem_vector immovable_mem[MAX_NUMNODES*2];
+int count_immovable_mem_regions(void);
+#else
+static inline int count_immovable_mem_regions(void) { return 0; }
+#endif
+
+/* ident_map_64.c */
+#ifdef CONFIG_X86_5LEVEL
+extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
+#endif
+extern void kernel_add_identity_map(unsigned long start, unsigned long end);
+
+/* Used by PAGE_KERN* macros: */
+extern pteval_t __default_kernel_pte_mask;
+
+/* idt_64.c */
+extern gate_desc boot_idt[BOOT_IDT_ENTRIES];
+extern struct desc_ptr boot_idt_desc;
+
+#ifdef CONFIG_X86_64
+void cleanup_exception_handling(void);
+#else
+static inline void cleanup_exception_handling(void) { }
+#endif
+
+/* IDT Entry Points */
+void boot_page_fault(void);
+void boot_stage1_vc(void);
+void boot_stage2_vc(void);
+
+unsigned long sev_verify_cbit(unsigned long cr3);
+
+enum efi_type {
+ EFI_TYPE_64,
+ EFI_TYPE_32,
+ EFI_TYPE_NONE,
+};
+
+#ifdef CONFIG_EFI
+/* helpers for early EFI config table access */
+enum efi_type efi_get_type(struct boot_params *bp);
+unsigned long efi_get_system_table(struct boot_params *bp);
+int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa,
+ unsigned int *cfg_tbl_len);
+unsigned long efi_find_vendor_table(struct boot_params *bp,
+ unsigned long cfg_tbl_pa,
+ unsigned int cfg_tbl_len,
+ efi_guid_t guid);
+#else
+static inline enum efi_type efi_get_type(struct boot_params *bp)
+{
+ return EFI_TYPE_NONE;
+}
+
+static inline unsigned long efi_get_system_table(struct boot_params *bp)
+{
+ return 0;
+}
+
+static inline int efi_get_conf_table(struct boot_params *bp,
+ unsigned long *cfg_tbl_pa,
+ unsigned int *cfg_tbl_len)
+{
+ return -ENOENT;
+}
+
+static inline unsigned long efi_find_vendor_table(struct boot_params *bp,
+ unsigned long cfg_tbl_pa,
+ unsigned int cfg_tbl_len,
+ efi_guid_t guid)
+{
+ return 0;
+}
+#endif /* CONFIG_EFI */
+
+#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
new file mode 100644
index 000000000..52aa56cdb
--- /dev/null
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ * H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * -----------------------------------------------------------------------
+ *
+ * Outputs a small assembly wrapper with the appropriate symbols defined.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <tools/le_byteshift.h>
+
+int main(int argc, char *argv[])
+{
+ uint32_t olen;
+ long ilen;
+ FILE *f = NULL;
+ int retval = 1;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
+ goto bail;
+ }
+
+ /* Get the information for the compressed kernel image first */
+
+ f = fopen(argv[1], "r");
+ if (!f) {
+ perror(argv[1]);
+ goto bail;
+ }
+
+
+ if (fseek(f, -4L, SEEK_END)) {
+ perror(argv[1]);
+ }
+
+ if (fread(&olen, sizeof(olen), 1, f) != 1) {
+ perror(argv[1]);
+ goto bail;
+ }
+
+ ilen = ftell(f);
+ olen = get_unaligned_le32(&olen);
+
+ printf(".section \".rodata..compressed\",\"a\",@progbits\n");
+ printf(".globl z_input_len\n");
+ printf("z_input_len = %lu\n", ilen);
+ printf(".globl z_output_len\n");
+ printf("z_output_len = %lu\n", (unsigned long)olen);
+
+ printf(".globl input_data, input_data_end\n");
+ printf("input_data:\n");
+ printf(".incbin \"%s\"\n", argv[1]);
+ printf("input_data_end:\n");
+
+ printf(".section \".rodata\",\"a\",@progbits\n");
+ printf(".globl input_len\n");
+ printf("input_len:\n\t.long %lu\n", ilen);
+ printf(".globl output_len\n");
+ printf("output_len:\n\t.long %lu\n", (unsigned long)olen);
+
+ retval = 0;
+bail:
+ if (f)
+ fclose(f);
+ return retval;
+}
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
new file mode 100644
index 000000000..cc9b2529a
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -0,0 +1,20 @@
+#ifndef BOOT_COMPRESSED_PAGETABLE_H
+#define BOOT_COMPRESSED_PAGETABLE_H
+
+#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
+
+#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
+
+#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE 0x80
+
+#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
+
+#ifndef __ASSEMBLER__
+
+extern unsigned long *trampoline_32bit;
+
+extern void trampoline_32bit_src(void *return_ptr);
+
+#endif /* __ASSEMBLER__ */
+#endif /* BOOT_COMPRESSED_PAGETABLE_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644
index 000000000..2ac12ff41
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "misc.h"
+#include <asm/e820/types.h>
+#include <asm/processor.h>
+#include "pgtable.h"
+#include "../string.h"
+#include "efi.h"
+
+#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
+#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
+
+#ifdef CONFIG_X86_5LEVEL
+/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
+unsigned int __section(".data") __pgtable_l5_enabled;
+unsigned int __section(".data") pgdir_shift = 39;
+unsigned int __section(".data") ptrs_per_p4d = 1;
+#endif
+
+struct paging_config {
+ unsigned long trampoline_start;
+ unsigned long l5_required;
+};
+
+/* Buffer to preserve trampoline memory */
+static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
+
+/*
+ * Trampoline address will be printed by extract_kernel() for debugging
+ * purposes.
+ *
+ * Avoid putting the pointer into .bss as it will be cleared between
+ * paging_prepare() and extract_kernel().
+ */
+unsigned long *trampoline_32bit __section(".data");
+
+extern struct boot_params *boot_params;
+int cmdline_find_option_bool(const char *option);
+
+static unsigned long find_trampoline_placement(void)
+{
+ unsigned long bios_start = 0, ebda_start = 0;
+ struct boot_e820_entry *entry;
+ char *signature;
+ int i;
+
+ /*
+ * Find a suitable spot for the trampoline.
+ * This code is based on reserve_bios_regions().
+ */
+
+ /*
+ * EFI systems may not provide legacy ROM. The memory may not be mapped
+ * at all.
+ *
+ * Only look for values in the legacy ROM for non-EFI system.
+ */
+ signature = (char *)&boot_params->efi_info.efi_loader_signature;
+ if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
+ strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) {
+ ebda_start = *(unsigned short *)0x40e << 4;
+ bios_start = *(unsigned short *)0x413 << 10;
+ }
+
+ if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
+ bios_start = BIOS_START_MAX;
+
+ if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
+ bios_start = ebda_start;
+
+ bios_start = round_down(bios_start, PAGE_SIZE);
+
+ /* Find the first usable memory region under bios_start. */
+ for (i = boot_params->e820_entries - 1; i >= 0; i--) {
+ unsigned long new = bios_start;
+
+ entry = &boot_params->e820_table[i];
+
+ /* Skip all entries above bios_start. */
+ if (bios_start <= entry->addr)
+ continue;
+
+ /* Skip non-RAM entries. */
+ if (entry->type != E820_TYPE_RAM)
+ continue;
+
+ /* Adjust bios_start to the end of the entry if needed. */
+ if (bios_start > entry->addr + entry->size)
+ new = entry->addr + entry->size;
+
+ /* Keep bios_start page-aligned. */
+ new = round_down(new, PAGE_SIZE);
+
+ /* Skip the entry if it's too small. */
+ if (new - TRAMPOLINE_32BIT_SIZE < entry->addr)
+ continue;
+
+ /* Protect against underflow. */
+ if (new - TRAMPOLINE_32BIT_SIZE > bios_start)
+ break;
+
+ bios_start = new;
+ break;
+ }
+
+ /* Place the trampoline just below the end of low memory */
+ return bios_start - TRAMPOLINE_32BIT_SIZE;
+}
+
+struct paging_config paging_prepare(void *rmode)
+{
+ struct paging_config paging_config = {};
+
+ /* Initialize boot_params. Required for cmdline_find_option_bool(). */
+ boot_params = rmode;
+
+ /*
+ * Check if LA57 is desired and supported.
+ *
+ * There are several parts to the check:
+ * - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
+ * - if user asked to disable 5-level paging: no5lvl in cmdline
+ * - if the machine supports 5-level paging:
+ * + CPUID leaf 7 is supported
+ * + the leaf has the feature bit set
+ *
+ * That's substitute for boot_cpu_has() in early boot code.
+ */
+ if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
+ !cmdline_find_option_bool("no5lvl") &&
+ native_cpuid_eax(0) >= 7 &&
+ (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
+ paging_config.l5_required = 1;
+ }
+
+ paging_config.trampoline_start = find_trampoline_placement();
+
+ trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
+
+ /* Preserve trampoline memory */
+ memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
+
+ /* Clear trampoline memory first */
+ memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
+
+ /* Copy trampoline code in place */
+ memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+ &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
+
+ /*
+ * The code below prepares page table in trampoline memory.
+ *
+ * The new page table will be used by trampoline code for switching
+ * from 4- to 5-level paging or vice versa.
+ *
+ * If switching is not required, the page table is unused: trampoline
+ * code wouldn't touch CR3.
+ */
+
+ /*
+ * We are not going to use the page table in trampoline memory if we
+ * are already in the desired paging mode.
+ */
+ if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
+ goto out;
+
+ if (paging_config.l5_required) {
+ /*
+ * For 4- to 5-level paging transition, set up current CR3 as
+ * the first and the only entry in a new top-level page table.
+ */
+ trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
+ } else {
+ unsigned long src;
+
+ /*
+ * For 5- to 4-level paging transition, copy page table pointed
+ * by first entry in the current top-level page table as our
+ * new top-level page table.
+ *
+ * We cannot just point to the page table from trampoline as it
+ * may be above 4G.
+ */
+ src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
+ memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
+ (void *)src, PAGE_SIZE);
+ }
+
+out:
+ return paging_config;
+}
+
+void cleanup_trampoline(void *pgtable)
+{
+ void *trampoline_pgtable;
+
+ trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
+
+ /*
+ * Move the top level page table out of trampoline memory,
+ * if it's there.
+ */
+ if ((void *)__native_read_cr3() == trampoline_pgtable) {
+ memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
+ native_write_cr3((unsigned long)pgtable);
+ }
+
+ /* Restore trampoline memory */
+ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
+
+ /* Initialize variables for 5-level paging */
+#ifdef CONFIG_X86_5LEVEL
+ if (__read_cr4() & X86_CR4_LA57) {
+ __pgtable_l5_enabled = 1;
+ pgdir_shift = 48;
+ ptrs_per_p4d = 512;
+ }
+#endif
+}
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
new file mode 100644
index 000000000..9c91cc40f
--- /dev/null
+++ b/arch/x86/boot/compressed/sev.c
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+/*
+ * misc.h needs to be first because it knows how to include the other kernel
+ * headers in the pre-decompression code in a way that does not break
+ * compilation.
+ */
+#include "misc.h"
+
+#include <asm/pgtable_types.h>
+#include <asm/sev.h>
+#include <asm/trapnr.h>
+#include <asm/trap_pf.h>
+#include <asm/msr-index.h>
+#include <asm/fpu/xcr.h>
+#include <asm/ptrace.h>
+#include <asm/svm.h>
+#include <asm/cpuid.h>
+
+#include "error.h"
+#include "../msr.h"
+
+struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
+struct ghcb *boot_ghcb;
+
+/*
+ * Copy a version of this function here - insn-eval.c can't be used in
+ * pre-decompression code.
+ */
+static bool insn_has_rep_prefix(struct insn *insn)
+{
+ insn_byte_t p;
+ int i;
+
+ insn_get_prefixes(insn);
+
+ for_each_insn_prefix(insn, i, p) {
+ if (p == 0xf2 || p == 0xf3)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and
+ * doesn't use segments.
+ */
+static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
+{
+ return 0UL;
+}
+
+static inline u64 sev_es_rd_ghcb_msr(void)
+{
+ struct msr m;
+
+ boot_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m);
+
+ return m.q;
+}
+
+static inline void sev_es_wr_ghcb_msr(u64 val)
+{
+ struct msr m;
+
+ m.q = val;
+ boot_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m);
+}
+
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+ char buffer[MAX_INSN_SIZE];
+ int ret;
+
+ memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
+
+ ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+ if (ret < 0)
+ return ES_DECODE_FAILED;
+
+ return ES_OK;
+}
+
+static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+ void *dst, char *buf, size_t size)
+{
+ memcpy(dst, buf, size);
+
+ return ES_OK;
+}
+
+static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+ void *src, char *buf, size_t size)
+{
+ memcpy(buf, src, size);
+
+ return ES_OK;
+}
+
+static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
+{
+ return ES_OK;
+}
+
+static bool fault_in_kernel_space(unsigned long address)
+{
+ return false;
+}
+
+#undef __init
+#undef __pa
+#define __init
+#define __pa(x) ((unsigned long)(x))
+
+#define __BOOT_COMPRESSED
+
+/* Basic instruction decoding support needed */
+#include "../../lib/inat.c"
+#include "../../lib/insn.c"
+
+/* Include code for early handlers */
+#include "../../kernel/sev-shared.c"
+
+static inline bool sev_snp_enabled(void)
+{
+ return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+}
+
+static void __page_state_change(unsigned long paddr, enum psc_op op)
+{
+ u64 val;
+
+ if (!sev_snp_enabled())
+ return;
+
+ /*
+ * If private -> shared then invalidate the page before requesting the
+ * state change in the RMP table.
+ */
+ if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+
+ /* Issue VMGEXIT to change the page state in RMP table. */
+ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
+ VMGEXIT();
+
+ /* Read the response of the VMGEXIT. */
+ val = sev_es_rd_ghcb_msr();
+ if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+ /*
+ * Now that page state is changed in the RMP table, validate it so that it is
+ * consistent with the RMP entry.
+ */
+ if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+}
+
+void snp_set_page_private(unsigned long paddr)
+{
+ __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE);
+}
+
+void snp_set_page_shared(unsigned long paddr)
+{
+ __page_state_change(paddr, SNP_PAGE_STATE_SHARED);
+}
+
+static bool early_setup_ghcb(void)
+{
+ if (set_page_decrypted((unsigned long)&boot_ghcb_page))
+ return false;
+
+ /* Page is now mapped decrypted, clear it */
+ memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page));
+
+ boot_ghcb = &boot_ghcb_page;
+
+ /* Initialize lookup tables for the instruction decoder */
+ inat_init_tables();
+
+ /* SNP guest requires the GHCB GPA must be registered */
+ if (sev_snp_enabled())
+ snp_register_ghcb_early(__pa(&boot_ghcb_page));
+
+ return true;
+}
+
+void sev_es_shutdown_ghcb(void)
+{
+ if (!boot_ghcb)
+ return;
+
+ if (!sev_es_check_cpu_features())
+ error("SEV-ES CPU Features missing.");
+
+ /*
+ * GHCB Page must be flushed from the cache and mapped encrypted again.
+ * Otherwise the running kernel will see strange cache effects when
+ * trying to use that page.
+ */
+ if (set_page_encrypted((unsigned long)&boot_ghcb_page))
+ error("Can't map GHCB page encrypted");
+
+ /*
+ * GHCB page is mapped encrypted again and flushed from the cache.
+ * Mark it non-present now to catch bugs when #VC exceptions trigger
+ * after this point.
+ */
+ if (set_page_non_present((unsigned long)&boot_ghcb_page))
+ error("Can't unmap GHCB page");
+}
+
+static void __noreturn sev_es_ghcb_terminate(struct ghcb *ghcb, unsigned int set,
+ unsigned int reason, u64 exit_info_2)
+{
+ u64 exit_info_1 = SVM_VMGEXIT_TERM_REASON(set, reason);
+
+ vc_ghcb_invalidate(ghcb);
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_TERM_REQUEST);
+ ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+ ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ while (true)
+ asm volatile("hlt\n" : : : "memory");
+}
+
+bool sev_es_check_ghcb_fault(unsigned long address)
+{
+ /* Check whether the fault was on the GHCB page */
+ return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page);
+}
+
+void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
+{
+ struct es_em_ctxt ctxt;
+ enum es_result result;
+
+ if (!boot_ghcb && !early_setup_ghcb())
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+
+ vc_ghcb_invalidate(boot_ghcb);
+ result = vc_init_em_ctxt(&ctxt, regs, exit_code);
+ if (result != ES_OK)
+ goto finish;
+
+ switch (exit_code) {
+ case SVM_EXIT_RDTSC:
+ case SVM_EXIT_RDTSCP:
+ result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code);
+ break;
+ case SVM_EXIT_IOIO:
+ result = vc_handle_ioio(boot_ghcb, &ctxt);
+ break;
+ case SVM_EXIT_CPUID:
+ result = vc_handle_cpuid(boot_ghcb, &ctxt);
+ break;
+ default:
+ result = ES_UNSUPPORTED;
+ break;
+ }
+
+finish:
+ if (result == ES_OK)
+ vc_finish_insn(&ctxt);
+ else if (result != ES_RETRY)
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+}
+
+static void enforce_vmpl0(void)
+{
+ u64 attrs;
+ int err;
+
+ /*
+ * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically
+ * higher) privilege level. Here, clear the VMPL1 permission mask of the
+ * GHCB page. If the guest is not running at VMPL0, this will fail.
+ *
+ * If the guest is running at VMPL0, it will succeed. Even if that operation
+ * modifies permission bits, it is still ok to do so currently because Linux
+ * SNP guests are supported only on VMPL0 so VMPL1 or higher permission masks
+ * changing is a don't-care.
+ */
+ attrs = 1;
+ if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, attrs))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
+}
+
+/*
+ * SNP_FEATURES_IMPL_REQ is the mask of SNP features that will need
+ * guest side implementation for proper functioning of the guest. If any
+ * of these features are enabled in the hypervisor but are lacking guest
+ * side implementation, the behavior of the guest will be undefined. The
+ * guest could fail in non-obvious way making it difficult to debug.
+ *
+ * As the behavior of reserved feature bits is unknown to be on the
+ * safe side add them to the required features mask.
+ */
+#define SNP_FEATURES_IMPL_REQ (MSR_AMD64_SNP_VTOM | \
+ MSR_AMD64_SNP_REFLECT_VC | \
+ MSR_AMD64_SNP_RESTRICTED_INJ | \
+ MSR_AMD64_SNP_ALT_INJ | \
+ MSR_AMD64_SNP_DEBUG_SWAP | \
+ MSR_AMD64_SNP_VMPL_SSS | \
+ MSR_AMD64_SNP_SECURE_TSC | \
+ MSR_AMD64_SNP_VMGEXIT_PARAM | \
+ MSR_AMD64_SNP_VMSA_REG_PROTECTION | \
+ MSR_AMD64_SNP_RESERVED_BIT13 | \
+ MSR_AMD64_SNP_RESERVED_BIT15 | \
+ MSR_AMD64_SNP_RESERVED_MASK)
+
+/*
+ * SNP_FEATURES_PRESENT is the mask of SNP features that are implemented
+ * by the guest kernel. As and when a new feature is implemented in the
+ * guest kernel, a corresponding bit should be added to the mask.
+ */
+#define SNP_FEATURES_PRESENT (0)
+
+void snp_check_features(void)
+{
+ u64 unsupported;
+
+ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ return;
+
+ /*
+ * Terminate the boot if hypervisor has enabled any feature lacking
+ * guest side implementation. Pass on the unsupported features mask through
+ * EXIT_INFO_2 of the GHCB protocol so that those features can be reported
+ * as part of the guest boot failure.
+ */
+ unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+ if (unsupported) {
+ if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb()))
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+ sev_es_ghcb_terminate(boot_ghcb, SEV_TERM_SET_GEN,
+ GHCB_SNP_UNSUPPORTED, unsupported);
+ }
+}
+
+void sev_enable(struct boot_params *bp)
+{
+ unsigned int eax, ebx, ecx, edx;
+ struct msr m;
+ bool snp;
+
+ /*
+ * bp->cc_blob_address should only be set by boot/compressed kernel.
+ * Initialize it to 0 to ensure that uninitialized values from
+ * buggy bootloaders aren't propagated.
+ */
+ if (bp)
+ bp->cc_blob_address = 0;
+
+ /*
+ * Do an initial SEV capability check before snp_init() which
+ * loads the CPUID page and the same checks afterwards are done
+ * without the hypervisor and are trustworthy.
+ *
+ * If the HV fakes SEV support, the guest will crash'n'burn
+ * which is good enough.
+ */
+
+ /* Check for the SME/SEV support leaf */
+ eax = 0x80000000;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (eax < 0x8000001f)
+ return;
+
+ /*
+ * Check for the SME/SEV feature:
+ * CPUID Fn8000_001F[EAX]
+ * - Bit 0 - Secure Memory Encryption support
+ * - Bit 1 - Secure Encrypted Virtualization support
+ * CPUID Fn8000_001F[EBX]
+ * - Bits 5:0 - Pagetable bit position used to indicate encryption
+ */
+ eax = 0x8000001f;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ /* Check whether SEV is supported */
+ if (!(eax & BIT(1)))
+ return;
+
+ /*
+ * Setup/preliminary detection of SNP. This will be sanity-checked
+ * against CPUID/MSR values later.
+ */
+ snp = snp_init(bp);
+
+ /* Now repeat the checks with the SNP CPUID table. */
+
+ /* Recheck the SME/SEV support leaf */
+ eax = 0x80000000;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (eax < 0x8000001f)
+ return;
+
+ /*
+ * Recheck for the SME/SEV feature:
+ * CPUID Fn8000_001F[EAX]
+ * - Bit 0 - Secure Memory Encryption support
+ * - Bit 1 - Secure Encrypted Virtualization support
+ * CPUID Fn8000_001F[EBX]
+ * - Bits 5:0 - Pagetable bit position used to indicate encryption
+ */
+ eax = 0x8000001f;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ /* Check whether SEV is supported */
+ if (!(eax & BIT(1))) {
+ if (snp)
+ error("SEV-SNP support indicated by CC blob, but not CPUID.");
+ return;
+ }
+
+ /* Set the SME mask if this is an SEV guest. */
+ boot_rdmsr(MSR_AMD64_SEV, &m);
+ sev_status = m.q;
+ if (!(sev_status & MSR_AMD64_SEV_ENABLED))
+ return;
+
+ /* Negotiate the GHCB protocol version. */
+ if (sev_status & MSR_AMD64_SEV_ES_ENABLED) {
+ if (!sev_es_negotiate_protocol())
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_PROT_UNSUPPORTED);
+ }
+
+ /*
+ * SNP is supported in v2 of the GHCB spec which mandates support for HV
+ * features.
+ */
+ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
+ if (!(get_hv_features() & GHCB_HV_FT_SNP))
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+ enforce_vmpl0();
+ }
+
+ if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");
+
+ sme_me_mask = BIT_ULL(ebx & 0x3f);
+}
+
+/* Search for Confidential Computing blob in the EFI config table. */
+static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp)
+{
+ unsigned long cfg_table_pa;
+ unsigned int cfg_table_len;
+ int ret;
+
+ ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len);
+ if (ret)
+ return NULL;
+
+ return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa,
+ cfg_table_len,
+ EFI_CC_BLOB_GUID);
+}
+
+/*
+ * Initial set up of SNP relies on information provided by the
+ * Confidential Computing blob, which can be passed to the boot kernel
+ * by firmware/bootloader in the following ways:
+ *
+ * - via an entry in the EFI config table
+ * - via a setup_data structure, as defined by the Linux Boot Protocol
+ *
+ * Scan for the blob in that order.
+ */
+static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+{
+ struct cc_blob_sev_info *cc_info;
+
+ cc_info = find_cc_blob_efi(bp);
+ if (cc_info)
+ goto found_cc_info;
+
+ cc_info = find_cc_blob_setup_data(bp);
+ if (!cc_info)
+ return NULL;
+
+found_cc_info:
+ if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+ return cc_info;
+}
+
+/*
+ * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks
+ * will verify the SNP CPUID/MSR bits.
+ */
+bool snp_init(struct boot_params *bp)
+{
+ struct cc_blob_sev_info *cc_info;
+
+ if (!bp)
+ return false;
+
+ cc_info = find_cc_blob(bp);
+ if (!cc_info)
+ return false;
+
+ /*
+ * If a SNP-specific Confidential Computing blob is present, then
+ * firmware/bootloader have indicated SNP support. Verifying this
+ * involves CPUID checks which will be more reliable if the SNP
+ * CPUID table is used. See comments over snp_setup_cpuid_table() for
+ * more details.
+ */
+ setup_cpuid_table(cc_info);
+
+ /*
+ * Pass run-time kernel a pointer to CC info via boot_params so EFI
+ * config table doesn't need to be searched again during early startup
+ * phase.
+ */
+ bp->cc_blob_address = (u32)(unsigned long)cc_info;
+
+ return true;
+}
+
+void sev_prep_identity_maps(unsigned long top_level_pgt)
+{
+ /*
+ * The Confidential Computing blob is used very early in uncompressed
+ * kernel to find the in-memory CPUID table to handle CPUID
+ * instructions. Make sure an identity-mapping exists so it can be
+ * accessed after switchover.
+ */
+ if (sev_snp_enabled()) {
+ unsigned long cc_info_pa = boot_params->cc_blob_address;
+ struct cc_blob_sev_info *cc_info;
+
+ kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info));
+
+ cc_info = (struct cc_blob_sev_info *)cc_info_pa;
+ kernel_add_identity_map(cc_info->cpuid_phys, cc_info->cpuid_phys + cc_info->cpuid_len);
+ }
+
+ sev_verify_cbit(top_level_pgt);
+}
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
new file mode 100644
index 000000000..81fc1eaa3
--- /dev/null
+++ b/arch/x86/boot/compressed/string.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This provides an optimized implementation of memcpy, and a simplified
+ * implementation of memset and memmove. These are used here because the
+ * standard kernel runtime versions are not yet available and we don't
+ * trust the gcc built-in implementations as they may do unexpected things
+ * (e.g. FPU ops) in the minimal decompression stub execution environment.
+ */
+#include "error.h"
+
+#include "../string.c"
+
+#ifdef CONFIG_X86_32
+static void *____memcpy(void *dest, const void *src, size_t n)
+{
+ int d0, d1, d2;
+ asm volatile(
+ "rep ; movsl\n\t"
+ "movl %4,%%ecx\n\t"
+ "rep ; movsb\n\t"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
+ : "memory");
+
+ return dest;
+}
+#else
+static void *____memcpy(void *dest, const void *src, size_t n)
+{
+ long d0, d1, d2;
+ asm volatile(
+ "rep ; movsq\n\t"
+ "movq %4,%%rcx\n\t"
+ "rep ; movsb\n\t"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src)
+ : "memory");
+
+ return dest;
+}
+#endif
+
+void *memset(void *s, int c, size_t n)
+{
+ int i;
+ char *ss = s;
+
+ for (i = 0; i < n; i++)
+ ss[i] = c;
+ return s;
+}
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+ unsigned char *d = dest;
+ const unsigned char *s = src;
+
+ if (d <= s || d - s >= n)
+ return ____memcpy(dest, src, n);
+
+ while (n-- > 0)
+ d[n] = s[n];
+
+ return dest;
+}
+
+/* Detect and warn about potential overlaps, but handle them with memmove. */
+void *memcpy(void *dest, const void *src, size_t n)
+{
+ if (dest > src && dest - src < n) {
+ warn("Avoiding potentially unsafe overlapping memcpy()!");
+ return memmove(dest, src, n);
+ }
+ return ____memcpy(dest, src, n);
+}
+
+#ifdef CONFIG_KASAN
+extern void *__memset(void *s, int c, size_t n) __alias(memset);
+extern void *__memmove(void *dest, const void *src, size_t n) __alias(memmove);
+extern void *__memcpy(void *dest, const void *src, size_t n) __alias(memcpy);
+#endif
diff --git a/arch/x86/boot/compressed/tdcall.S b/arch/x86/boot/compressed/tdcall.S
new file mode 100644
index 000000000..46d0495e0
--- /dev/null
+++ b/arch/x86/boot/compressed/tdcall.S
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "../../coco/tdx/tdcall.S"
diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
new file mode 100644
index 000000000..918a7606f
--- /dev/null
+++ b/arch/x86/boot/compressed/tdx.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "../cpuflags.h"
+#include "../string.h"
+#include "../io.h"
+#include "error.h"
+
+#include <vdso/limits.h>
+#include <uapi/asm/vmx.h>
+
+#include <asm/shared/tdx.h>
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __tdx_hypercall_failed(void)
+{
+ error("TDVMCALL failed. TDX module bug?");
+}
+
+static inline unsigned int tdx_io_in(int size, u16 port)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = EXIT_REASON_IO_INSTRUCTION,
+ .r12 = size,
+ .r13 = 0,
+ .r14 = port,
+ };
+
+ if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+ return UINT_MAX;
+
+ return args.r11;
+}
+
+static inline void tdx_io_out(int size, u16 port, u32 value)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = EXIT_REASON_IO_INSTRUCTION,
+ .r12 = size,
+ .r13 = 1,
+ .r14 = port,
+ .r15 = value,
+ };
+
+ __tdx_hypercall(&args, 0);
+}
+
+static inline u8 tdx_inb(u16 port)
+{
+ return tdx_io_in(1, port);
+}
+
+static inline void tdx_outb(u8 value, u16 port)
+{
+ tdx_io_out(1, port, value);
+}
+
+static inline void tdx_outw(u16 value, u16 port)
+{
+ tdx_io_out(2, port, value);
+}
+
+void early_tdx_detect(void)
+{
+ u32 eax, sig[3];
+
+ cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]);
+
+ if (memcmp(TDX_IDENT, sig, sizeof(sig)))
+ return;
+
+ /* Use hypercalls instead of I/O instructions */
+ pio_ops.f_inb = tdx_inb;
+ pio_ops.f_outb = tdx_outb;
+ pio_ops.f_outw = tdx_outw;
+}
diff --git a/arch/x86/boot/compressed/tdx.h b/arch/x86/boot/compressed/tdx.h
new file mode 100644
index 000000000..9055482cd
--- /dev/null
+++ b/arch/x86/boot/compressed/tdx.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_TDX_H
+#define BOOT_COMPRESSED_TDX_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_INTEL_TDX_GUEST
+void early_tdx_detect(void);
+#else
+static inline void early_tdx_detect(void) { };
+#endif
+
+#endif /* BOOT_COMPRESSED_TDX_H */
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
new file mode 100644
index 000000000..112b2375d
--- /dev/null
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT)
+
+#undef i386
+
+#include <asm/cache.h>
+#include <asm/page_types.h>
+
+#ifdef CONFIG_X86_64
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(startup_64)
+#else
+OUTPUT_ARCH(i386)
+ENTRY(startup_32)
+#endif
+
+SECTIONS
+{
+ /* Be careful parts of head_64.S assume startup_32 is at
+ * address 0.
+ */
+ . = 0;
+ .head.text : {
+ _head = . ;
+ HEAD_TEXT
+ _ehead = . ;
+ }
+ .rodata..compressed : {
+ *(.rodata..compressed)
+ }
+ .text : {
+ _text = .; /* Text */
+ *(.text)
+ *(.text.*)
+ _etext = . ;
+ }
+ .rodata : {
+ _rodata = . ;
+ *(.rodata) /* read-only data */
+ *(.rodata.*)
+ _erodata = . ;
+ }
+ .data : {
+ _data = . ;
+ *(.data)
+ *(.data.*)
+ *(.bss.efistub)
+ _edata = . ;
+ }
+ . = ALIGN(L1_CACHE_BYTES);
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ . = ALIGN(8); /* For convenience during zeroing */
+ _ebss = .;
+ }
+#ifdef CONFIG_X86_64
+ . = ALIGN(PAGE_SIZE);
+ .pgtable : {
+ _pgtable = . ;
+ *(.pgtable)
+ _epgtable = . ;
+ }
+#endif
+ . = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */
+ _end = .;
+
+ STABS_DEBUG
+ DWARF_DEBUG
+ ELF_DETAILS
+
+ DISCARDS
+ /DISCARD/ : {
+ *(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss)
+ *(.hash) *(.gnu.hash)
+ *(.note.*)
+ }
+
+ .got.plt (INFO) : {
+ *(.got.plt)
+ }
+ ASSERT(SIZEOF(.got.plt) == 0 ||
+#ifdef CONFIG_X86_64
+ SIZEOF(.got.plt) == 0x18,
+#else
+ SIZEOF(.got.plt) == 0xc,
+#endif
+ "Unexpected GOT/PLT entries detected!")
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .got : {
+ *(.got)
+ }
+ ASSERT(SIZEOF(.got) == 0, "Unexpected GOT entries detected!")
+
+ .plt : {
+ *(.plt) *(.plt.*)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+
+ .rel.dyn : {
+ *(.rel.*) *(.rel_*)
+ }
+ ASSERT(SIZEOF(.rel.dyn) == 0, "Unexpected run-time relocations (.rel) detected!")
+
+ .rela.dyn : {
+ *(.rela.*) *(.rela_*)
+ }
+ ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
+}
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
new file mode 100644
index 000000000..6afd05e81
--- /dev/null
+++ b/arch/x86/boot/copy.S
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <linux/linkage.h>
+
+/*
+ * Memory copy routines
+ */
+
+ .code16
+ .text
+
+SYM_FUNC_START_NOALIGN(memcpy)
+ pushw %si
+ pushw %di
+ movw %ax, %di
+ movw %dx, %si
+ pushw %cx
+ shrw $2, %cx
+ rep; movsl
+ popw %cx
+ andw $3, %cx
+ rep; movsb
+ popw %di
+ popw %si
+ retl
+SYM_FUNC_END(memcpy)
+
+SYM_FUNC_START_NOALIGN(memset)
+ pushw %di
+ movw %ax, %di
+ movzbl %dl, %eax
+ imull $0x01010101,%eax
+ pushw %cx
+ shrw $2, %cx
+ rep; stosl
+ popw %cx
+ andw $3, %cx
+ rep; stosb
+ popw %di
+ retl
+SYM_FUNC_END(memset)
+
+SYM_FUNC_START_NOALIGN(copy_from_fs)
+ pushw %ds
+ pushw %fs
+ popw %ds
+ calll memcpy
+ popw %ds
+ retl
+SYM_FUNC_END(copy_from_fs)
+
+SYM_FUNC_START_NOALIGN(copy_to_fs)
+ pushw %es
+ pushw %fs
+ popw %es
+ calll memcpy
+ popw %es
+ retl
+SYM_FUNC_END(copy_to_fs)
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
new file mode 100644
index 000000000..0bbf4f370
--- /dev/null
+++ b/arch/x86/boot/cpu.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * arch/x86/boot/cpu.c
+ *
+ * Check for obligatory CPU features and abort if the features are not
+ * present.
+ */
+
+#include "boot.h"
+#ifdef CONFIG_X86_FEATURE_NAMES
+#include "cpustr.h"
+#endif
+
+static char *cpu_name(int level)
+{
+ static char buf[6];
+
+ if (level == 64) {
+ return "x86-64";
+ } else {
+ if (level == 15)
+ level = 6;
+ sprintf(buf, "i%d86", level);
+ return buf;
+ }
+}
+
+static void show_cap_strs(u32 *err_flags)
+{
+ int i, j;
+#ifdef CONFIG_X86_FEATURE_NAMES
+ const unsigned char *msg_strs = (const unsigned char *)x86_cap_strs;
+ for (i = 0; i < NCAPINTS; i++) {
+ u32 e = err_flags[i];
+ for (j = 0; j < 32; j++) {
+ if (msg_strs[0] < i ||
+ (msg_strs[0] == i && msg_strs[1] < j)) {
+ /* Skip to the next string */
+ msg_strs += 2;
+ while (*msg_strs++)
+ ;
+ }
+ if (e & 1) {
+ if (msg_strs[0] == i &&
+ msg_strs[1] == j &&
+ msg_strs[2])
+ printf("%s ", msg_strs+2);
+ else
+ printf("%d:%d ", i, j);
+ }
+ e >>= 1;
+ }
+ }
+#else
+ for (i = 0; i < NCAPINTS; i++) {
+ u32 e = err_flags[i];
+ for (j = 0; j < 32; j++) {
+ if (e & 1)
+ printf("%d:%d ", i, j);
+ e >>= 1;
+ }
+ }
+#endif
+}
+
+int validate_cpu(void)
+{
+ u32 *err_flags;
+ int cpu_level, req_level;
+
+ check_cpu(&cpu_level, &req_level, &err_flags);
+
+ if (cpu_level < req_level) {
+ printf("This kernel requires an %s CPU, ",
+ cpu_name(req_level));
+ printf("but only detected an %s CPU.\n",
+ cpu_name(cpu_level));
+ return -1;
+ }
+
+ if (err_flags) {
+ puts("This kernel requires the following features "
+ "not present on the CPU:\n");
+ show_cap_strs(err_flags);
+ putchar('\n');
+ return -1;
+ } else if (check_knl_erratum()) {
+ return -1;
+ } else {
+ return 0;
+ }
+}
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
new file mode 100644
index 000000000..fed8d13ce
--- /dev/null
+++ b/arch/x86/boot/cpucheck.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Check for obligatory CPU features and abort if the features are not
+ * present. This code should be compilable as 16-, 32- or 64-bit
+ * code, so be very careful with types and inline assembly.
+ *
+ * This code should not contain any messages; that requires an
+ * additional wrapper.
+ *
+ * As written, this code is not safe for inclusion into the kernel
+ * proper (after FPU initialization, in particular).
+ */
+
+#ifdef _SETUP
+# include "boot.h"
+#endif
+#include <linux/types.h>
+#include <asm/intel-family.h>
+#include <asm/processor-flags.h>
+#include <asm/required-features.h>
+#include <asm/msr-index.h>
+#include "string.h"
+#include "msr.h"
+
+static u32 err_flags[NCAPINTS];
+
+static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY;
+
+static const u32 req_flags[NCAPINTS] =
+{
+ REQUIRED_MASK0,
+ REQUIRED_MASK1,
+ 0, /* REQUIRED_MASK2 not implemented in this file */
+ 0, /* REQUIRED_MASK3 not implemented in this file */
+ REQUIRED_MASK4,
+ 0, /* REQUIRED_MASK5 not implemented in this file */
+ REQUIRED_MASK6,
+ 0, /* REQUIRED_MASK7 not implemented in this file */
+ 0, /* REQUIRED_MASK8 not implemented in this file */
+ 0, /* REQUIRED_MASK9 not implemented in this file */
+ 0, /* REQUIRED_MASK10 not implemented in this file */
+ 0, /* REQUIRED_MASK11 not implemented in this file */
+ 0, /* REQUIRED_MASK12 not implemented in this file */
+ 0, /* REQUIRED_MASK13 not implemented in this file */
+ 0, /* REQUIRED_MASK14 not implemented in this file */
+ 0, /* REQUIRED_MASK15 not implemented in this file */
+ REQUIRED_MASK16,
+};
+
+#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
+
+static int is_amd(void)
+{
+ return cpu_vendor[0] == A32('A', 'u', 't', 'h') &&
+ cpu_vendor[1] == A32('e', 'n', 't', 'i') &&
+ cpu_vendor[2] == A32('c', 'A', 'M', 'D');
+}
+
+static int is_centaur(void)
+{
+ return cpu_vendor[0] == A32('C', 'e', 'n', 't') &&
+ cpu_vendor[1] == A32('a', 'u', 'r', 'H') &&
+ cpu_vendor[2] == A32('a', 'u', 'l', 's');
+}
+
+static int is_transmeta(void)
+{
+ return cpu_vendor[0] == A32('G', 'e', 'n', 'u') &&
+ cpu_vendor[1] == A32('i', 'n', 'e', 'T') &&
+ cpu_vendor[2] == A32('M', 'x', '8', '6');
+}
+
+static int is_intel(void)
+{
+ return cpu_vendor[0] == A32('G', 'e', 'n', 'u') &&
+ cpu_vendor[1] == A32('i', 'n', 'e', 'I') &&
+ cpu_vendor[2] == A32('n', 't', 'e', 'l');
+}
+
+/* Returns a bitmask of which words we have error bits in */
+static int check_cpuflags(void)
+{
+ u32 err;
+ int i;
+
+ err = 0;
+ for (i = 0; i < NCAPINTS; i++) {
+ err_flags[i] = req_flags[i] & ~cpu.flags[i];
+ if (err_flags[i])
+ err |= 1 << i;
+ }
+
+ return err;
+}
+
+/*
+ * Returns -1 on error.
+ *
+ * *cpu_level is set to the current CPU level; *req_level to the required
+ * level. x86-64 is considered level 64 for this purpose.
+ *
+ * *err_flags_ptr is set to the flags error array if there are flags missing.
+ */
+int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
+{
+ int err;
+
+ memset(&cpu.flags, 0, sizeof(cpu.flags));
+ cpu.level = 3;
+
+ if (has_eflag(X86_EFLAGS_AC))
+ cpu.level = 4;
+
+ get_cpuflags();
+ err = check_cpuflags();
+
+ if (test_bit(X86_FEATURE_LM, cpu.flags))
+ cpu.level = 64;
+
+ if (err == 0x01 &&
+ !(err_flags[0] &
+ ~((1 << X86_FEATURE_XMM)|(1 << X86_FEATURE_XMM2))) &&
+ is_amd()) {
+ /* If this is an AMD and we're only missing SSE+SSE2, try to
+ turn them on */
+
+ struct msr m;
+
+ boot_rdmsr(MSR_K7_HWCR, &m);
+ m.l &= ~(1 << 15);
+ boot_wrmsr(MSR_K7_HWCR, &m);
+
+ get_cpuflags(); /* Make sure it really did something */
+ err = check_cpuflags();
+ } else if (err == 0x01 &&
+ !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) &&
+ is_centaur() && cpu.model >= 6) {
+ /* If this is a VIA C3, we might have to enable CX8
+ explicitly */
+
+ struct msr m;
+
+ boot_rdmsr(MSR_VIA_FCR, &m);
+ m.l |= (1 << 1) | (1 << 7);
+ boot_wrmsr(MSR_VIA_FCR, &m);
+
+ set_bit(X86_FEATURE_CX8, cpu.flags);
+ err = check_cpuflags();
+ } else if (err == 0x01 && is_transmeta()) {
+ /* Transmeta might have masked feature bits in word 0 */
+
+ struct msr m, m_tmp;
+ u32 level = 1;
+
+ boot_rdmsr(0x80860004, &m);
+ m_tmp = m;
+ m_tmp.l = ~0;
+ boot_wrmsr(0x80860004, &m_tmp);
+ asm("cpuid"
+ : "+a" (level), "=d" (cpu.flags[0])
+ : : "ecx", "ebx");
+ boot_wrmsr(0x80860004, &m);
+
+ err = check_cpuflags();
+ } else if (err == 0x01 &&
+ !(err_flags[0] & ~(1 << X86_FEATURE_PAE)) &&
+ is_intel() && cpu.level == 6 &&
+ (cpu.model == 9 || cpu.model == 13)) {
+ /* PAE is disabled on this Pentium M but can be forced */
+ if (cmdline_find_option_bool("forcepae")) {
+ puts("WARNING: Forcing PAE in CPU flags\n");
+ set_bit(X86_FEATURE_PAE, cpu.flags);
+ err = check_cpuflags();
+ }
+ else {
+ puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n");
+ }
+ }
+ if (!err)
+ err = check_knl_erratum();
+
+ if (err_flags_ptr)
+ *err_flags_ptr = err ? err_flags : NULL;
+ if (cpu_level_ptr)
+ *cpu_level_ptr = cpu.level;
+ if (req_level_ptr)
+ *req_level_ptr = req_level;
+
+ return (cpu.level < req_level || err) ? -1 : 0;
+}
+
+int check_knl_erratum(void)
+{
+ /*
+ * First check for the affected model/family:
+ */
+ if (!is_intel() ||
+ cpu.family != 6 ||
+ cpu.model != INTEL_FAM6_XEON_PHI_KNL)
+ return 0;
+
+ /*
+ * This erratum affects the Accessed/Dirty bits, and can
+ * cause stray bits to be set in !Present PTEs. We have
+ * enough bits in our 64-bit PTEs (which we have on real
+ * 64-bit mode or PAE) to avoid using these troublesome
+ * bits. But, we do not have enough space in our 32-bit
+ * PTEs. So, refuse to run on 32-bit non-PAE kernels.
+ */
+ if (IS_ENABLED(CONFIG_X86_64) || IS_ENABLED(CONFIG_X86_PAE))
+ return 0;
+
+ puts("This 32-bit kernel can not run on this Xeon Phi x200\n"
+ "processor due to a processor erratum. Use a 64-bit\n"
+ "kernel, or enable PAE in this 32-bit kernel.\n\n");
+
+ return -1;
+}
+
+
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
new file mode 100644
index 000000000..a83d67ec6
--- /dev/null
+++ b/arch/x86/boot/cpuflags.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include "bitops.h"
+
+#include <asm/processor-flags.h>
+#include <asm/required-features.h>
+#include <asm/msr-index.h>
+#include "cpuflags.h"
+
+struct cpu_features cpu;
+u32 cpu_vendor[3];
+
+static bool loaded_flags;
+
+static int has_fpu(void)
+{
+ u16 fcw = -1, fsw = -1;
+ unsigned long cr0;
+
+ asm volatile("mov %%cr0,%0" : "=r" (cr0));
+ if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
+ cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
+ asm volatile("mov %0,%%cr0" : : "r" (cr0));
+ }
+
+ asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+ : "+m" (fsw), "+m" (fcw));
+
+ return fsw == 0 && (fcw & 0x103f) == 0x003f;
+}
+
+/*
+ * For building the 16-bit code we want to explicitly specify 32-bit
+ * push/pop operations, rather than just saying 'pushf' or 'popf' and
+ * letting the compiler choose. But this is also included from the
+ * compressed/ directory where it may be 64-bit code, and thus needs
+ * to be 'pushfq' or 'popfq' in that case.
+ */
+#ifdef __x86_64__
+#define PUSHF "pushfq"
+#define POPF "popfq"
+#else
+#define PUSHF "pushfl"
+#define POPF "popfl"
+#endif
+
+int has_eflag(unsigned long mask)
+{
+ unsigned long f0, f1;
+
+ asm volatile(PUSHF " \n\t"
+ PUSHF " \n\t"
+ "pop %0 \n\t"
+ "mov %0,%1 \n\t"
+ "xor %2,%1 \n\t"
+ "push %1 \n\t"
+ POPF " \n\t"
+ PUSHF " \n\t"
+ "pop %1 \n\t"
+ POPF
+ : "=&r" (f0), "=&r" (f1)
+ : "ri" (mask));
+
+ return !!((f0^f1) & mask);
+}
+
+/* Handle x86_32 PIC using ebx. */
+#if defined(__i386__) && defined(__PIC__)
+# define EBX_REG "=r"
+#else
+# define EBX_REG "=b"
+#endif
+
+void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d)
+{
+ asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t"
+ "cpuid \n\t"
+ ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t"
+ : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b)
+ : "a" (id), "c" (count)
+ );
+}
+
+#define cpuid(id, a, b, c, d) cpuid_count(id, 0, a, b, c, d)
+
+void get_cpuflags(void)
+{
+ u32 max_intel_level, max_amd_level;
+ u32 tfms;
+ u32 ignored;
+
+ if (loaded_flags)
+ return;
+ loaded_flags = true;
+
+ if (has_fpu())
+ set_bit(X86_FEATURE_FPU, cpu.flags);
+
+ if (has_eflag(X86_EFLAGS_ID)) {
+ cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2],
+ &cpu_vendor[1]);
+
+ if (max_intel_level >= 0x00000001 &&
+ max_intel_level <= 0x0000ffff) {
+ cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
+ &cpu.flags[0]);
+ cpu.level = (tfms >> 8) & 15;
+ cpu.family = cpu.level;
+ cpu.model = (tfms >> 4) & 15;
+ if (cpu.level >= 6)
+ cpu.model += ((tfms >> 16) & 0xf) << 4;
+ }
+
+ if (max_intel_level >= 0x00000007) {
+ cpuid_count(0x00000007, 0, &ignored, &ignored,
+ &cpu.flags[16], &ignored);
+ }
+
+ cpuid(0x80000000, &max_amd_level, &ignored, &ignored,
+ &ignored);
+
+ if (max_amd_level >= 0x80000001 &&
+ max_amd_level <= 0x8000ffff) {
+ cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
+ &cpu.flags[1]);
+ }
+ }
+}
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
new file mode 100644
index 000000000..475b8fde9
--- /dev/null
+++ b/arch/x86/boot/cpuflags.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_CPUFLAGS_H
+#define BOOT_CPUFLAGS_H
+
+#include <asm/cpufeatures.h>
+#include <asm/processor-flags.h>
+
+struct cpu_features {
+ int level; /* Family, or 64 for x86-64 */
+ int family; /* Family, always */
+ int model;
+ u32 flags[NCAPINTS];
+};
+
+extern struct cpu_features cpu;
+extern u32 cpu_vendor[3];
+
+int has_eflag(unsigned long mask);
+void get_cpuflags(void);
+void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d);
+
+#endif
diff --git a/arch/x86/boot/ctype.h b/arch/x86/boot/ctype.h
new file mode 100644
index 000000000..8f5ef2994
--- /dev/null
+++ b/arch/x86/boot/ctype.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_CTYPE_H
+#define BOOT_CTYPE_H
+
+static inline int isdigit(int ch)
+{
+ return (ch >= '0') && (ch <= '9');
+}
+
+static inline int isxdigit(int ch)
+{
+ if (isdigit(ch))
+ return true;
+
+ if ((ch >= 'a') && (ch <= 'f'))
+ return true;
+
+ return (ch >= 'A') && (ch <= 'F');
+}
+
+#endif
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c
new file mode 100644
index 000000000..023bf1c3d
--- /dev/null
+++ b/arch/x86/boot/early_serial_console.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Serial port routines for use during early boot reporting. This code is
+ * included from both the compressed kernel and the regular kernel.
+ */
+#include "boot.h"
+
+#define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */
+
+#define DLAB 0x80
+
+#define TXR 0 /* Transmit register (WRITE) */
+#define RXR 0 /* Receive register (READ) */
+#define IER 1 /* Interrupt Enable */
+#define IIR 2 /* Interrupt ID */
+#define FCR 2 /* FIFO control */
+#define LCR 3 /* Line control */
+#define MCR 4 /* Modem control */
+#define LSR 5 /* Line Status */
+#define MSR 6 /* Modem Status */
+#define DLL 0 /* Divisor Latch Low */
+#define DLH 1 /* Divisor latch High */
+
+#define DEFAULT_BAUD 9600
+
+static void early_serial_init(int port, int baud)
+{
+ unsigned char c;
+ unsigned divisor;
+
+ outb(0x3, port + LCR); /* 8n1 */
+ outb(0, port + IER); /* no interrupt */
+ outb(0, port + FCR); /* no fifo */
+ outb(0x3, port + MCR); /* DTR + RTS */
+
+ divisor = 115200 / baud;
+ c = inb(port + LCR);
+ outb(c | DLAB, port + LCR);
+ outb(divisor & 0xff, port + DLL);
+ outb((divisor >> 8) & 0xff, port + DLH);
+ outb(c & ~DLAB, port + LCR);
+
+ early_serial_base = port;
+}
+
+static void parse_earlyprintk(void)
+{
+ int baud = DEFAULT_BAUD;
+ char arg[32];
+ int pos = 0;
+ int port = 0;
+
+ if (cmdline_find_option("earlyprintk", arg, sizeof(arg)) > 0) {
+ char *e;
+
+ if (!strncmp(arg, "serial", 6)) {
+ port = DEFAULT_SERIAL_PORT;
+ pos += 6;
+ }
+
+ if (arg[pos] == ',')
+ pos++;
+
+ /*
+ * make sure we have
+ * "serial,0x3f8,115200"
+ * "serial,ttyS0,115200"
+ * "ttyS0,115200"
+ */
+ if (pos == 7 && !strncmp(arg + pos, "0x", 2)) {
+ port = simple_strtoull(arg + pos, &e, 16);
+ if (port == 0 || arg + pos == e)
+ port = DEFAULT_SERIAL_PORT;
+ else
+ pos = e - arg;
+ } else if (!strncmp(arg + pos, "ttyS", 4)) {
+ static const int bases[] = { 0x3f8, 0x2f8 };
+ int idx = 0;
+
+ /* += strlen("ttyS"); */
+ pos += 4;
+
+ if (arg[pos++] == '1')
+ idx = 1;
+
+ port = bases[idx];
+ }
+
+ if (arg[pos] == ',')
+ pos++;
+
+ baud = simple_strtoull(arg + pos, &e, 0);
+ if (baud == 0 || arg + pos == e)
+ baud = DEFAULT_BAUD;
+ }
+
+ if (port)
+ early_serial_init(port, baud);
+}
+
+#define BASE_BAUD (1843200/16)
+static unsigned int probe_baud(int port)
+{
+ unsigned char lcr, dll, dlh;
+ unsigned int quot;
+
+ lcr = inb(port + LCR);
+ outb(lcr | DLAB, port + LCR);
+ dll = inb(port + DLL);
+ dlh = inb(port + DLH);
+ outb(lcr, port + LCR);
+ quot = (dlh << 8) | dll;
+
+ return BASE_BAUD / quot;
+}
+
+static void parse_console_uart8250(void)
+{
+ char optstr[64], *options;
+ int baud = DEFAULT_BAUD;
+ int port = 0;
+
+ /*
+ * console=uart8250,io,0x3f8,115200n8
+ * need to make sure it is last one console !
+ */
+ if (cmdline_find_option("console", optstr, sizeof(optstr)) <= 0)
+ return;
+
+ options = optstr;
+
+ if (!strncmp(options, "uart8250,io,", 12))
+ port = simple_strtoull(options + 12, &options, 0);
+ else if (!strncmp(options, "uart,io,", 8))
+ port = simple_strtoull(options + 8, &options, 0);
+ else
+ return;
+
+ if (options && (options[0] == ','))
+ baud = simple_strtoull(options + 1, &options, 0);
+ else
+ baud = probe_baud(port);
+
+ if (port)
+ early_serial_init(port, baud);
+}
+
+void console_init(void)
+{
+ parse_earlyprintk();
+
+ if (!early_serial_base)
+ parse_console_uart8250();
+}
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
new file mode 100644
index 000000000..1fb4bc70c
--- /dev/null
+++ b/arch/x86/boot/edd.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Get EDD BIOS disk information
+ */
+
+#include "boot.h"
+#include <linux/edd.h>
+#include "string.h"
+
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+
+/*
+ * Read the MBR (first sector) from a specific device.
+ */
+static int read_mbr(u8 devno, void *buf)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ax = 0x0201; /* Legacy Read, one sector */
+ ireg.cx = 0x0001; /* Sector 0-0-1 */
+ ireg.dl = devno;
+ ireg.bx = (size_t)buf;
+
+ intcall(0x13, &ireg, &oreg);
+
+ return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
+}
+
+static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
+{
+ int sector_size;
+ char *mbrbuf_ptr, *mbrbuf_end;
+ u32 buf_base, mbr_base;
+ extern char _end[];
+ u16 mbr_magic;
+
+ sector_size = ei->params.bytes_per_sector;
+ if (!sector_size)
+ sector_size = 512; /* Best available guess */
+
+ /* Produce a naturally aligned buffer on the heap */
+ buf_base = (ds() << 4) + (u32)&_end;
+ mbr_base = (buf_base+sector_size-1) & ~(sector_size-1);
+ mbrbuf_ptr = _end + (mbr_base-buf_base);
+ mbrbuf_end = mbrbuf_ptr + sector_size;
+
+ /* Make sure we actually have space on the heap... */
+ if (!(boot_params.hdr.loadflags & CAN_USE_HEAP))
+ return -1;
+ if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr)
+ return -1;
+
+ memset(mbrbuf_ptr, 0, sector_size);
+ if (read_mbr(devno, mbrbuf_ptr))
+ return -1;
+
+ *mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET];
+ mbr_magic = *(u16 *)&mbrbuf_ptr[510];
+
+ /* check for valid MBR magic */
+ return mbr_magic == 0xAA55 ? 0 : -1;
+}
+
+static int get_edd_info(u8 devno, struct edd_info *ei)
+{
+ struct biosregs ireg, oreg;
+
+ memset(ei, 0, sizeof(*ei));
+
+ /* Check Extensions Present */
+
+ initregs(&ireg);
+ ireg.ah = 0x41;
+ ireg.bx = EDDMAGIC1;
+ ireg.dl = devno;
+ intcall(0x13, &ireg, &oreg);
+
+ if (oreg.eflags & X86_EFLAGS_CF)
+ return -1; /* No extended information */
+
+ if (oreg.bx != EDDMAGIC2)
+ return -1;
+
+ ei->device = devno;
+ ei->version = oreg.ah; /* EDD version number */
+ ei->interface_support = oreg.cx; /* EDD functionality subsets */
+
+ /* Extended Get Device Parameters */
+
+ ei->params.length = sizeof(ei->params);
+ ireg.ah = 0x48;
+ ireg.si = (size_t)&ei->params;
+ intcall(0x13, &ireg, &oreg);
+
+ /* Get legacy CHS parameters */
+
+ /* Ralf Brown recommends setting ES:DI to 0:0 */
+ ireg.ah = 0x08;
+ ireg.es = 0;
+ intcall(0x13, &ireg, &oreg);
+
+ if (!(oreg.eflags & X86_EFLAGS_CF)) {
+ ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2);
+ ei->legacy_max_head = oreg.dh;
+ ei->legacy_sectors_per_track = oreg.cl & 0x3f;
+ }
+
+ return 0;
+}
+
+void query_edd(void)
+{
+ char eddarg[8];
+ int do_mbr = 1;
+#ifdef CONFIG_EDD_OFF
+ int do_edd = 0;
+#else
+ int do_edd = 1;
+#endif
+ int be_quiet;
+ int devno;
+ struct edd_info ei, *edp;
+ u32 *mbrptr;
+
+ if (cmdline_find_option("edd", eddarg, sizeof(eddarg)) > 0) {
+ if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) {
+ do_edd = 1;
+ do_mbr = 0;
+ }
+ else if (!strcmp(eddarg, "off"))
+ do_edd = 0;
+ else if (!strcmp(eddarg, "on"))
+ do_edd = 1;
+ }
+
+ be_quiet = cmdline_find_option_bool("quiet");
+
+ edp = boot_params.eddbuf;
+ mbrptr = boot_params.edd_mbr_sig_buffer;
+
+ if (!do_edd)
+ return;
+
+ /* Bugs in OnBoard or AddOnCards Bios may hang the EDD probe,
+ * so give a hint if this happens.
+ */
+
+ if (!be_quiet)
+ printf("Probing EDD (edd=off to disable)... ");
+
+ for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) {
+ /*
+ * Scan the BIOS-supported hard disks and query EDD
+ * information...
+ */
+ if (!get_edd_info(devno, &ei)
+ && boot_params.eddbuf_entries < EDDMAXNR) {
+ memcpy(edp, &ei, sizeof(ei));
+ edp++;
+ boot_params.eddbuf_entries++;
+ }
+
+ if (do_mbr && !read_mbr_sig(devno, &ei, mbrptr++))
+ boot_params.edd_mbr_sig_buf_entries = devno-0x80+1;
+ }
+
+ if (!be_quiet)
+ printf("ok\n");
+}
+
+#endif
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
new file mode 100644
index 000000000..c9299aeb7
--- /dev/null
+++ b/arch/x86/boot/genimage.sh
@@ -0,0 +1,272 @@
+#!/bin/bash
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2017 by Changbin Du <changbin.du@intel.com>
+#
+# Adapted from code in arch/x86/boot/Makefile by H. Peter Anvin and others
+#
+# "make fdimage/fdimage144/fdimage288/hdimage/isoimage"
+# script for x86 architecture
+#
+# Arguments:
+# $1 - fdimage format
+# $2 - target image file
+# $3 - kernel bzImage file
+# $4 - mtools configuration file
+# $5 - kernel cmdline
+# $6+ - initrd image file(s)
+#
+# This script requires:
+# bash
+# syslinux
+# mtools (for fdimage* and hdimage)
+# edk2/OVMF (for hdimage)
+#
+# Otherwise try to stick to POSIX shell commands...
+#
+
+# Use "make V=1" to debug this script
+case "${KBUILD_VERBOSE}" in
+*1*)
+ set -x
+ ;;
+esac
+
+# Exit the top-level shell with an error
+topshell=$$
+trap 'exit 1' USR1
+die() {
+ echo "" 1>&2
+ echo " *** $*" 1>&2
+ echo "" 1>&2
+ kill -USR1 $topshell
+}
+
+# Verify the existence and readability of a file
+verify() {
+ if [ ! -f "$1" -o ! -r "$1" ]; then
+ die "Missing file: $1"
+ fi
+}
+
+diskfmt="$1"
+FIMAGE="$2"
+FBZIMAGE="$3"
+MTOOLSRC="$4"
+KCMDLINE="$5"
+shift 5 # Remaining arguments = initrd files
+
+export MTOOLSRC
+
+# common options for dd
+dd='dd iflag=fullblock'
+
+# Make sure the files actually exist
+verify "$FBZIMAGE"
+
+declare -a FDINITRDS
+irdpfx=' initrd='
+initrdopts_syslinux=''
+initrdopts_efi=''
+for f in "$@"; do
+ if [ -f "$f" -a -r "$f" ]; then
+ FDINITRDS=("${FDINITRDS[@]}" "$f")
+ fname="$(basename "$f")"
+ initrdopts_syslinux="${initrdopts_syslinux}${irdpfx}${fname}"
+ irdpfx=,
+ initrdopts_efi="${initrdopts_efi} initrd=${fname}"
+ fi
+done
+
+# Read a $3-byte littleendian unsigned value at offset $2 from file $1
+le() {
+ local n=0
+ local m=1
+ for b in $(od -A n -v -j $2 -N $3 -t u1 "$1"); do
+ n=$((n + b*m))
+ m=$((m * 256))
+ done
+ echo $n
+}
+
+# Get the EFI architecture name such that boot{name}.efi is the default
+# boot file name. Returns false with no output if the file is not an
+# EFI image or otherwise unknown.
+efiarch() {
+ [ -f "$1" ] || return
+ [ $(le "$1" 0 2) -eq 23117 ] || return # MZ magic
+ peoffs=$(le "$1" 60 4) # PE header offset
+ [ $peoffs -ge 64 ] || return
+ [ $(le "$1" $peoffs 4) -eq 17744 ] || return # PE magic
+ case $(le "$1" $((peoffs+4+20)) 2) in # PE type
+ 267) ;; # PE32
+ 523) ;; # PE32+
+ *) return 1 ;; # Invalid
+ esac
+ [ $(le "$1" $((peoffs+4+20+68)) 2) -eq 10 ] || return # EFI app
+ case $(le "$1" $((peoffs+4)) 2) in # Machine type
+ 332) echo i386 ;;
+ 450) echo arm ;;
+ 512) echo ia64 ;;
+ 20530) echo riscv32 ;;
+ 20580) echo riscv64 ;;
+ 20776) echo riscv128 ;;
+ 34404) echo x64 ;;
+ 43620) echo aa64 ;;
+ esac
+}
+
+# Get the combined sizes in bytes of the files given, counting sparse
+# files as full length, and padding each file to cluster size
+cluster=16384
+filesizes() {
+ local t=0
+ local s
+ for s in $(ls -lnL "$@" 2>/dev/null | awk '/^-/{ print $5; }'); do
+ t=$((t + ((s+cluster-1)/cluster)*cluster))
+ done
+ echo $t
+}
+
+# Expand directory names which should be in /usr/share into a list
+# of possible alternatives
+sharedirs() {
+ local dir file
+ for dir in /usr/share /usr/lib64 /usr/lib; do
+ for file; do
+ echo "$dir/$file"
+ echo "$dir/${file^^}"
+ done
+ done
+}
+efidirs() {
+ local dir file
+ for dir in /usr/share /boot /usr/lib64 /usr/lib; do
+ for file; do
+ echo "$dir/$file"
+ echo "$dir/${file^^}"
+ done
+ done
+}
+
+findsyslinux() {
+ local f="$(find -L $(sharedirs syslinux isolinux) \
+ -name "$1" -readable -type f -print -quit 2>/dev/null)"
+ if [ ! -f "$f" ]; then
+ die "Need a $1 file, please install syslinux/isolinux."
+ fi
+ echo "$f"
+ return 0
+}
+
+findovmf() {
+ local arch="$1"
+ shift
+ local -a names=(-false)
+ local name f
+ for name; do
+ names=("${names[@]}" -or -iname "$name")
+ done
+ for f in $(find -L $(efidirs edk2 ovmf) \
+ \( "${names[@]}" \) -readable -type f \
+ -print 2>/dev/null); do
+ if [ "$(efiarch "$f")" = "$arch" ]; then
+ echo "$f"
+ return 0
+ fi
+ done
+ die "Need a $1 file for $arch, please install EDK2/OVMF."
+}
+
+do_mcopy() {
+ if [ ${#FDINITRDS[@]} -gt 0 ]; then
+ mcopy "${FDINITRDS[@]}" "$1"
+ fi
+ if [ -n "$efishell" ]; then
+ mmd "$1"EFI "$1"EFI/Boot
+ mcopy "$efishell" "$1"EFI/Boot/boot${kefiarch}.efi
+ fi
+ if [ -n "$kefiarch" ]; then
+ echo linux "$KCMDLINE$initrdopts_efi" | \
+ mcopy - "$1"startup.nsh
+ fi
+ echo default linux "$KCMDLINE$initrdopts_syslinux" | \
+ mcopy - "$1"syslinux.cfg
+ mcopy "$FBZIMAGE" "$1"linux
+}
+
+genbzdisk() {
+ verify "$MTOOLSRC"
+ mformat -v 'LINUX_BOOT' a:
+ syslinux "$FIMAGE"
+ do_mcopy a:
+}
+
+genfdimage144() {
+ verify "$MTOOLSRC"
+ $dd if=/dev/zero of="$FIMAGE" bs=1024 count=1440 2>/dev/null
+ mformat -v 'LINUX_BOOT' v:
+ syslinux "$FIMAGE"
+ do_mcopy v:
+}
+
+genfdimage288() {
+ verify "$MTOOLSRC"
+ $dd if=/dev/zero of="$FIMAGE" bs=1024 count=2880 2>/dev/null
+ mformat -v 'LINUX_BOOT' w:
+ syslinux "$FIMAGE"
+ do_mcopy w:
+}
+
+genhdimage() {
+ verify "$MTOOLSRC"
+ mbr="$(findsyslinux mbr.bin)"
+ kefiarch="$(efiarch "$FBZIMAGE")"
+ if [ -n "$kefiarch" ]; then
+ # The efishell provides command line handling
+ efishell="$(findovmf $kefiarch shell.efi shell${kefiarch}.efi)"
+ ptype='-T 0xef' # EFI system partition, no GPT
+ fi
+ sizes=$(filesizes "$FBZIMAGE" "${FDINITRDS[@]}" "$efishell")
+ # Allow 1% + 2 MiB for filesystem and partition table overhead,
+ # syslinux, and config files; this is probably excessive...
+ megs=$(((sizes + sizes/100 + 2*1024*1024 - 1)/(1024*1024)))
+ $dd if=/dev/zero of="$FIMAGE" bs=$((1024*1024)) count=$megs 2>/dev/null
+ mpartition -I -c -s 32 -h 64 $ptype -b 64 -a p:
+ $dd if="$mbr" of="$FIMAGE" bs=440 count=1 conv=notrunc 2>/dev/null
+ mformat -v 'LINUX_BOOT' -s 32 -h 64 -c $((cluster/512)) -t $megs h:
+ syslinux --offset $((64*512)) "$FIMAGE"
+ do_mcopy h:
+}
+
+geniso() {
+ tmp_dir="$(dirname "$FIMAGE")/isoimage"
+ rm -rf "$tmp_dir"
+ mkdir "$tmp_dir"
+ isolinux=$(findsyslinux isolinux.bin)
+ ldlinux=$(findsyslinux ldlinux.c32)
+ cp "$isolinux" "$ldlinux" "$tmp_dir"
+ cp "$FBZIMAGE" "$tmp_dir"/linux
+ echo default linux "$KCMDLINE" > "$tmp_dir"/isolinux.cfg
+ cp "${FDINITRDS[@]}" "$tmp_dir"/
+ genisoimage -J -r -appid 'LINUX_BOOT' -input-charset=utf-8 \
+ -quiet -o "$FIMAGE" -b isolinux.bin \
+ -c boot.cat -no-emul-boot -boot-load-size 4 \
+ -boot-info-table "$tmp_dir"
+ isohybrid "$FIMAGE" 2>/dev/null || true
+ rm -rf "$tmp_dir"
+}
+
+rm -f "$FIMAGE"
+
+case "$diskfmt" in
+ bzdisk) genbzdisk;;
+ fdimage144) genfdimage144;;
+ fdimage288) genfdimage288;;
+ hdimage) genhdimage;;
+ isoimage) geniso;;
+ *) die "Unknown image format: $diskfmt";;
+esac
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
new file mode 100644
index 000000000..f912d7770
--- /dev/null
+++ b/arch/x86/boot/header.S
@@ -0,0 +1,660 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * header.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Based on bootsect.S and setup.S
+ * modified by more people than can be counted
+ *
+ * Rewritten as a common file by H. Peter Anvin (Apr 2007)
+ *
+ * BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment
+ * addresses must be multiplied by 16 to obtain their respective linear
+ * addresses. To avoid confusion, linear addresses are written using leading
+ * hex while segment addresses are written as segment:offset.
+ *
+ */
+#include <linux/pe.h>
+#include <asm/segment.h>
+#include <asm/boot.h>
+#include <asm/page_types.h>
+#include <asm/setup.h>
+#include <asm/bootparam.h>
+#include "boot.h"
+#include "voffset.h"
+#include "zoffset.h"
+
+BOOTSEG = 0x07C0 /* original address of boot-sector */
+SYSSEG = 0x1000 /* historical load address >> 4 */
+
+#ifndef SVGA_MODE
+#define SVGA_MODE ASK_VGA
+#endif
+
+#ifndef ROOT_RDONLY
+#define ROOT_RDONLY 1
+#endif
+
+ .code16
+ .section ".bstext", "ax"
+
+ .global bootsect_start
+bootsect_start:
+#ifdef CONFIG_EFI_STUB
+ # "MZ", MS-DOS header
+ .word MZ_MAGIC
+#endif
+
+ # Normalize the start address
+ ljmp $BOOTSEG, $start2
+
+start2:
+ movw %cs, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+ xorw %sp, %sp
+ sti
+ cld
+
+ movw $bugger_off_msg, %si
+
+msg_loop:
+ lodsb
+ andb %al, %al
+ jz bs_die
+ movb $0xe, %ah
+ movw $7, %bx
+ int $0x10
+ jmp msg_loop
+
+bs_die:
+ # Allow the user to press a key, then reboot
+ xorw %ax, %ax
+ int $0x16
+ int $0x19
+
+ # int 0x19 should never return. In case it does anyway,
+ # invoke the BIOS reset code...
+ ljmp $0xf000,$0xfff0
+
+#ifdef CONFIG_EFI_STUB
+ .org 0x3c
+ #
+ # Offset to the PE header.
+ #
+ .long pe_header
+#endif /* CONFIG_EFI_STUB */
+
+ .section ".bsdata", "a"
+bugger_off_msg:
+ .ascii "Use a boot loader.\r\n"
+ .ascii "\n"
+ .ascii "Remove disk and press any key to reboot...\r\n"
+ .byte 0
+
+#ifdef CONFIG_EFI_STUB
+pe_header:
+ .long PE_MAGIC
+
+coff_header:
+#ifdef CONFIG_X86_32
+ .set image_file_add_flags, IMAGE_FILE_32BIT_MACHINE
+ .set pe_opt_magic, PE_OPT_MAGIC_PE32
+ .word IMAGE_FILE_MACHINE_I386
+#else
+ .set image_file_add_flags, 0
+ .set pe_opt_magic, PE_OPT_MAGIC_PE32PLUS
+ .word IMAGE_FILE_MACHINE_AMD64
+#endif
+ .word section_count # nr_sections
+ .long 0 # TimeDateStamp
+ .long 0 # PointerToSymbolTable
+ .long 1 # NumberOfSymbols
+ .word section_table - optional_header # SizeOfOptionalHeader
+ .word IMAGE_FILE_EXECUTABLE_IMAGE | \
+ image_file_add_flags | \
+ IMAGE_FILE_DEBUG_STRIPPED | \
+ IMAGE_FILE_LINE_NUMS_STRIPPED # Characteristics
+
+optional_header:
+ .word pe_opt_magic
+ .byte 0x02 # MajorLinkerVersion
+ .byte 0x14 # MinorLinkerVersion
+
+ # Filled in by build.c
+ .long 0 # SizeOfCode
+
+ .long 0 # SizeOfInitializedData
+ .long 0 # SizeOfUninitializedData
+
+ # Filled in by build.c
+ .long 0x0000 # AddressOfEntryPoint
+
+ .long 0x0200 # BaseOfCode
+#ifdef CONFIG_X86_32
+ .long 0 # data
+#endif
+
+extra_header_fields:
+ # PE specification requires ImageBase to be 64k aligned
+ .set image_base, (LOAD_PHYSICAL_ADDR + 0xffff) & ~0xffff
+#ifdef CONFIG_X86_32
+ .long image_base # ImageBase
+#else
+ .quad image_base # ImageBase
+#endif
+ .long 0x20 # SectionAlignment
+ .long 0x20 # FileAlignment
+ .word 0 # MajorOperatingSystemVersion
+ .word 0 # MinorOperatingSystemVersion
+ .word LINUX_EFISTUB_MAJOR_VERSION # MajorImageVersion
+ .word LINUX_EFISTUB_MINOR_VERSION # MinorImageVersion
+ .word 0 # MajorSubsystemVersion
+ .word 0 # MinorSubsystemVersion
+ .long 0 # Win32VersionValue
+
+ #
+ # The size of the bzImage is written in tools/build.c
+ #
+ .long 0 # SizeOfImage
+
+ .long 0x200 # SizeOfHeaders
+ .long 0 # CheckSum
+ .word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application)
+#ifdef CONFIG_EFI_DXE_MEM_ATTRIBUTES
+ .word IMAGE_DLL_CHARACTERISTICS_NX_COMPAT # DllCharacteristics
+#else
+ .word 0 # DllCharacteristics
+#endif
+#ifdef CONFIG_X86_32
+ .long 0 # SizeOfStackReserve
+ .long 0 # SizeOfStackCommit
+ .long 0 # SizeOfHeapReserve
+ .long 0 # SizeOfHeapCommit
+#else
+ .quad 0 # SizeOfStackReserve
+ .quad 0 # SizeOfStackCommit
+ .quad 0 # SizeOfHeapReserve
+ .quad 0 # SizeOfHeapCommit
+#endif
+ .long 0 # LoaderFlags
+ .long (section_table - .) / 8 # NumberOfRvaAndSizes
+
+ .quad 0 # ExportTable
+ .quad 0 # ImportTable
+ .quad 0 # ResourceTable
+ .quad 0 # ExceptionTable
+ .quad 0 # CertificationTable
+ .quad 0 # BaseRelocationTable
+
+ # Section table
+section_table:
+ #
+ # The offset & size fields are filled in by build.c.
+ #
+ .ascii ".setup"
+ .byte 0
+ .byte 0
+ .long 0
+ .long 0x0 # startup_{32,64}
+ .long 0 # Size of initialized data
+ # on disk
+ .long 0x0 # startup_{32,64}
+ .long 0 # PointerToRelocations
+ .long 0 # PointerToLineNumbers
+ .word 0 # NumberOfRelocations
+ .word 0 # NumberOfLineNumbers
+ .long IMAGE_SCN_CNT_CODE | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_EXECUTE | \
+ IMAGE_SCN_ALIGN_16BYTES # Characteristics
+
+ #
+ # The EFI application loader requires a relocation section
+ # because EFI applications must be relocatable. The .reloc
+ # offset & size fields are filled in by build.c.
+ #
+ .ascii ".reloc"
+ .byte 0
+ .byte 0
+ .long 0
+ .long 0
+ .long 0 # SizeOfRawData
+ .long 0 # PointerToRawData
+ .long 0 # PointerToRelocations
+ .long 0 # PointerToLineNumbers
+ .word 0 # NumberOfRelocations
+ .word 0 # NumberOfLineNumbers
+ .long IMAGE_SCN_CNT_INITIALIZED_DATA | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_DISCARDABLE | \
+ IMAGE_SCN_ALIGN_1BYTES # Characteristics
+
+#ifdef CONFIG_EFI_MIXED
+ #
+ # The offset & size fields are filled in by build.c.
+ #
+ .asciz ".compat"
+ .long 0
+ .long 0x0
+ .long 0 # Size of initialized data
+ # on disk
+ .long 0x0
+ .long 0 # PointerToRelocations
+ .long 0 # PointerToLineNumbers
+ .word 0 # NumberOfRelocations
+ .word 0 # NumberOfLineNumbers
+ .long IMAGE_SCN_CNT_INITIALIZED_DATA | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_DISCARDABLE | \
+ IMAGE_SCN_ALIGN_1BYTES # Characteristics
+#endif
+
+ #
+ # The offset & size fields are filled in by build.c.
+ #
+ .ascii ".text"
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 0
+ .long 0x0 # startup_{32,64}
+ .long 0 # Size of initialized data
+ # on disk
+ .long 0x0 # startup_{32,64}
+ .long 0 # PointerToRelocations
+ .long 0 # PointerToLineNumbers
+ .word 0 # NumberOfRelocations
+ .word 0 # NumberOfLineNumbers
+ .long IMAGE_SCN_CNT_CODE | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_EXECUTE | \
+ IMAGE_SCN_ALIGN_16BYTES # Characteristics
+
+ .set section_count, (. - section_table) / 40
+#endif /* CONFIG_EFI_STUB */
+
+ # Kernel attributes; used by setup. This is part 1 of the
+ # header, from the old boot sector.
+
+ .section ".header", "a"
+ .globl sentinel
+sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */
+
+ .globl hdr
+hdr:
+setup_sects: .byte 0 /* Filled in by build.c */
+root_flags: .word ROOT_RDONLY
+syssize: .long 0 /* Filled in by build.c */
+ram_size: .word 0 /* Obsolete */
+vid_mode: .word SVGA_MODE
+root_dev: .word 0 /* Filled in by build.c */
+boot_flag: .word 0xAA55
+
+ # offset 512, entry point
+
+ .globl _start
+_start:
+ # Explicitly enter this as bytes, or the assembler
+ # tries to generate a 3-byte jump here, which causes
+ # everything else to push off to the wrong offset.
+ .byte 0xeb # short (2-byte) jump
+ .byte start_of_setup-1f
+1:
+
+ # Part 2 of the header, from the old setup.S
+
+ .ascii "HdrS" # header signature
+ .word 0x020f # header version number (>= 0x0105)
+ # or else old loadlin-1.5 will fail)
+ .globl realmode_swtch
+realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
+start_sys_seg: .word SYSSEG # obsolete and meaningless, but just
+ # in case something decided to "use" it
+ .word kernel_version-512 # pointing to kernel version string
+ # above section of header is compatible
+ # with loadlin-1.5 (header v1.5). Don't
+ # change it.
+
+type_of_loader: .byte 0 # 0 means ancient bootloader, newer
+ # bootloaders know to change this.
+ # See Documentation/x86/boot.rst for
+ # assigned ids
+
+# flags, unused bits must be zero (RFU) bit within loadflags
+loadflags:
+ .byte LOADED_HIGH # The kernel is to be loaded high
+
+setup_move_size: .word 0x8000 # size to move, when setup is not
+ # loaded at 0x90000. We will move setup
+ # to 0x90000 then just before jumping
+ # into the kernel. However, only the
+ # loader knows how much data behind
+ # us also needs to be loaded.
+
+code32_start: # here loaders can put a different
+ # start address for 32-bit code.
+ .long 0x100000 # 0x100000 = default for big kernel
+
+ramdisk_image: .long 0 # address of loaded ramdisk image
+ # Here the loader puts the 32-bit
+ # address where it loaded the image.
+ # This only will be read by the kernel.
+
+ramdisk_size: .long 0 # its size in bytes
+
+bootsect_kludge:
+ .long 0 # obsolete
+
+heap_end_ptr: .word _end+STACK_SIZE-512
+ # (Header version 0x0201 or later)
+ # space from here (exclusive) down to
+ # end of setup code can be used by setup
+ # for local heap purposes.
+
+ext_loader_ver:
+ .byte 0 # Extended boot loader version
+ext_loader_type:
+ .byte 0 # Extended boot loader type
+
+cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
+ # If nonzero, a 32-bit pointer
+ # to the kernel command line.
+ # The command line should be
+ # located between the start of
+ # setup and the end of low
+ # memory (0xa0000), or it may
+ # get overwritten before it
+ # gets read. If this field is
+ # used, there is no longer
+ # anything magical about the
+ # 0x90000 segment; the setup
+ # can be located anywhere in
+ # low memory 0x10000 or higher.
+
+initrd_addr_max: .long 0x7fffffff
+ # (Header version 0x0203 or later)
+ # The highest safe address for
+ # the contents of an initrd
+ # The current kernel allows up to 4 GB,
+ # but leave it at 2 GB to avoid
+ # possible bootloader bugs.
+
+kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment
+ #required for protected mode
+ #kernel
+#ifdef CONFIG_RELOCATABLE
+relocatable_kernel: .byte 1
+#else
+relocatable_kernel: .byte 0
+#endif
+min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment
+
+xloadflags:
+#ifdef CONFIG_X86_64
+# define XLF0 XLF_KERNEL_64 /* 64-bit kernel */
+#else
+# define XLF0 0
+#endif
+
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
+ /* kernel/boot_param/ramdisk could be loaded above 4g */
+# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
+#else
+# define XLF1 0
+#endif
+
+#ifdef CONFIG_EFI_STUB
+# ifdef CONFIG_EFI_MIXED
+# define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64)
+# else
+# ifdef CONFIG_X86_64
+# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */
+# else
+# define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */
+# endif
+# endif
+#else
+# define XLF23 0
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC_CORE)
+# define XLF4 XLF_EFI_KEXEC
+#else
+# define XLF4 0
+#endif
+
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_5LEVEL
+#define XLF56 (XLF_5LEVEL|XLF_5LEVEL_ENABLED)
+#else
+#define XLF56 XLF_5LEVEL
+#endif
+#else
+#define XLF56 0
+#endif
+
+ .word XLF0 | XLF1 | XLF23 | XLF4 | XLF56
+
+cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
+ #added with boot protocol
+ #version 2.06
+
+hardware_subarch: .long 0 # subarchitecture, added with 2.07
+ # default to 0 for normal x86 PC
+
+hardware_subarch_data: .quad 0
+
+payload_offset: .long ZO_input_data
+payload_length: .long ZO_z_input_len
+
+setup_data: .quad 0 # 64-bit physical pointer to
+ # single linked list of
+ # struct setup_data
+
+pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
+
+#
+# Getting to provably safe in-place decompression is hard. Worst case
+# behaviours need to be analyzed. Here let's take the decompression of
+# a gzip-compressed kernel as example, to illustrate it:
+#
+# The file layout of gzip compressed kernel is:
+#
+# magic[2]
+# method[1]
+# flags[1]
+# timestamp[4]
+# extraflags[1]
+# os[1]
+# compressed data blocks[N]
+# crc[4] orig_len[4]
+#
+# ... resulting in +18 bytes overhead of uncompressed data.
+#
+# (For more information, please refer to RFC 1951 and RFC 1952.)
+#
+# Files divided into blocks
+# 1 bit (last block flag)
+# 2 bits (block type)
+#
+# 1 block occurs every 32K -1 bytes or when there 50% compression
+# has been achieved. The smallest block type encoding is always used.
+#
+# stored:
+# 32 bits length in bytes.
+#
+# fixed:
+# magic fixed tree.
+# symbols.
+#
+# dynamic:
+# dynamic tree encoding.
+# symbols.
+#
+#
+# The buffer for decompression in place is the length of the uncompressed
+# data, plus a small amount extra to keep the algorithm safe. The
+# compressed data is placed at the end of the buffer. The output pointer
+# is placed at the start of the buffer and the input pointer is placed
+# where the compressed data starts. Problems will occur when the output
+# pointer overruns the input pointer.
+#
+# The output pointer can only overrun the input pointer if the input
+# pointer is moving faster than the output pointer. A condition only
+# triggered by data whose compressed form is larger than the uncompressed
+# form.
+#
+# The worst case at the block level is a growth of the compressed data
+# of 5 bytes per 32767 bytes.
+#
+# The worst case internal to a compressed block is very hard to figure.
+# The worst case can at least be bounded by having one bit that represents
+# 32764 bytes and then all of the rest of the bytes representing the very
+# very last byte.
+#
+# All of which is enough to compute an amount of extra data that is required
+# to be safe. To avoid problems at the block level allocating 5 extra bytes
+# per 32767 bytes of data is sufficient. To avoid problems internal to a
+# block adding an extra 32767 bytes (the worst case uncompressed block size)
+# is sufficient, to ensure that in the worst case the decompressed data for
+# block will stop the byte before the compressed data for a block begins.
+# To avoid problems with the compressed data's meta information an extra 18
+# bytes are needed. Leading to the formula:
+#
+# extra_bytes = (uncompressed_size >> 12) + 32768 + 18
+#
+# Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+# Adding 32768 instead of 32767 just makes for round numbers.
+#
+# Above analysis is for decompressing gzip compressed kernel only. Up to
+# now 6 different decompressor are supported all together. And among them
+# xz stores data in chunks and has maximum chunk of 64K. Hence safety
+# margin should be updated to cover all decompressors so that we don't
+# need to deal with each of them separately. Please check
+# the description in lib/decompressor_xxx.c for specific information.
+#
+# extra_bytes = (uncompressed_size >> 12) + 65536 + 128
+#
+# LZ4 is even worse: data that cannot be further compressed grows by 0.4%,
+# or one byte per 256 bytes. OTOH, we can safely get rid of the +128 as
+# the size-dependent part now grows so fast.
+#
+# extra_bytes = (uncompressed_size >> 8) + 65536
+#
+# ZSTD compressed data grows by at most 3 bytes per 128K, and only has a 22
+# byte fixed overhead but has a maximum block size of 128K, so it needs a
+# larger margin.
+#
+# extra_bytes = (uncompressed_size >> 8) + 131072
+
+#define ZO_z_extra_bytes ((ZO_z_output_len >> 8) + 131072)
+#if ZO_z_output_len > ZO_z_input_len
+# define ZO_z_extract_offset (ZO_z_output_len + ZO_z_extra_bytes - \
+ ZO_z_input_len)
+#else
+# define ZO_z_extract_offset ZO_z_extra_bytes
+#endif
+
+/*
+ * The extract_offset has to be bigger than ZO head section. Otherwise when
+ * the head code is running to move ZO to the end of the buffer, it will
+ * overwrite the head code itself.
+ */
+#if (ZO__ehead - ZO_startup_32) > ZO_z_extract_offset
+# define ZO_z_min_extract_offset ((ZO__ehead - ZO_startup_32 + 4095) & ~4095)
+#else
+# define ZO_z_min_extract_offset ((ZO_z_extract_offset + 4095) & ~4095)
+#endif
+
+#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_min_extract_offset)
+
+#define VO_INIT_SIZE (VO__end - VO__text)
+#if ZO_INIT_SIZE > VO_INIT_SIZE
+# define INIT_SIZE ZO_INIT_SIZE
+#else
+# define INIT_SIZE VO_INIT_SIZE
+#endif
+
+init_size: .long INIT_SIZE # kernel initialization size
+handover_offset: .long 0 # Filled in by build.c
+kernel_info_offset: .long 0 # Filled in by build.c
+
+# End of setup header #####################################################
+
+ .section ".entrytext", "ax"
+start_of_setup:
+# Force %es = %ds
+ movw %ds, %ax
+ movw %ax, %es
+ cld
+
+# Apparently some ancient versions of LILO invoked the kernel with %ss != %ds,
+# which happened to work by accident for the old code. Recalculate the stack
+# pointer if %ss is invalid. Otherwise leave it alone, LOADLIN sets up the
+# stack behind its own code, so we can't blindly put it directly past the heap.
+
+ movw %ss, %dx
+ cmpw %ax, %dx # %ds == %ss?
+ movw %sp, %dx
+ je 2f # -> assume %sp is reasonably set
+
+ # Invalid %ss, make up a new stack
+ movw $_end, %dx
+ testb $CAN_USE_HEAP, loadflags
+ jz 1f
+ movw heap_end_ptr, %dx
+1: addw $STACK_SIZE, %dx
+ jnc 2f
+ xorw %dx, %dx # Prevent wraparound
+
+2: # Now %dx should point to the end of our stack space
+ andw $~3, %dx # dword align (might as well...)
+ jnz 3f
+ movw $0xfffc, %dx # Make sure we're not zero
+3: movw %ax, %ss
+ movzwl %dx, %esp # Clear upper half of %esp
+ sti # Now we should have a working stack
+
+# We will have entered with %cs = %ds+0x20, normalize %cs so
+# it is on par with the other segments.
+ pushw %ds
+ pushw $6f
+ lretw
+6:
+
+# Check signature at end of setup
+ cmpl $0x5a5aaa55, setup_sig
+ jne setup_bad
+
+# Zero the bss
+ movw $__bss_start, %di
+ movw $_end+3, %cx
+ xorl %eax, %eax
+ subw %di, %cx
+ shrw $2, %cx
+ rep; stosl
+
+# Jump to C code (should not return)
+ calll main
+
+# Setup corrupt somehow...
+setup_bad:
+ movl $setup_corrupt, %eax
+ calll puts
+ # Fall through...
+
+ .globl die
+ .type die, @function
+die:
+ hlt
+ jmp die
+
+ .size die, .-die
+
+ .section ".initdata", "a"
+setup_corrupt:
+ .byte 7
+ .string "No setup signature found...\n"
diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh
new file mode 100755
index 000000000..0849f4b42
--- /dev/null
+++ b/arch/x86/boot/install.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for i386 architecture
+#
+# Arguments:
+# $1 - kernel version
+# $2 - kernel image file
+# $3 - kernel map file
+# $4 - default install path (blank if root directory)
+
+if [ -f $4/vmlinuz ]; then
+ mv $4/vmlinuz $4/vmlinuz.old
+fi
+
+if [ -f $4/System.map ]; then
+ mv $4/System.map $4/System.old
+fi
+
+cat $2 > $4/vmlinuz
+cp $3 $4/System.map
+
+if [ -x /sbin/lilo ]; then
+ /sbin/lilo
+elif [ -x /etc/lilo/install ]; then
+ /etc/lilo/install
+else
+ sync
+ echo "Cannot find LILO."
+fi
diff --git a/arch/x86/boot/io.h b/arch/x86/boot/io.h
new file mode 100644
index 000000000..110880907
--- /dev/null
+++ b/arch/x86/boot/io.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_IO_H
+#define BOOT_IO_H
+
+#include <asm/shared/io.h>
+
+#undef inb
+#undef inw
+#undef inl
+#undef outb
+#undef outw
+#undef outl
+
+struct port_io_ops {
+ u8 (*f_inb)(u16 port);
+ void (*f_outb)(u8 v, u16 port);
+ void (*f_outw)(u16 v, u16 port);
+};
+
+extern struct port_io_ops pio_ops;
+
+/*
+ * Use the normal I/O instructions by default.
+ * TDX guests override these to use hypercalls.
+ */
+static inline void init_default_io_ops(void)
+{
+ pio_ops.f_inb = __inb;
+ pio_ops.f_outb = __outb;
+ pio_ops.f_outw = __outw;
+}
+
+/*
+ * Redirect port I/O operations via pio_ops callbacks.
+ * TDX guests override these callbacks with TDX-specific helpers.
+ */
+#define inb pio_ops.f_inb
+#define outb pio_ops.f_outb
+#define outw pio_ops.f_outw
+
+#endif
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
new file mode 100644
index 000000000..c4ea5258a
--- /dev/null
+++ b/arch/x86/boot/main.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Main module for the real-mode kernel code
+ */
+#include <linux/build_bug.h>
+
+#include "boot.h"
+#include "string.h"
+
+struct boot_params boot_params __attribute__((aligned(16)));
+
+struct port_io_ops pio_ops;
+
+char *HEAP = _end;
+char *heap_end = _end; /* Default end of heap = no heap */
+
+/*
+ * Copy the header into the boot parameter block. Since this
+ * screws up the old-style command line protocol, adjust by
+ * filling in the new-style command line pointer instead.
+ */
+
+static void copy_boot_params(void)
+{
+ struct old_cmdline {
+ u16 cl_magic;
+ u16 cl_offset;
+ };
+ const struct old_cmdline * const oldcmd =
+ absolute_pointer(OLD_CL_ADDRESS);
+
+ BUILD_BUG_ON(sizeof(boot_params) != 4096);
+ memcpy(&boot_params.hdr, &hdr, sizeof(hdr));
+
+ if (!boot_params.hdr.cmd_line_ptr &&
+ oldcmd->cl_magic == OLD_CL_MAGIC) {
+ /* Old-style command line protocol. */
+ u16 cmdline_seg;
+
+ /* Figure out if the command line falls in the region
+ of memory that an old kernel would have copied up
+ to 0x90000... */
+ if (oldcmd->cl_offset < boot_params.hdr.setup_move_size)
+ cmdline_seg = ds();
+ else
+ cmdline_seg = 0x9000;
+
+ boot_params.hdr.cmd_line_ptr =
+ (cmdline_seg << 4) + oldcmd->cl_offset;
+ }
+}
+
+/*
+ * Query the keyboard lock status as given by the BIOS, and
+ * set the keyboard repeat rate to maximum. Unclear why the latter
+ * is done here; this might be possible to kill off as stale code.
+ */
+static void keyboard_init(void)
+{
+ struct biosregs ireg, oreg;
+ initregs(&ireg);
+
+ ireg.ah = 0x02; /* Get keyboard status */
+ intcall(0x16, &ireg, &oreg);
+ boot_params.kbd_status = oreg.al;
+
+ ireg.ax = 0x0305; /* Set keyboard repeat rate */
+ intcall(0x16, &ireg, NULL);
+}
+
+/*
+ * Get Intel SpeedStep (IST) information.
+ */
+static void query_ist(void)
+{
+ struct biosregs ireg, oreg;
+
+ /* Some older BIOSes apparently crash on this call, so filter
+ it from machines too old to have SpeedStep at all. */
+ if (cpu.level < 6)
+ return;
+
+ initregs(&ireg);
+ ireg.ax = 0xe980; /* IST Support */
+ ireg.edx = 0x47534943; /* Request value */
+ intcall(0x15, &ireg, &oreg);
+
+ boot_params.ist_info.signature = oreg.eax;
+ boot_params.ist_info.command = oreg.ebx;
+ boot_params.ist_info.event = oreg.ecx;
+ boot_params.ist_info.perf_level = oreg.edx;
+}
+
+/*
+ * Tell the BIOS what CPU mode we intend to run in.
+ */
+static void set_bios_mode(void)
+{
+#ifdef CONFIG_X86_64
+ struct biosregs ireg;
+
+ initregs(&ireg);
+ ireg.ax = 0xec00;
+ ireg.bx = 2;
+ intcall(0x15, &ireg, NULL);
+#endif
+}
+
+static void init_heap(void)
+{
+ char *stack_end;
+
+ if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
+ asm("leal %P1(%%esp),%0"
+ : "=r" (stack_end) : "i" (-STACK_SIZE));
+
+ heap_end = (char *)
+ ((size_t)boot_params.hdr.heap_end_ptr + 0x200);
+ if (heap_end > stack_end)
+ heap_end = stack_end;
+ } else {
+ /* Boot protocol 2.00 only, no heap available */
+ puts("WARNING: Ancient bootloader, some functionality "
+ "may be limited!\n");
+ }
+}
+
+void main(void)
+{
+ init_default_io_ops();
+
+ /* First, copy the boot header into the "zeropage" */
+ copy_boot_params();
+
+ /* Initialize the early-boot console */
+ console_init();
+ if (cmdline_find_option_bool("debug"))
+ puts("early console in setup code\n");
+
+ /* End of heap check */
+ init_heap();
+
+ /* Make sure we have all the proper CPU support */
+ if (validate_cpu()) {
+ puts("Unable to boot - please use a kernel appropriate "
+ "for your CPU.\n");
+ die();
+ }
+
+ /* Tell the BIOS what CPU mode we intend to run in. */
+ set_bios_mode();
+
+ /* Detect memory layout */
+ detect_memory();
+
+ /* Set keyboard repeat rate (why?) and query the lock flags */
+ keyboard_init();
+
+ /* Query Intel SpeedStep (IST) information */
+ query_ist();
+
+ /* Query APM information */
+#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
+ query_apm_bios();
+#endif
+
+ /* Query EDD information */
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+ query_edd();
+#endif
+
+ /* Set the video mode */
+ set_video();
+
+ /* Do the last things and invoke protected mode */
+ go_to_protected_mode();
+}
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
new file mode 100644
index 000000000..b0422b79d
--- /dev/null
+++ b/arch/x86/boot/memory.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Memory detection code
+ */
+
+#include "boot.h"
+
+#define SMAP 0x534d4150 /* ASCII "SMAP" */
+
+static void detect_memory_e820(void)
+{
+ int count = 0;
+ struct biosregs ireg, oreg;
+ struct boot_e820_entry *desc = boot_params.e820_table;
+ static struct boot_e820_entry buf; /* static so it is zeroed */
+
+ initregs(&ireg);
+ ireg.ax = 0xe820;
+ ireg.cx = sizeof(buf);
+ ireg.edx = SMAP;
+ ireg.di = (size_t)&buf;
+
+ /*
+ * Note: at least one BIOS is known which assumes that the
+ * buffer pointed to by one e820 call is the same one as
+ * the previous call, and only changes modified fields. Therefore,
+ * we use a temporary buffer and copy the results entry by entry.
+ *
+ * This routine deliberately does not try to account for
+ * ACPI 3+ extended attributes. This is because there are
+ * BIOSes in the field which report zero for the valid bit for
+ * all ranges, and we don't currently make any use of the
+ * other attribute bits. Revisit this if we see the extended
+ * attribute bits deployed in a meaningful way in the future.
+ */
+
+ do {
+ intcall(0x15, &ireg, &oreg);
+ ireg.ebx = oreg.ebx; /* for next iteration... */
+
+ /* BIOSes which terminate the chain with CF = 1 as opposed
+ to %ebx = 0 don't always report the SMAP signature on
+ the final, failing, probe. */
+ if (oreg.eflags & X86_EFLAGS_CF)
+ break;
+
+ /* Some BIOSes stop returning SMAP in the middle of
+ the search loop. We don't know exactly how the BIOS
+ screwed up the map at that point, we might have a
+ partial map, the full map, or complete garbage, so
+ just return failure. */
+ if (oreg.eax != SMAP) {
+ count = 0;
+ break;
+ }
+
+ *desc++ = buf;
+ count++;
+ } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_table));
+
+ boot_params.e820_entries = count;
+}
+
+static void detect_memory_e801(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ax = 0xe801;
+ intcall(0x15, &ireg, &oreg);
+
+ if (oreg.eflags & X86_EFLAGS_CF)
+ return;
+
+ /* Do we really need to do this? */
+ if (oreg.cx || oreg.dx) {
+ oreg.ax = oreg.cx;
+ oreg.bx = oreg.dx;
+ }
+
+ if (oreg.ax > 15*1024) {
+ return; /* Bogus! */
+ } else if (oreg.ax == 15*1024) {
+ boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
+ } else {
+ /*
+ * This ignores memory above 16MB if we have a memory
+ * hole there. If someone actually finds a machine
+ * with a memory hole at 16MB and no support for
+ * 0E820h they should probably generate a fake e820
+ * map.
+ */
+ boot_params.alt_mem_k = oreg.ax;
+ }
+}
+
+static void detect_memory_88(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ah = 0x88;
+ intcall(0x15, &ireg, &oreg);
+
+ boot_params.screen_info.ext_mem_k = oreg.ax;
+}
+
+void detect_memory(void)
+{
+ detect_memory_e820();
+
+ detect_memory_e801();
+
+ detect_memory_88();
+}
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
new file mode 100644
index 000000000..da0ccc5de
--- /dev/null
+++ b/arch/x86/boot/mkcpustr.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2008 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * This is a host program to preprocess the CPU strings into a
+ * compact format suitable for the setup code.
+ */
+
+#include <stdio.h>
+
+#include "../include/asm/required-features.h"
+#include "../include/asm/disabled-features.h"
+#include "../include/asm/cpufeatures.h"
+#include "../include/asm/vmxfeatures.h"
+#include "../kernel/cpu/capflags.c"
+
+int main(void)
+{
+ int i, j;
+ const char *str;
+
+ printf("static const char x86_cap_strs[] =\n");
+
+ for (i = 0; i < NCAPINTS; i++) {
+ for (j = 0; j < 32; j++) {
+ str = x86_cap_flags[i*32+j];
+
+ if (i == NCAPINTS-1 && j == 31) {
+ /* The last entry must be unconditional; this
+ also consumes the compiler-added null
+ character */
+ if (!str)
+ str = "";
+ printf("\t\"\\x%02x\\x%02x\"\"%s\"\n",
+ i, j, str);
+ } else if (str) {
+ printf("#if REQUIRED_MASK%d & (1 << %d)\n"
+ "\t\"\\x%02x\\x%02x\"\"%s\\0\"\n"
+ "#endif\n",
+ i, j, i, j, str);
+ }
+ }
+ }
+ printf("\t;\n");
+ return 0;
+}
diff --git a/arch/x86/boot/msr.h b/arch/x86/boot/msr.h
new file mode 100644
index 000000000..aed66f7ae
--- /dev/null
+++ b/arch/x86/boot/msr.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers/definitions related to MSR access.
+ */
+
+#ifndef BOOT_MSR_H
+#define BOOT_MSR_H
+
+#include <asm/shared/msr.h>
+
+/*
+ * The kernel proper already defines rdmsr()/wrmsr(), but they are not for the
+ * boot kernel since they rely on tracepoint/exception handling infrastructure
+ * that's not available here.
+ */
+static inline void boot_rdmsr(unsigned int reg, struct msr *m)
+{
+ asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg));
+}
+
+static inline void boot_wrmsr(unsigned int reg, const struct msr *m)
+{
+ asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory");
+}
+
+#endif /* BOOT_MSR_H */
diff --git a/arch/x86/boot/mtools.conf.in b/arch/x86/boot/mtools.conf.in
new file mode 100644
index 000000000..174c60508
--- /dev/null
+++ b/arch/x86/boot/mtools.conf.in
@@ -0,0 +1,21 @@
+#
+# mtools configuration file for "make (b)zdisk"
+#
+
+# Actual floppy drive
+drive a:
+ file="/dev/fd0"
+
+# 1.44 MB floppy disk image
+drive v:
+ file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter
+
+# 2.88 MB floppy disk image (mostly for virtual uses)
+drive w:
+ file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter
+
+# Hard disk (h: for the filesystem, p: for format - old mtools bug?)
+drive h:
+ file="@OBJ@/hdimage" offset=32768 mformat_only
+drive p:
+ file="@OBJ@/hdimage" partition=1 mformat_only
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
new file mode 100644
index 000000000..40031a614
--- /dev/null
+++ b/arch/x86/boot/pm.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Prepare the machine for transition to protected mode.
+ */
+
+#include "boot.h"
+#include <asm/segment.h>
+
+/*
+ * Invoke the realmode switch hook if present; otherwise
+ * disable all interrupts.
+ */
+static void realmode_switch_hook(void)
+{
+ if (boot_params.hdr.realmode_swtch) {
+ asm volatile("lcallw *%0"
+ : : "m" (boot_params.hdr.realmode_swtch)
+ : "eax", "ebx", "ecx", "edx");
+ } else {
+ asm volatile("cli");
+ outb(0x80, 0x70); /* Disable NMI */
+ io_delay();
+ }
+}
+
+/*
+ * Disable all interrupts at the legacy PIC.
+ */
+static void mask_all_interrupts(void)
+{
+ outb(0xff, 0xa1); /* Mask all interrupts on the secondary PIC */
+ io_delay();
+ outb(0xfb, 0x21); /* Mask all but cascade on the primary PIC */
+ io_delay();
+}
+
+/*
+ * Reset IGNNE# if asserted in the FPU.
+ */
+static void reset_coprocessor(void)
+{
+ outb(0, 0xf0);
+ io_delay();
+ outb(0, 0xf1);
+ io_delay();
+}
+
+/*
+ * Set up the GDT
+ */
+
+struct gdt_ptr {
+ u16 len;
+ u32 ptr;
+} __attribute__((packed));
+
+static void setup_gdt(void)
+{
+ /* There are machines which are known to not boot with the GDT
+ being 8-byte unaligned. Intel recommends 16 byte alignment. */
+ static const u64 boot_gdt[] __attribute__((aligned(16))) = {
+ /* CS: code, read/execute, 4 GB, base 0 */
+ [GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
+ /* DS: data, read/write, 4 GB, base 0 */
+ [GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
+ /* TSS: 32-bit tss, 104 bytes, base 4096 */
+ /* We only have a TSS here to keep Intel VT happy;
+ we don't actually use it for anything. */
+ [GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103),
+ };
+ /* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead
+ of the gdt_ptr contents. Thus, make it static so it will
+ stay in memory, at least long enough that we switch to the
+ proper kernel GDT. */
+ static struct gdt_ptr gdt;
+
+ gdt.len = sizeof(boot_gdt)-1;
+ gdt.ptr = (u32)&boot_gdt + (ds() << 4);
+
+ asm volatile("lgdtl %0" : : "m" (gdt));
+}
+
+/*
+ * Set up the IDT
+ */
+static void setup_idt(void)
+{
+ static const struct gdt_ptr null_idt = {0, 0};
+ asm volatile("lidtl %0" : : "m" (null_idt));
+}
+
+/*
+ * Actual invocation sequence
+ */
+void go_to_protected_mode(void)
+{
+ /* Hook before leaving real mode, also disables interrupts */
+ realmode_switch_hook();
+
+ /* Enable the A20 gate */
+ if (enable_a20()) {
+ puts("A20 gate not responding, unable to boot...\n");
+ die();
+ }
+
+ /* Reset coprocessor (IGNNE#) */
+ reset_coprocessor();
+
+ /* Mask all interrupts in the PIC */
+ mask_all_interrupts();
+
+ /* Actual transition to protected mode... */
+ setup_idt();
+ setup_gdt();
+ protected_mode_jump(boot_params.hdr.code32_start,
+ (u32)&boot_params + (ds() << 4));
+}
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
new file mode 100644
index 000000000..cbec8bd08
--- /dev/null
+++ b/arch/x86/boot/pmjump.S
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * The actual transition into protected mode
+ */
+
+#include <asm/boot.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+#include <linux/linkage.h>
+
+ .text
+ .code16
+
+/*
+ * void protected_mode_jump(u32 entrypoint, u32 bootparams);
+ */
+SYM_FUNC_START_NOALIGN(protected_mode_jump)
+ movl %edx, %esi # Pointer to boot_params table
+
+ xorl %ebx, %ebx
+ movw %cs, %bx
+ shll $4, %ebx
+ addl %ebx, 2f
+ jmp 1f # Short jump to serialize on 386/486
+1:
+
+ movw $__BOOT_DS, %cx
+ movw $__BOOT_TSS, %di
+
+ movl %cr0, %edx
+ orb $X86_CR0_PE, %dl # Protected mode
+ movl %edx, %cr0
+
+ # Transition to 32-bit mode
+ .byte 0x66, 0xea # ljmpl opcode
+2: .long .Lin_pm32 # offset
+ .word __BOOT_CS # segment
+SYM_FUNC_END(protected_mode_jump)
+
+ .code32
+ .section ".text32","ax"
+SYM_FUNC_START_LOCAL_NOALIGN(.Lin_pm32)
+ # Set up data segments for flat 32-bit mode
+ movl %ecx, %ds
+ movl %ecx, %es
+ movl %ecx, %fs
+ movl %ecx, %gs
+ movl %ecx, %ss
+ # The 32-bit code sets up its own stack, but this way we do have
+ # a valid stack if some debugging hack wants to use it.
+ addl %ebx, %esp
+
+ # Set up TR to make Intel VT happy
+ ltr %di
+
+ # Clear registers to allow for future extensions to the
+ # 32-bit boot protocol
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %ebx, %ebx
+ xorl %ebp, %ebp
+ xorl %edi, %edi
+
+ # Set up LDTR to make Intel VT happy
+ lldt %cx
+
+ jmpl *%eax # Jump to the 32-bit entrypoint
+SYM_FUNC_END(.Lin_pm32)
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
new file mode 100644
index 000000000..1237beeb9
--- /dev/null
+++ b/arch/x86/boot/printf.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Oh, it's a waste of space, but oh-so-yummy for debugging. This
+ * version of printf() does not include 64-bit support. "Live with
+ * it."
+ *
+ */
+
+#include "boot.h"
+
+static int skip_atoi(const char **s)
+{
+ int i = 0;
+
+ while (isdigit(**s))
+ i = i * 10 + *((*s)++) - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SMALL 32 /* Must be 32 == 0x20 */
+#define SPECIAL 64 /* 0x */
+
+#define __do_div(n, base) ({ \
+int __res; \
+__res = ((unsigned long) n) % (unsigned) base; \
+n = ((unsigned long) n) / (unsigned) base; \
+__res; })
+
+static char *number(char *str, long num, int base, int size, int precision,
+ int type)
+{
+ /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
+ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+ char tmp[66];
+ char c, sign, locase;
+ int i;
+
+ /* locase = 0 or 0x20. ORing digits or letters with 'locase'
+ * produces same digits or (maybe lowercased) letters */
+ locase = (type & SMALL);
+ if (type & LEFT)
+ type &= ~ZEROPAD;
+ if (base < 2 || base > 16)
+ return NULL;
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN) {
+ if (num < 0) {
+ sign = '-';
+ num = -num;
+ size--;
+ } else if (type & PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++] = '0';
+ else
+ while (num != 0)
+ tmp[i++] = (digits[__do_div(num, base)] | locase);
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type & (ZEROPAD + LEFT)))
+ while (size-- > 0)
+ *str++ = ' ';
+ if (sign)
+ *str++ = sign;
+ if (type & SPECIAL) {
+ if (base == 8)
+ *str++ = '0';
+ else if (base == 16) {
+ *str++ = '0';
+ *str++ = ('X' | locase);
+ }
+ }
+ if (!(type & LEFT))
+ while (size-- > 0)
+ *str++ = c;
+ while (i < precision--)
+ *str++ = '0';
+ while (i-- > 0)
+ *str++ = tmp[i];
+ while (size-- > 0)
+ *str++ = ' ';
+ return str;
+}
+
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ int len;
+ unsigned long num;
+ int i, base;
+ char *str;
+ const char *s;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+ number of chars for from string */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+
+ for (str = buf; *fmt; ++fmt) {
+ if (*fmt != '%') {
+ *str++ = *fmt;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-':
+ flags |= LEFT;
+ goto repeat;
+ case '+':
+ flags |= PLUS;
+ goto repeat;
+ case ' ':
+ flags |= SPACE;
+ goto repeat;
+ case '#':
+ flags |= SPECIAL;
+ goto repeat;
+ case '0':
+ flags |= ZEROPAD;
+ goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt))
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
+ qualifier = *fmt;
+ ++fmt;
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & LEFT))
+ while (--field_width > 0)
+ *str++ = ' ';
+ *str++ = (unsigned char)va_arg(args, int);
+ while (--field_width > 0)
+ *str++ = ' ';
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT))
+ while (len < field_width--)
+ *str++ = ' ';
+ for (i = 0; i < len; ++i)
+ *str++ = *s++;
+ while (len < field_width--)
+ *str++ = ' ';
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2 * sizeof(void *);
+ flags |= ZEROPAD;
+ }
+ str = number(str,
+ (unsigned long)va_arg(args, void *), 16,
+ field_width, precision, flags);
+ continue;
+
+ case 'n':
+ if (qualifier == 'l') {
+ long *ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else {
+ int *ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ *str++ = '%';
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'x':
+ flags |= SMALL;
+ case 'X':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ *str++ = '%';
+ if (*fmt)
+ *str++ = *fmt;
+ else
+ --fmt;
+ continue;
+ }
+ if (qualifier == 'l')
+ num = va_arg(args, unsigned long);
+ else if (qualifier == 'h') {
+ num = (unsigned short)va_arg(args, int);
+ if (flags & SIGN)
+ num = (short)num;
+ } else if (flags & SIGN)
+ num = va_arg(args, int);
+ else
+ num = va_arg(args, unsigned int);
+ str = number(str, num, base, field_width, precision, flags);
+ }
+ *str = '\0';
+ return str - buf;
+}
+
+int sprintf(char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsprintf(buf, fmt, args);
+ va_end(args);
+ return i;
+}
+
+int printf(const char *fmt, ...)
+{
+ char printf_buf[1024];
+ va_list args;
+ int printed;
+
+ va_start(args, fmt);
+ printed = vsprintf(printf_buf, fmt, args);
+ va_end(args);
+
+ puts(printf_buf);
+
+ return printed;
+}
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c
new file mode 100644
index 000000000..55de6b309
--- /dev/null
+++ b/arch/x86/boot/regs.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* -----------------------------------------------------------------------
+ *
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Simple helper function for initializing a register set.
+ *
+ * Note that this sets EFLAGS_CF in the input register set; this
+ * makes it easier to catch functions which do nothing but don't
+ * explicitly set CF.
+ */
+
+#include "boot.h"
+#include "string.h"
+
+void initregs(struct biosregs *reg)
+{
+ memset(reg, 0, sizeof(*reg));
+ reg->eflags |= X86_EFLAGS_CF;
+ reg->ds = ds();
+ reg->es = ds();
+ reg->fs = fs();
+ reg->gs = gs();
+}
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
new file mode 100644
index 000000000..49546c247
--- /dev/null
+++ b/arch/x86/boot/setup.ld
@@ -0,0 +1,66 @@
+/*
+ * setup.ld
+ *
+ * Linker script for the i386 setup code
+ */
+OUTPUT_FORMAT("elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = 0;
+ .bstext : { *(.bstext) }
+ .bsdata : { *(.bsdata) }
+
+ . = 495;
+ .header : { *(.header) }
+ .entrytext : { *(.entrytext) }
+ .inittext : { *(.inittext) }
+ .initdata : { *(.initdata) }
+ __end_init = .;
+
+ .text : { *(.text .text.*) }
+ .text32 : { *(.text32) }
+
+ . = ALIGN(16);
+ .rodata : { *(.rodata*) }
+
+ .videocards : {
+ video_cards = .;
+ *(.videocards)
+ video_cards_end = .;
+ }
+
+ . = ALIGN(16);
+ .data : { *(.data*) }
+
+ .signature : {
+ setup_sig = .;
+ LONG(0x5a5aaa55)
+ }
+
+
+ . = ALIGN(16);
+ .bss :
+ {
+ __bss_start = .;
+ *(.bss)
+ __bss_end = .;
+ }
+ . = ALIGN(16);
+ _end = .;
+
+ /DISCARD/ : {
+ *(.note*)
+ }
+
+ /*
+ * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
+ */
+ . = ASSERT(_end <= 0x8000, "Setup too big!");
+ . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
+ /* Necessary for the very-old-loader check to work... */
+ . = ASSERT(__end_init <= 5*512, "init sections too big!");
+
+}
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
new file mode 100644
index 000000000..8a3fff912
--- /dev/null
+++ b/arch/x86/boot/string.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Very basic string functions
+ */
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/limits.h>
+#include <asm/asm.h>
+#include "ctype.h"
+#include "string.h"
+
+#define KSTRTOX_OVERFLOW (1U << 31)
+
+/*
+ * Undef these macros so that the functions that we provide
+ * here will have the correct names regardless of how string.h
+ * may have chosen to #define them.
+ */
+#undef memcpy
+#undef memset
+#undef memcmp
+
+int memcmp(const void *s1, const void *s2, size_t len)
+{
+ bool diff;
+ asm("repe; cmpsb" CC_SET(nz)
+ : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ return diff;
+}
+
+/*
+ * Clang may lower `memcmp == 0` to `bcmp == 0`.
+ */
+int bcmp(const void *s1, const void *s2, size_t len)
+{
+ return memcmp(s1, s2, len);
+}
+
+int strcmp(const char *str1, const char *str2)
+{
+ const unsigned char *s1 = (const unsigned char *)str1;
+ const unsigned char *s2 = (const unsigned char *)str2;
+ int delta = 0;
+
+ while (*s1 || *s2) {
+ delta = *s1 - *s2;
+ if (delta)
+ return delta;
+ s1++;
+ s2++;
+ }
+ return 0;
+}
+
+int strncmp(const char *cs, const char *ct, size_t count)
+{
+ unsigned char c1, c2;
+
+ while (count) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ count--;
+ }
+ return 0;
+}
+
+size_t strnlen(const char *s, size_t maxlen)
+{
+ const char *es = s;
+ while (*es && maxlen) {
+ es++;
+ maxlen--;
+ }
+
+ return (es - s);
+}
+
+unsigned int atou(const char *s)
+{
+ unsigned int i = 0;
+ while (isdigit(*s))
+ i = i * 10 + (*s++ - '0');
+ return i;
+}
+
+/* Works only for digits and letters, but small and fast */
+#define TOLOWER(x) ((x) | 0x20)
+
+static unsigned int simple_guess_base(const char *cp)
+{
+ if (cp[0] == '0') {
+ if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2]))
+ return 16;
+ else
+ return 8;
+ } else {
+ return 10;
+ }
+}
+
+/**
+ * simple_strtoull - convert a string to an unsigned long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
+{
+ unsigned long long result = 0;
+
+ if (!base)
+ base = simple_guess_base(cp);
+
+ if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
+ cp += 2;
+
+ while (isxdigit(*cp)) {
+ unsigned int value;
+
+ value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10;
+ if (value >= base)
+ break;
+ result = result * base + value;
+ cp++;
+ }
+ if (endp)
+ *endp = (char *)cp;
+
+ return result;
+}
+
+long simple_strtol(const char *cp, char **endp, unsigned int base)
+{
+ if (*cp == '-')
+ return -simple_strtoull(cp + 1, endp, base);
+
+ return simple_strtoull(cp, endp, base);
+}
+
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ */
+size_t strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+/**
+ * strstr - Find the first substring in a %NUL terminated string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ */
+char *strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+
+/**
+ * strchr - Find the first occurrence of the character c in the string s.
+ * @s: the string to be searched
+ * @c: the character to search for
+ */
+char *strchr(const char *s, int c)
+{
+ while (*s != (char)c)
+ if (*s++ == '\0')
+ return NULL;
+ return (char *)s;
+}
+
+static inline u64 __div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ union {
+ u64 v64;
+ u32 v32[2];
+ } d = { dividend };
+ u32 upper;
+
+ upper = d.v32[1];
+ d.v32[1] = 0;
+ if (upper >= divisor) {
+ d.v32[1] = upper / divisor;
+ upper %= divisor;
+ }
+ asm ("divl %2" : "=a" (d.v32[0]), "=d" (*remainder) :
+ "rm" (divisor), "0" (d.v32[0]), "1" (upper));
+ return d.v64;
+}
+
+static inline u64 __div_u64(u64 dividend, u32 divisor)
+{
+ u32 remainder;
+
+ return __div_u64_rem(dividend, divisor, &remainder);
+}
+
+static inline char _tolower(const char c)
+{
+ return c | 0x20;
+}
+
+static const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
+{
+ if (*base == 0) {
+ if (s[0] == '0') {
+ if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
+ *base = 16;
+ else
+ *base = 8;
+ } else
+ *base = 10;
+ }
+ if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
+ s += 2;
+ return s;
+}
+
+/*
+ * Convert non-negative integer string representation in explicitly given radix
+ * to an integer.
+ * Return number of characters consumed maybe or-ed with overflow bit.
+ * If overflow occurs, result integer (incorrect) is still returned.
+ *
+ * Don't you dare use this function.
+ */
+static unsigned int _parse_integer(const char *s,
+ unsigned int base,
+ unsigned long long *p)
+{
+ unsigned long long res;
+ unsigned int rv;
+
+ res = 0;
+ rv = 0;
+ while (1) {
+ unsigned int c = *s;
+ unsigned int lc = c | 0x20; /* don't tolower() this line */
+ unsigned int val;
+
+ if ('0' <= c && c <= '9')
+ val = c - '0';
+ else if ('a' <= lc && lc <= 'f')
+ val = lc - 'a' + 10;
+ else
+ break;
+
+ if (val >= base)
+ break;
+ /*
+ * Check for overflow only if we are within range of
+ * it in the max base we support (16)
+ */
+ if (unlikely(res & (~0ull << 60))) {
+ if (res > __div_u64(ULLONG_MAX - val, base))
+ rv |= KSTRTOX_OVERFLOW;
+ }
+ res = res * base + val;
+ rv++;
+ s++;
+ }
+ *p = res;
+ return rv;
+}
+
+static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+ unsigned long long _res;
+ unsigned int rv;
+
+ s = _parse_integer_fixup_radix(s, &base);
+ rv = _parse_integer(s, base, &_res);
+ if (rv & KSTRTOX_OVERFLOW)
+ return -ERANGE;
+ if (rv == 0)
+ return -EINVAL;
+ s += rv;
+ if (*s == '\n')
+ s++;
+ if (*s)
+ return -EINVAL;
+ *res = _res;
+ return 0;
+}
+
+/**
+ * kstrtoull - convert a string to an unsigned long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+ if (s[0] == '+')
+ s++;
+ return _kstrtoull(s, base, res);
+}
+
+static int _kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+/**
+ * kstrtoul - convert a string to an unsigned long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the simple_strtoull.
+ */
+int boot_kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+ /*
+ * We want to shortcut function call, but
+ * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0.
+ */
+ if (sizeof(unsigned long) == sizeof(unsigned long long) &&
+ __alignof__(unsigned long) == __alignof__(unsigned long long))
+ return kstrtoull(s, base, (unsigned long long *)res);
+ else
+ return _kstrtoul(s, base, res);
+}
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
new file mode 100644
index 000000000..e5d2c6b8c
--- /dev/null
+++ b/arch/x86/boot/string.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_STRING_H
+#define BOOT_STRING_H
+
+/* Undef any of these macros coming from string_32.h. */
+#undef memcpy
+#undef memset
+#undef memcmp
+
+void *memcpy(void *dst, const void *src, size_t len);
+void *memmove(void *dst, const void *src, size_t len);
+void *memset(void *dst, int c, size_t len);
+int memcmp(const void *s1, const void *s2, size_t len);
+int bcmp(const void *s1, const void *s2, size_t len);
+
+/* Access builtin version by default. */
+#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
+#define memset(d,c,l) __builtin_memset(d,c,l)
+#define memcmp __builtin_memcmp
+
+extern int strcmp(const char *str1, const char *str2);
+extern int strncmp(const char *cs, const char *ct, size_t count);
+extern size_t strlen(const char *s);
+extern char *strstr(const char *s1, const char *s2);
+extern char *strchr(const char *s, int c);
+extern size_t strnlen(const char *s, size_t maxlen);
+extern unsigned int atou(const char *s);
+extern unsigned long long simple_strtoull(const char *cp, char **endp,
+ unsigned int base);
+long simple_strtol(const char *cp, char **endp, unsigned int base);
+
+int kstrtoull(const char *s, unsigned int base, unsigned long long *res);
+int boot_kstrtoul(const char *s, unsigned int base, unsigned long *res);
+#endif /* BOOT_STRING_H */
diff --git a/arch/x86/boot/tools/.gitignore b/arch/x86/boot/tools/.gitignore
new file mode 100644
index 000000000..ae91f4d0d
--- /dev/null
+++ b/arch/x86/boot/tools/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+build
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
new file mode 100644
index 000000000..a3725ad46
--- /dev/null
+++ b/arch/x86/boot/tools/build.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1997 Martin Mares
+ * Copyright (C) 2007 H. Peter Anvin
+ */
+
+/*
+ * This file builds a disk-image from three different files:
+ *
+ * - setup: 8086 machine code, sets up system parm
+ * - system: 80386 code for actual system
+ * - zoffset.h: header with ZO_* defines
+ *
+ * It does some checking that all files are of the correct type, and writes
+ * the result to the specified destination, removing headers and padding to
+ * the right amount. It also writes some system data to stdout.
+ */
+
+/*
+ * Changes by tytso to allow root device specification
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ * Cross compiling fixes by Gertjan van Wingerde, July 1996
+ * Rewritten by Martin Mares, April 1997
+ * Substantially overhauled by H. Peter Anvin, April 2007
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <tools/le_byteshift.h>
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+
+#define DEFAULT_MAJOR_ROOT 0
+#define DEFAULT_MINOR_ROOT 0
+#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT)
+
+/* Minimal number of setup sectors */
+#define SETUP_SECT_MIN 5
+#define SETUP_SECT_MAX 64
+
+/* This must be large enough to hold the entire setup */
+u8 buf[SETUP_SECT_MAX*512];
+
+#define PECOFF_RELOC_RESERVE 0x20
+
+#ifdef CONFIG_EFI_MIXED
+#define PECOFF_COMPAT_RESERVE 0x20
+#else
+#define PECOFF_COMPAT_RESERVE 0x0
+#endif
+
+static unsigned long efi32_stub_entry;
+static unsigned long efi64_stub_entry;
+static unsigned long efi_pe_entry;
+static unsigned long efi32_pe_entry;
+static unsigned long kernel_info;
+static unsigned long startup_64;
+static unsigned long _ehead;
+static unsigned long _end;
+
+/*----------------------------------------------------------------------*/
+
+static const u32 crctab32[] = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
+ 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
+ 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
+ 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
+ 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
+ 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
+ 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
+ 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
+ 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
+ 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
+ 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
+ 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
+ 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
+ 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
+ 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
+ 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
+ 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
+ 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
+ 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
+ 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
+ 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
+ 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
+ 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
+ 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
+ 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
+ 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
+ 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
+ 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
+ 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
+ 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
+ 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
+ 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
+ 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
+ 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
+ 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
+ 0x2d02ef8d
+};
+
+static u32 partial_crc32_one(u8 c, u32 crc)
+{
+ return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8);
+}
+
+static u32 partial_crc32(const u8 *s, int len, u32 crc)
+{
+ while (len--)
+ crc = partial_crc32_one(*s++, crc);
+ return crc;
+}
+
+static void die(const char * str, ...)
+{
+ va_list args;
+ va_start(args, str);
+ vfprintf(stderr, str, args);
+ va_end(args);
+ fputc('\n', stderr);
+ exit(1);
+}
+
+static void usage(void)
+{
+ die("Usage: build setup system zoffset.h image");
+}
+
+#ifdef CONFIG_EFI_STUB
+
+static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset)
+{
+ unsigned int pe_header;
+ unsigned short num_sections;
+ u8 *section;
+
+ pe_header = get_unaligned_le32(&buf[0x3c]);
+ num_sections = get_unaligned_le16(&buf[pe_header + 6]);
+
+#ifdef CONFIG_X86_32
+ section = &buf[pe_header + 0xa8];
+#else
+ section = &buf[pe_header + 0xb8];
+#endif
+
+ while (num_sections > 0) {
+ if (strncmp((char*)section, section_name, 8) == 0) {
+ /* section header size field */
+ put_unaligned_le32(size, section + 0x8);
+
+ /* section header vma field */
+ put_unaligned_le32(vma, section + 0xc);
+
+ /* section header 'size of initialised data' field */
+ put_unaligned_le32(datasz, section + 0x10);
+
+ /* section header 'file offset' field */
+ put_unaligned_le32(offset, section + 0x14);
+
+ break;
+ }
+ section += 0x28;
+ num_sections--;
+ }
+}
+
+static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
+{
+ update_pecoff_section_header_fields(section_name, offset, size, size, offset);
+}
+
+static void update_pecoff_setup_and_reloc(unsigned int size)
+{
+ u32 setup_offset = 0x200;
+ u32 reloc_offset = size - PECOFF_RELOC_RESERVE - PECOFF_COMPAT_RESERVE;
+#ifdef CONFIG_EFI_MIXED
+ u32 compat_offset = reloc_offset + PECOFF_RELOC_RESERVE;
+#endif
+ u32 setup_size = reloc_offset - setup_offset;
+
+ update_pecoff_section_header(".setup", setup_offset, setup_size);
+ update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE);
+
+ /*
+ * Modify .reloc section contents with a single entry. The
+ * relocation is applied to offset 10 of the relocation section.
+ */
+ put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]);
+ put_unaligned_le32(10, &buf[reloc_offset + 4]);
+
+#ifdef CONFIG_EFI_MIXED
+ update_pecoff_section_header(".compat", compat_offset, PECOFF_COMPAT_RESERVE);
+
+ /*
+ * Put the IA-32 machine type (0x14c) and the associated entry point
+ * address in the .compat section, so loaders can figure out which other
+ * execution modes this image supports.
+ */
+ buf[compat_offset] = 0x1;
+ buf[compat_offset + 1] = 0x8;
+ put_unaligned_le16(0x14c, &buf[compat_offset + 2]);
+ put_unaligned_le32(efi32_pe_entry + size, &buf[compat_offset + 4]);
+#endif
+}
+
+static void update_pecoff_text(unsigned int text_start, unsigned int file_sz,
+ unsigned int init_sz)
+{
+ unsigned int pe_header;
+ unsigned int text_sz = file_sz - text_start;
+ unsigned int bss_sz = init_sz - file_sz;
+
+ pe_header = get_unaligned_le32(&buf[0x3c]);
+
+ /*
+ * The PE/COFF loader may load the image at an address which is
+ * misaligned with respect to the kernel_alignment field in the setup
+ * header.
+ *
+ * In order to avoid relocating the kernel to correct the misalignment,
+ * add slack to allow the buffer to be aligned within the declared size
+ * of the image.
+ */
+ bss_sz += CONFIG_PHYSICAL_ALIGN;
+ init_sz += CONFIG_PHYSICAL_ALIGN;
+
+ /*
+ * Size of code: Subtract the size of the first sector (512 bytes)
+ * which includes the header.
+ */
+ put_unaligned_le32(file_sz - 512 + bss_sz, &buf[pe_header + 0x1c]);
+
+ /* Size of image */
+ put_unaligned_le32(init_sz, &buf[pe_header + 0x50]);
+
+ /*
+ * Address of entry point for PE/COFF executable
+ */
+ put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]);
+
+ update_pecoff_section_header_fields(".text", text_start, text_sz + bss_sz,
+ text_sz, text_start);
+}
+
+static int reserve_pecoff_reloc_section(int c)
+{
+ /* Reserve 0x20 bytes for .reloc section */
+ memset(buf+c, 0, PECOFF_RELOC_RESERVE);
+ return PECOFF_RELOC_RESERVE;
+}
+
+static void efi_stub_defaults(void)
+{
+ /* Defaults for old kernel */
+#ifdef CONFIG_X86_32
+ efi_pe_entry = 0x10;
+#else
+ efi_pe_entry = 0x210;
+ startup_64 = 0x200;
+#endif
+}
+
+static void efi_stub_entry_update(void)
+{
+ unsigned long addr = efi32_stub_entry;
+
+#ifdef CONFIG_X86_64
+ /* Yes, this is really how we defined it :( */
+ addr = efi64_stub_entry - 0x200;
+#endif
+
+#ifdef CONFIG_EFI_MIXED
+ if (efi32_stub_entry != addr)
+ die("32-bit and 64-bit EFI entry points do not match\n");
+#endif
+ put_unaligned_le32(addr, &buf[0x264]);
+}
+
+#else
+
+static inline void update_pecoff_setup_and_reloc(unsigned int size) {}
+static inline void update_pecoff_text(unsigned int text_start,
+ unsigned int file_sz,
+ unsigned int init_sz) {}
+static inline void efi_stub_defaults(void) {}
+static inline void efi_stub_entry_update(void) {}
+
+static inline int reserve_pecoff_reloc_section(int c)
+{
+ return 0;
+}
+#endif /* CONFIG_EFI_STUB */
+
+static int reserve_pecoff_compat_section(int c)
+{
+ /* Reserve 0x20 bytes for .compat section */
+ memset(buf+c, 0, PECOFF_COMPAT_RESERVE);
+ return PECOFF_COMPAT_RESERVE;
+}
+
+/*
+ * Parse zoffset.h and find the entry points. We could just #include zoffset.h
+ * but that would mean tools/build would have to be rebuilt every time. It's
+ * not as if parsing it is hard...
+ */
+#define PARSE_ZOFS(p, sym) do { \
+ if (!strncmp(p, "#define ZO_" #sym " ", 11+sizeof(#sym))) \
+ sym = strtoul(p + 11 + sizeof(#sym), NULL, 16); \
+} while (0)
+
+static void parse_zoffset(char *fname)
+{
+ FILE *file;
+ char *p;
+ int c;
+
+ file = fopen(fname, "r");
+ if (!file)
+ die("Unable to open `%s': %m", fname);
+ c = fread(buf, 1, sizeof(buf) - 1, file);
+ if (ferror(file))
+ die("read-error on `zoffset.h'");
+ fclose(file);
+ buf[c] = 0;
+
+ p = (char *)buf;
+
+ while (p && *p) {
+ PARSE_ZOFS(p, efi32_stub_entry);
+ PARSE_ZOFS(p, efi64_stub_entry);
+ PARSE_ZOFS(p, efi_pe_entry);
+ PARSE_ZOFS(p, efi32_pe_entry);
+ PARSE_ZOFS(p, kernel_info);
+ PARSE_ZOFS(p, startup_64);
+ PARSE_ZOFS(p, _ehead);
+ PARSE_ZOFS(p, _end);
+
+ p = strchr(p, '\n');
+ while (p && (*p == '\r' || *p == '\n'))
+ p++;
+ }
+}
+
+int main(int argc, char ** argv)
+{
+ unsigned int i, sz, setup_sectors, init_sz;
+ int c;
+ u32 sys_size;
+ struct stat sb;
+ FILE *file, *dest;
+ int fd;
+ void *kernel;
+ u32 crc = 0xffffffffUL;
+
+ efi_stub_defaults();
+
+ if (argc != 5)
+ usage();
+ parse_zoffset(argv[3]);
+
+ dest = fopen(argv[4], "w");
+ if (!dest)
+ die("Unable to write `%s': %m", argv[4]);
+
+ /* Copy the setup code */
+ file = fopen(argv[1], "r");
+ if (!file)
+ die("Unable to open `%s': %m", argv[1]);
+ c = fread(buf, 1, sizeof(buf), file);
+ if (ferror(file))
+ die("read-error on `setup'");
+ if (c < 1024)
+ die("The setup must be at least 1024 bytes");
+ if (get_unaligned_le16(&buf[510]) != 0xAA55)
+ die("Boot block hasn't got boot flag (0xAA55)");
+ fclose(file);
+
+ c += reserve_pecoff_compat_section(c);
+ c += reserve_pecoff_reloc_section(c);
+
+ /* Pad unused space with zeros */
+ setup_sectors = (c + 511) / 512;
+ if (setup_sectors < SETUP_SECT_MIN)
+ setup_sectors = SETUP_SECT_MIN;
+ i = setup_sectors*512;
+ memset(buf+c, 0, i-c);
+
+ update_pecoff_setup_and_reloc(i);
+
+ /* Set the default root device */
+ put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
+
+ /* Open and stat the kernel file */
+ fd = open(argv[2], O_RDONLY);
+ if (fd < 0)
+ die("Unable to open `%s': %m", argv[2]);
+ if (fstat(fd, &sb))
+ die("Unable to stat `%s': %m", argv[2]);
+ sz = sb.st_size;
+ kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
+ if (kernel == MAP_FAILED)
+ die("Unable to mmap '%s': %m", argv[2]);
+ /* Number of 16-byte paragraphs, including space for a 4-byte CRC */
+ sys_size = (sz + 15 + 4) / 16;
+#ifdef CONFIG_EFI_STUB
+ /*
+ * COFF requires minimum 32-byte alignment of sections, and
+ * adding a signature is problematic without that alignment.
+ */
+ sys_size = (sys_size + 1) & ~1;
+#endif
+
+ /* Patch the setup code with the appropriate size parameters */
+ buf[0x1f1] = setup_sectors-1;
+ put_unaligned_le32(sys_size, &buf[0x1f4]);
+
+ init_sz = get_unaligned_le32(&buf[0x260]);
+#ifdef CONFIG_EFI_STUB
+ /*
+ * The decompression buffer will start at ImageBase. When relocating
+ * the compressed kernel to its end, we must ensure that the head
+ * section does not get overwritten. The head section occupies
+ * [i, i + _ehead), and the destination is [init_sz - _end, init_sz).
+ *
+ * At present these should never overlap, because 'i' is at most 32k
+ * because of SETUP_SECT_MAX, '_ehead' is less than 1k, and the
+ * calculation of INIT_SIZE in boot/header.S ensures that
+ * 'init_sz - _end' is at least 64k.
+ *
+ * For future-proofing, increase init_sz if necessary.
+ */
+
+ if (init_sz - _end < i + _ehead) {
+ init_sz = (i + _ehead + _end + 4095) & ~4095;
+ put_unaligned_le32(init_sz, &buf[0x260]);
+ }
+#endif
+ update_pecoff_text(setup_sectors * 512, i + (sys_size * 16), init_sz);
+
+ efi_stub_entry_update();
+
+ /* Update kernel_info offset. */
+ put_unaligned_le32(kernel_info, &buf[0x268]);
+
+ crc = partial_crc32(buf, i, crc);
+ if (fwrite(buf, 1, i, dest) != i)
+ die("Writing setup failed");
+
+ /* Copy the kernel code */
+ crc = partial_crc32(kernel, sz, crc);
+ if (fwrite(kernel, 1, sz, dest) != sz)
+ die("Writing kernel failed");
+
+ /* Add padding leaving 4 bytes for the checksum */
+ while (sz++ < (sys_size*16) - 4) {
+ crc = partial_crc32_one('\0', crc);
+ if (fwrite("\0", 1, 1, dest) != 1)
+ die("Writing padding failed");
+ }
+
+ /* Write the CRC */
+ put_unaligned_le32(crc, buf);
+ if (fwrite(buf, 1, 4, dest) != 4)
+ die("Writing CRC failed");
+
+ /* Catch any delayed write failures */
+ if (fclose(dest))
+ die("Writing image failed");
+
+ close(fd);
+
+ /* Everything is OK */
+ return 0;
+}
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
new file mode 100644
index 000000000..f7eb976b0
--- /dev/null
+++ b/arch/x86/boot/tty.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Very simple screen and serial I/O
+ */
+
+#include "boot.h"
+
+int early_serial_base;
+
+#define XMTRDY 0x20
+
+#define TXR 0 /* Transmit register (WRITE) */
+#define LSR 5 /* Line Status */
+
+/*
+ * These functions are in .inittext so they can be used to signal
+ * error during initialization.
+ */
+
+static void __section(".inittext") serial_putchar(int ch)
+{
+ unsigned timeout = 0xffff;
+
+ while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
+ cpu_relax();
+
+ outb(ch, early_serial_base + TXR);
+}
+
+static void __section(".inittext") bios_putchar(int ch)
+{
+ struct biosregs ireg;
+
+ initregs(&ireg);
+ ireg.bx = 0x0007;
+ ireg.cx = 0x0001;
+ ireg.ah = 0x0e;
+ ireg.al = ch;
+ intcall(0x10, &ireg, NULL);
+}
+
+void __section(".inittext") putchar(int ch)
+{
+ if (ch == '\n')
+ putchar('\r'); /* \n -> \r\n */
+
+ bios_putchar(ch);
+
+ if (early_serial_base != 0)
+ serial_putchar(ch);
+}
+
+void __section(".inittext") puts(const char *str)
+{
+ while (*str)
+ putchar(*str++);
+}
+
+/*
+ * Read the CMOS clock through the BIOS, and return the
+ * seconds in BCD.
+ */
+
+static u8 gettime(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ah = 0x02;
+ intcall(0x1a, &ireg, &oreg);
+
+ return oreg.dh;
+}
+
+/*
+ * Read from the keyboard
+ */
+int getchar(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ /* ireg.ah = 0x00; */
+ intcall(0x16, &ireg, &oreg);
+
+ return oreg.al;
+}
+
+static int kbd_pending(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ah = 0x01;
+ intcall(0x16, &ireg, &oreg);
+
+ return !(oreg.eflags & X86_EFLAGS_ZF);
+}
+
+void kbd_flush(void)
+{
+ for (;;) {
+ if (!kbd_pending())
+ break;
+ getchar();
+ }
+}
+
+int getchar_timeout(void)
+{
+ int cnt = 30;
+ int t0, t1;
+
+ t0 = gettime();
+
+ while (cnt) {
+ if (kbd_pending())
+ return getchar();
+
+ t1 = gettime();
+ if (t0 != t1) {
+ cnt--;
+ t0 = t1;
+ }
+ }
+
+ return 0; /* Timeout! */
+}
+
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c
new file mode 100644
index 000000000..945383f0f
--- /dev/null
+++ b/arch/x86/boot/version.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Kernel version string
+ */
+
+#include "boot.h"
+#include <generated/utsversion.h>
+#include <generated/utsrelease.h>
+#include <generated/compile.h>
+
+const char kernel_version[] =
+ UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") "
+ UTS_VERSION;
diff --git a/arch/x86/boot/vesa.h b/arch/x86/boot/vesa.h
new file mode 100644
index 000000000..9e23fdffb
--- /dev/null
+++ b/arch/x86/boot/vesa.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1999-2007 H. Peter Anvin - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+#ifndef BOOT_VESA_H
+#define BOOT_VESA_H
+
+typedef struct {
+ u16 off, seg;
+} far_ptr;
+
+/* VESA General Information table */
+struct vesa_general_info {
+ u32 signature; /* 0 Magic number = "VESA" */
+ u16 version; /* 4 */
+ far_ptr vendor_string; /* 6 */
+ u32 capabilities; /* 10 */
+ far_ptr video_mode_ptr; /* 14 */
+ u16 total_memory; /* 18 */
+
+ u8 reserved[236]; /* 20 */
+} __attribute__ ((packed));
+
+#define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24))
+
+struct vesa_mode_info {
+ u16 mode_attr; /* 0 */
+ u8 win_attr[2]; /* 2 */
+ u16 win_grain; /* 4 */
+ u16 win_size; /* 6 */
+ u16 win_seg[2]; /* 8 */
+ far_ptr win_scheme; /* 12 */
+ u16 logical_scan; /* 16 */
+
+ u16 h_res; /* 18 */
+ u16 v_res; /* 20 */
+ u8 char_width; /* 22 */
+ u8 char_height; /* 23 */
+ u8 memory_planes; /* 24 */
+ u8 bpp; /* 25 */
+ u8 banks; /* 26 */
+ u8 memory_layout; /* 27 */
+ u8 bank_size; /* 28 */
+ u8 image_planes; /* 29 */
+ u8 page_function; /* 30 */
+
+ u8 rmask; /* 31 */
+ u8 rpos; /* 32 */
+ u8 gmask; /* 33 */
+ u8 gpos; /* 34 */
+ u8 bmask; /* 35 */
+ u8 bpos; /* 36 */
+ u8 resv_mask; /* 37 */
+ u8 resv_pos; /* 38 */
+ u8 dcm_info; /* 39 */
+
+ u32 lfb_ptr; /* 40 Linear frame buffer address */
+ u32 offscreen_ptr; /* 44 Offscreen memory address */
+ u16 offscreen_size; /* 48 */
+
+ u8 reserved[206]; /* 50 */
+} __attribute__ ((packed));
+
+#endif /* LIB_SYS_VESA_H */
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
new file mode 100644
index 000000000..6eb8c06bc
--- /dev/null
+++ b/arch/x86/boot/video-bios.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Standard video BIOS modes
+ *
+ * We have two options for this; silent and scanned.
+ */
+
+#include "boot.h"
+#include "video.h"
+
+static __videocard video_bios;
+
+/* Set a conventional BIOS mode */
+static int set_bios_mode(u8 mode);
+
+static int bios_set_mode(struct mode_info *mi)
+{
+ return set_bios_mode(mi->mode - VIDEO_FIRST_BIOS);
+}
+
+static int set_bios_mode(u8 mode)
+{
+ struct biosregs ireg, oreg;
+ u8 new_mode;
+
+ initregs(&ireg);
+ ireg.al = mode; /* AH=0x00 Set Video Mode */
+ intcall(0x10, &ireg, NULL);
+
+ ireg.ah = 0x0f; /* Get Current Video Mode */
+ intcall(0x10, &ireg, &oreg);
+
+ do_restore = 1; /* Assume video contents were lost */
+
+ /* Not all BIOSes are clean with the top bit */
+ new_mode = oreg.al & 0x7f;
+
+ if (new_mode == mode)
+ return 0; /* Mode change OK */
+
+#ifndef _WAKEUP
+ if (new_mode != boot_params.screen_info.orig_video_mode) {
+ /* Mode setting failed, but we didn't end up where we
+ started. That's bad. Try to revert to the original
+ video mode. */
+ ireg.ax = boot_params.screen_info.orig_video_mode;
+ intcall(0x10, &ireg, NULL);
+ }
+#endif
+ return -1;
+}
+
+static int bios_probe(void)
+{
+ u8 mode;
+#ifdef _WAKEUP
+ u8 saved_mode = 0x03;
+#else
+ u8 saved_mode = boot_params.screen_info.orig_video_mode;
+#endif
+ u16 crtc;
+ struct mode_info *mi;
+ int nmodes = 0;
+
+ if (adapter != ADAPTER_EGA && adapter != ADAPTER_VGA)
+ return 0;
+
+ set_fs(0);
+ crtc = vga_crtc();
+
+ video_bios.modes = GET_HEAP(struct mode_info, 0);
+
+ for (mode = 0x14; mode <= 0x7f; mode++) {
+ if (!heap_free(sizeof(struct mode_info)))
+ break;
+
+ if (mode_defined(VIDEO_FIRST_BIOS+mode))
+ continue;
+
+ if (set_bios_mode(mode))
+ continue;
+
+ /* Try to verify that it's a text mode. */
+
+ /* Attribute Controller: make graphics controller disabled */
+ if (in_idx(0x3c0, 0x10) & 0x01)
+ continue;
+
+ /* Graphics Controller: verify Alpha addressing enabled */
+ if (in_idx(0x3ce, 0x06) & 0x01)
+ continue;
+
+ /* CRTC cursor location low should be zero(?) */
+ if (in_idx(crtc, 0x0f))
+ continue;
+
+ mi = GET_HEAP(struct mode_info, 1);
+ mi->mode = VIDEO_FIRST_BIOS+mode;
+ mi->depth = 0; /* text */
+ mi->x = rdfs16(0x44a);
+ mi->y = rdfs8(0x484)+1;
+ nmodes++;
+ }
+
+ set_bios_mode(saved_mode);
+
+ return nmodes;
+}
+
+static __videocard video_bios =
+{
+ .card_name = "BIOS",
+ .probe = bios_probe,
+ .set_mode = bios_set_mode,
+ .unsafe = 1,
+ .xmode_first = VIDEO_FIRST_BIOS,
+ .xmode_n = 0x80,
+};
diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c
new file mode 100644
index 000000000..9ada55dc1
--- /dev/null
+++ b/arch/x86/boot/video-mode.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * arch/i386/boot/video-mode.c
+ *
+ * Set the video mode. This is separated out into a different
+ * file in order to be shared with the ACPI wakeup code.
+ */
+
+#include "boot.h"
+#include "video.h"
+#include "vesa.h"
+
+#include <uapi/asm/boot.h>
+
+/*
+ * Common variables
+ */
+int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
+int force_x, force_y; /* Don't query the BIOS for cols/rows */
+int do_restore; /* Screen contents changed during mode flip */
+int graphic_mode; /* Graphic mode with linear frame buffer */
+
+/* Probe the video drivers and have them generate their mode lists. */
+void probe_cards(int unsafe)
+{
+ struct card_info *card;
+ static u8 probed[2];
+
+ if (probed[unsafe])
+ return;
+
+ probed[unsafe] = 1;
+
+ for (card = video_cards; card < video_cards_end; card++) {
+ if (card->unsafe == unsafe) {
+ if (card->probe)
+ card->nmodes = card->probe();
+ else
+ card->nmodes = 0;
+ }
+ }
+}
+
+/* Test if a mode is defined */
+int mode_defined(u16 mode)
+{
+ struct card_info *card;
+ struct mode_info *mi;
+ int i;
+
+ for (card = video_cards; card < video_cards_end; card++) {
+ mi = card->modes;
+ for (i = 0; i < card->nmodes; i++, mi++) {
+ if (mi->mode == mode)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* Set mode (without recalc) */
+static int raw_set_mode(u16 mode, u16 *real_mode)
+{
+ int nmode, i;
+ struct card_info *card;
+ struct mode_info *mi;
+
+ /* Drop the recalc bit if set */
+ mode &= ~VIDEO_RECALC;
+
+ /* Scan for mode based on fixed ID, position, or resolution */
+ nmode = 0;
+ for (card = video_cards; card < video_cards_end; card++) {
+ mi = card->modes;
+ for (i = 0; i < card->nmodes; i++, mi++) {
+ int visible = mi->x || mi->y;
+
+ if ((mode == nmode && visible) ||
+ mode == mi->mode ||
+ mode == (mi->y << 8)+mi->x) {
+ *real_mode = mi->mode;
+ return card->set_mode(mi);
+ }
+
+ if (visible)
+ nmode++;
+ }
+ }
+
+ /* Nothing found? Is it an "exceptional" (unprobed) mode? */
+ for (card = video_cards; card < video_cards_end; card++) {
+ if (mode >= card->xmode_first &&
+ mode < card->xmode_first+card->xmode_n) {
+ struct mode_info mix;
+ *real_mode = mix.mode = mode;
+ mix.x = mix.y = 0;
+ return card->set_mode(&mix);
+ }
+ }
+
+ /* Otherwise, failure... */
+ return -1;
+}
+
+/*
+ * Recalculate the vertical video cutoff (hack!)
+ */
+static void vga_recalc_vertical(void)
+{
+ unsigned int font_size, rows;
+ u16 crtc;
+ u8 pt, ov;
+
+ set_fs(0);
+ font_size = rdfs8(0x485); /* BIOS: font size (pixels) */
+ rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */
+
+ rows *= font_size; /* Visible scan lines */
+ rows--; /* ... minus one */
+
+ crtc = vga_crtc();
+
+ pt = in_idx(crtc, 0x11);
+ pt &= ~0x80; /* Unlock CR0-7 */
+ out_idx(pt, crtc, 0x11);
+
+ out_idx((u8)rows, crtc, 0x12); /* Lower height register */
+
+ ov = in_idx(crtc, 0x07); /* Overflow register */
+ ov &= 0xbd;
+ ov |= (rows >> (8-1)) & 0x02;
+ ov |= (rows >> (9-6)) & 0x40;
+ out_idx(ov, crtc, 0x07);
+}
+
+/* Set mode (with recalc if specified) */
+int set_mode(u16 mode)
+{
+ int rv;
+ u16 real_mode;
+
+ /* Very special mode numbers... */
+ if (mode == VIDEO_CURRENT_MODE)
+ return 0; /* Nothing to do... */
+ else if (mode == NORMAL_VGA)
+ mode = VIDEO_80x25;
+ else if (mode == EXTENDED_VGA)
+ mode = VIDEO_8POINT;
+
+ rv = raw_set_mode(mode, &real_mode);
+ if (rv)
+ return rv;
+
+ if (mode & VIDEO_RECALC)
+ vga_recalc_vertical();
+
+ /* Save the canonical mode number for the kernel, not
+ an alias, size specification or menu position */
+#ifndef _WAKEUP
+ boot_params.hdr.vid_mode = real_mode;
+#endif
+ return 0;
+}
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
new file mode 100644
index 000000000..c2c6d35e3
--- /dev/null
+++ b/arch/x86/boot/video-vesa.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * VESA text modes
+ */
+
+#include "boot.h"
+#include "video.h"
+#include "vesa.h"
+#include "string.h"
+
+/* VESA information */
+static struct vesa_general_info vginfo;
+static struct vesa_mode_info vminfo;
+
+static __videocard video_vesa;
+
+#ifndef _WAKEUP
+static void vesa_store_mode_params_graphics(void);
+#else /* _WAKEUP */
+static inline void vesa_store_mode_params_graphics(void) {}
+#endif /* _WAKEUP */
+
+static int vesa_probe(void)
+{
+ struct biosregs ireg, oreg;
+ u16 mode;
+ addr_t mode_ptr;
+ struct mode_info *mi;
+ int nmodes = 0;
+
+ video_vesa.modes = GET_HEAP(struct mode_info, 0);
+
+ initregs(&ireg);
+ ireg.ax = 0x4f00;
+ ireg.di = (size_t)&vginfo;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f ||
+ vginfo.signature != VESA_MAGIC ||
+ vginfo.version < 0x0102)
+ return 0; /* Not present */
+
+ set_fs(vginfo.video_mode_ptr.seg);
+ mode_ptr = vginfo.video_mode_ptr.off;
+
+ while ((mode = rdfs16(mode_ptr)) != 0xffff) {
+ mode_ptr += 2;
+
+ if (!heap_free(sizeof(struct mode_info)))
+ break; /* Heap full, can't save mode info */
+
+ if (mode & ~0x1ff)
+ continue;
+
+ memset(&vminfo, 0, sizeof(vminfo)); /* Just in case... */
+
+ ireg.ax = 0x4f01;
+ ireg.cx = mode;
+ ireg.di = (size_t)&vminfo;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f)
+ continue;
+
+ if ((vminfo.mode_attr & 0x15) == 0x05) {
+ /* Text Mode, TTY BIOS supported,
+ supported by hardware */
+ mi = GET_HEAP(struct mode_info, 1);
+ mi->mode = mode + VIDEO_FIRST_VESA;
+ mi->depth = 0; /* text */
+ mi->x = vminfo.h_res;
+ mi->y = vminfo.v_res;
+ nmodes++;
+ } else if ((vminfo.mode_attr & 0x99) == 0x99 &&
+ (vminfo.memory_layout == 4 ||
+ vminfo.memory_layout == 6) &&
+ vminfo.memory_planes == 1) {
+#ifdef CONFIG_BOOT_VESA_SUPPORT
+ /* Graphics mode, color, linear frame buffer
+ supported. Only register the mode if
+ if framebuffer is configured, however,
+ otherwise the user will be left without a screen. */
+ mi = GET_HEAP(struct mode_info, 1);
+ mi->mode = mode + VIDEO_FIRST_VESA;
+ mi->depth = vminfo.bpp;
+ mi->x = vminfo.h_res;
+ mi->y = vminfo.v_res;
+ nmodes++;
+#endif
+ }
+ }
+
+ return nmodes;
+}
+
+static int vesa_set_mode(struct mode_info *mode)
+{
+ struct biosregs ireg, oreg;
+ int is_graphic;
+ u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
+
+ memset(&vminfo, 0, sizeof(vminfo)); /* Just in case... */
+
+ initregs(&ireg);
+ ireg.ax = 0x4f01;
+ ireg.cx = vesa_mode;
+ ireg.di = (size_t)&vminfo;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f)
+ return -1;
+
+ if ((vminfo.mode_attr & 0x15) == 0x05) {
+ /* It's a supported text mode */
+ is_graphic = 0;
+#ifdef CONFIG_BOOT_VESA_SUPPORT
+ } else if ((vminfo.mode_attr & 0x99) == 0x99) {
+ /* It's a graphics mode with linear frame buffer */
+ is_graphic = 1;
+ vesa_mode |= 0x4000; /* Request linear frame buffer */
+#endif
+ } else {
+ return -1; /* Invalid mode */
+ }
+
+
+ initregs(&ireg);
+ ireg.ax = 0x4f02;
+ ireg.bx = vesa_mode;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f)
+ return -1;
+
+ graphic_mode = is_graphic;
+ if (!is_graphic) {
+ /* Text mode */
+ force_x = mode->x;
+ force_y = mode->y;
+ do_restore = 1;
+ } else {
+ /* Graphics mode */
+ vesa_store_mode_params_graphics();
+ }
+
+ return 0;
+}
+
+
+#ifndef _WAKEUP
+
+/* Switch DAC to 8-bit mode */
+static void vesa_dac_set_8bits(void)
+{
+ struct biosregs ireg, oreg;
+ u8 dac_size = 6;
+
+ /* If possible, switch the DAC to 8-bit mode */
+ if (vginfo.capabilities & 1) {
+ initregs(&ireg);
+ ireg.ax = 0x4f08;
+ ireg.bh = 0x08;
+ intcall(0x10, &ireg, &oreg);
+ if (oreg.ax == 0x004f)
+ dac_size = oreg.bh;
+ }
+
+ /* Set the color sizes to the DAC size, and offsets to 0 */
+ boot_params.screen_info.red_size = dac_size;
+ boot_params.screen_info.green_size = dac_size;
+ boot_params.screen_info.blue_size = dac_size;
+ boot_params.screen_info.rsvd_size = dac_size;
+
+ boot_params.screen_info.red_pos = 0;
+ boot_params.screen_info.green_pos = 0;
+ boot_params.screen_info.blue_pos = 0;
+ boot_params.screen_info.rsvd_pos = 0;
+}
+
+/* Save the VESA protected mode info */
+static void vesa_store_pm_info(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ax = 0x4f0a;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f)
+ return;
+
+ boot_params.screen_info.vesapm_seg = oreg.es;
+ boot_params.screen_info.vesapm_off = oreg.di;
+}
+
+/*
+ * Save video mode parameters for graphics mode
+ */
+static void vesa_store_mode_params_graphics(void)
+{
+ /* Tell the kernel we're in VESA graphics mode */
+ boot_params.screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
+
+ /* Mode parameters */
+ boot_params.screen_info.vesa_attributes = vminfo.mode_attr;
+ boot_params.screen_info.lfb_linelength = vminfo.logical_scan;
+ boot_params.screen_info.lfb_width = vminfo.h_res;
+ boot_params.screen_info.lfb_height = vminfo.v_res;
+ boot_params.screen_info.lfb_depth = vminfo.bpp;
+ boot_params.screen_info.pages = vminfo.image_planes;
+ boot_params.screen_info.lfb_base = vminfo.lfb_ptr;
+ memcpy(&boot_params.screen_info.red_size,
+ &vminfo.rmask, 8);
+
+ /* General parameters */
+ boot_params.screen_info.lfb_size = vginfo.total_memory;
+
+ if (vminfo.bpp <= 8)
+ vesa_dac_set_8bits();
+
+ vesa_store_pm_info();
+}
+
+/*
+ * Save EDID information for the kernel; this is invoked, separately,
+ * after mode-setting.
+ */
+void vesa_store_edid(void)
+{
+#ifdef CONFIG_FIRMWARE_EDID
+ struct biosregs ireg, oreg;
+
+ /* Apparently used as a nonsense token... */
+ memset(&boot_params.edid_info, 0x13, sizeof(boot_params.edid_info));
+
+ if (vginfo.version < 0x0200)
+ return; /* EDID requires VBE 2.0+ */
+
+ initregs(&ireg);
+ ireg.ax = 0x4f15; /* VBE DDC */
+ /* ireg.bx = 0x0000; */ /* Report DDC capabilities */
+ /* ireg.cx = 0; */ /* Controller 0 */
+ ireg.es = 0; /* ES:DI must be 0 by spec */
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f)
+ return; /* No EDID */
+
+ /* BH = time in seconds to transfer EDD information */
+ /* BL = DDC level supported */
+
+ ireg.ax = 0x4f15; /* VBE DDC */
+ ireg.bx = 0x0001; /* Read EDID */
+ /* ireg.cx = 0; */ /* Controller 0 */
+ /* ireg.dx = 0; */ /* EDID block number */
+ ireg.es = ds();
+ ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */
+ intcall(0x10, &ireg, &oreg);
+#endif /* CONFIG_FIRMWARE_EDID */
+}
+
+#endif /* not _WAKEUP */
+
+static __videocard video_vesa =
+{
+ .card_name = "VESA",
+ .probe = vesa_probe,
+ .set_mode = vesa_set_mode,
+ .xmode_first = VIDEO_FIRST_VESA,
+ .xmode_n = 0x200,
+};
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
new file mode 100644
index 000000000..4816cb9cf
--- /dev/null
+++ b/arch/x86/boot/video-vga.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Common all-VGA modes
+ */
+
+#include "boot.h"
+#include "video.h"
+
+static struct mode_info vga_modes[] = {
+ { VIDEO_80x25, 80, 25, 0 },
+ { VIDEO_8POINT, 80, 50, 0 },
+ { VIDEO_80x43, 80, 43, 0 },
+ { VIDEO_80x28, 80, 28, 0 },
+ { VIDEO_80x30, 80, 30, 0 },
+ { VIDEO_80x34, 80, 34, 0 },
+ { VIDEO_80x60, 80, 60, 0 },
+};
+
+static struct mode_info ega_modes[] = {
+ { VIDEO_80x25, 80, 25, 0 },
+ { VIDEO_8POINT, 80, 43, 0 },
+};
+
+static struct mode_info cga_modes[] = {
+ { VIDEO_80x25, 80, 25, 0 },
+};
+
+static __videocard video_vga;
+
+/* Set basic 80x25 mode */
+static u8 vga_set_basic_mode(void)
+{
+ struct biosregs ireg, oreg;
+ u8 mode;
+
+ initregs(&ireg);
+
+ /* Query current mode */
+ ireg.ax = 0x0f00;
+ intcall(0x10, &ireg, &oreg);
+ mode = oreg.al;
+
+ if (mode != 3 && mode != 7)
+ mode = 3;
+
+ /* Set the mode */
+ ireg.ax = mode; /* AH=0: set mode */
+ intcall(0x10, &ireg, NULL);
+ do_restore = 1;
+ return mode;
+}
+
+static void vga_set_8font(void)
+{
+ /* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */
+ struct biosregs ireg;
+
+ initregs(&ireg);
+
+ /* Set 8x8 font */
+ ireg.ax = 0x1112;
+ /* ireg.bl = 0; */
+ intcall(0x10, &ireg, NULL);
+
+ /* Use alternate print screen */
+ ireg.ax = 0x1200;
+ ireg.bl = 0x20;
+ intcall(0x10, &ireg, NULL);
+
+ /* Turn off cursor emulation */
+ ireg.ax = 0x1201;
+ ireg.bl = 0x34;
+ intcall(0x10, &ireg, NULL);
+
+ /* Cursor is scan lines 6-7 */
+ ireg.ax = 0x0100;
+ ireg.cx = 0x0607;
+ intcall(0x10, &ireg, NULL);
+}
+
+static void vga_set_14font(void)
+{
+ /* Set 9x14 font - 80x28 on VGA */
+ struct biosregs ireg;
+
+ initregs(&ireg);
+
+ /* Set 9x14 font */
+ ireg.ax = 0x1111;
+ /* ireg.bl = 0; */
+ intcall(0x10, &ireg, NULL);
+
+ /* Turn off cursor emulation */
+ ireg.ax = 0x1201;
+ ireg.bl = 0x34;
+ intcall(0x10, &ireg, NULL);
+
+ /* Cursor is scan lines 11-12 */
+ ireg.ax = 0x0100;
+ ireg.cx = 0x0b0c;
+ intcall(0x10, &ireg, NULL);
+}
+
+static void vga_set_80x43(void)
+{
+ /* Set 80x43 mode on VGA (not EGA) */
+ struct biosregs ireg;
+
+ initregs(&ireg);
+
+ /* Set 350 scans */
+ ireg.ax = 0x1201;
+ ireg.bl = 0x30;
+ intcall(0x10, &ireg, NULL);
+
+ /* Reset video mode */
+ ireg.ax = 0x0003;
+ intcall(0x10, &ireg, NULL);
+
+ vga_set_8font();
+}
+
+/* I/O address of the VGA CRTC */
+u16 vga_crtc(void)
+{
+ return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4;
+}
+
+static void vga_set_480_scanlines(void)
+{
+ u16 crtc; /* CRTC base address */
+ u8 csel; /* CRTC miscellaneous output register */
+
+ crtc = vga_crtc();
+
+ out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */
+ out_idx(0x0b, crtc, 0x06); /* Vertical total */
+ out_idx(0x3e, crtc, 0x07); /* Vertical overflow */
+ out_idx(0xea, crtc, 0x10); /* Vertical sync start */
+ out_idx(0xdf, crtc, 0x12); /* Vertical display end */
+ out_idx(0xe7, crtc, 0x15); /* Vertical blank start */
+ out_idx(0x04, crtc, 0x16); /* Vertical blank end */
+ csel = inb(0x3cc);
+ csel &= 0x0d;
+ csel |= 0xe2;
+ outb(csel, 0x3c2);
+}
+
+static void vga_set_vertical_end(int lines)
+{
+ u16 crtc; /* CRTC base address */
+ u8 ovfw; /* CRTC overflow register */
+ int end = lines-1;
+
+ crtc = vga_crtc();
+
+ ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40);
+
+ out_idx(ovfw, crtc, 0x07); /* Vertical overflow */
+ out_idx(end, crtc, 0x12); /* Vertical display end */
+}
+
+static void vga_set_80x30(void)
+{
+ vga_set_480_scanlines();
+ vga_set_vertical_end(30*16);
+}
+
+static void vga_set_80x34(void)
+{
+ vga_set_480_scanlines();
+ vga_set_14font();
+ vga_set_vertical_end(34*14);
+}
+
+static void vga_set_80x60(void)
+{
+ vga_set_480_scanlines();
+ vga_set_8font();
+ vga_set_vertical_end(60*8);
+}
+
+static int vga_set_mode(struct mode_info *mode)
+{
+ /* Set the basic mode */
+ vga_set_basic_mode();
+
+ /* Override a possibly broken BIOS */
+ force_x = mode->x;
+ force_y = mode->y;
+
+ switch (mode->mode) {
+ case VIDEO_80x25:
+ break;
+ case VIDEO_8POINT:
+ vga_set_8font();
+ break;
+ case VIDEO_80x43:
+ vga_set_80x43();
+ break;
+ case VIDEO_80x28:
+ vga_set_14font();
+ break;
+ case VIDEO_80x30:
+ vga_set_80x30();
+ break;
+ case VIDEO_80x34:
+ vga_set_80x34();
+ break;
+ case VIDEO_80x60:
+ vga_set_80x60();
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Note: this probe includes basic information required by all
+ * systems. It should be executed first, by making sure
+ * video-vga.c is listed first in the Makefile.
+ */
+static int vga_probe(void)
+{
+ static const char *card_name[] = {
+ "CGA/MDA/HGC", "EGA", "VGA"
+ };
+ static struct mode_info *mode_lists[] = {
+ cga_modes,
+ ega_modes,
+ vga_modes,
+ };
+ static int mode_count[] = {
+ ARRAY_SIZE(cga_modes),
+ ARRAY_SIZE(ega_modes),
+ ARRAY_SIZE(vga_modes),
+ };
+
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+
+ ireg.ax = 0x1200;
+ ireg.bl = 0x10; /* Check EGA/VGA */
+ intcall(0x10, &ireg, &oreg);
+
+#ifndef _WAKEUP
+ boot_params.screen_info.orig_video_ega_bx = oreg.bx;
+#endif
+
+ /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
+ if (oreg.bl != 0x10) {
+ /* EGA/VGA */
+ ireg.ax = 0x1a00;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.al == 0x1a) {
+ adapter = ADAPTER_VGA;
+#ifndef _WAKEUP
+ boot_params.screen_info.orig_video_isVGA = 1;
+#endif
+ } else {
+ adapter = ADAPTER_EGA;
+ }
+ } else {
+ adapter = ADAPTER_CGA;
+ }
+
+ video_vga.modes = mode_lists[adapter];
+ video_vga.card_name = card_name[adapter];
+ return mode_count[adapter];
+}
+
+static __videocard video_vga = {
+ .card_name = "VGA",
+ .probe = vga_probe,
+ .set_mode = vga_set_mode,
+};
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
new file mode 100644
index 000000000..f2e96905b
--- /dev/null
+++ b/arch/x86/boot/video.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Select video mode
+ */
+
+#include <uapi/asm/boot.h>
+
+#include "boot.h"
+#include "video.h"
+#include "vesa.h"
+
+static u16 video_segment;
+
+static void store_cursor_position(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ah = 0x03;
+ intcall(0x10, &ireg, &oreg);
+
+ boot_params.screen_info.orig_x = oreg.dl;
+ boot_params.screen_info.orig_y = oreg.dh;
+
+ if (oreg.ch & 0x20)
+ boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
+
+ if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f))
+ boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
+}
+
+static void store_video_mode(void)
+{
+ struct biosregs ireg, oreg;
+
+ /* N.B.: the saving of the video page here is a bit silly,
+ since we pretty much assume page 0 everywhere. */
+ initregs(&ireg);
+ ireg.ah = 0x0f;
+ intcall(0x10, &ireg, &oreg);
+
+ /* Not all BIOSes are clean with respect to the top bit */
+ boot_params.screen_info.orig_video_mode = oreg.al & 0x7f;
+ boot_params.screen_info.orig_video_page = oreg.bh;
+}
+
+/*
+ * Store the video mode parameters for later usage by the kernel.
+ * This is done by asking the BIOS except for the rows/columns
+ * parameters in the default 80x25 mode -- these are set directly,
+ * because some very obscure BIOSes supply insane values.
+ */
+static void store_mode_params(void)
+{
+ u16 font_size;
+ int x, y;
+
+ /* For graphics mode, it is up to the mode-setting driver
+ (currently only video-vesa.c) to store the parameters */
+ if (graphic_mode)
+ return;
+
+ store_cursor_position();
+ store_video_mode();
+
+ if (boot_params.screen_info.orig_video_mode == 0x07) {
+ /* MDA, HGC, or VGA in monochrome mode */
+ video_segment = 0xb000;
+ } else {
+ /* CGA, EGA, VGA and so forth */
+ video_segment = 0xb800;
+ }
+
+ set_fs(0);
+ font_size = rdfs16(0x485); /* Font size, BIOS area */
+ boot_params.screen_info.orig_video_points = font_size;
+
+ x = rdfs16(0x44a);
+ y = (adapter == ADAPTER_CGA) ? 25 : rdfs8(0x484)+1;
+
+ if (force_x)
+ x = force_x;
+ if (force_y)
+ y = force_y;
+
+ boot_params.screen_info.orig_video_cols = x;
+ boot_params.screen_info.orig_video_lines = y;
+}
+
+static unsigned int get_entry(void)
+{
+ char entry_buf[4];
+ int i, len = 0;
+ int key;
+ unsigned int v;
+
+ do {
+ key = getchar();
+
+ if (key == '\b') {
+ if (len > 0) {
+ puts("\b \b");
+ len--;
+ }
+ } else if ((key >= '0' && key <= '9') ||
+ (key >= 'A' && key <= 'Z') ||
+ (key >= 'a' && key <= 'z')) {
+ if (len < sizeof(entry_buf)) {
+ entry_buf[len++] = key;
+ putchar(key);
+ }
+ }
+ } while (key != '\r');
+ putchar('\n');
+
+ if (len == 0)
+ return VIDEO_CURRENT_MODE; /* Default */
+
+ v = 0;
+ for (i = 0; i < len; i++) {
+ v <<= 4;
+ key = entry_buf[i] | 0x20;
+ v += (key > '9') ? key-'a'+10 : key-'0';
+ }
+
+ return v;
+}
+
+static void display_menu(void)
+{
+ struct card_info *card;
+ struct mode_info *mi;
+ char ch;
+ int i;
+ int nmodes;
+ int modes_per_line;
+ int col;
+
+ nmodes = 0;
+ for (card = video_cards; card < video_cards_end; card++)
+ nmodes += card->nmodes;
+
+ modes_per_line = 1;
+ if (nmodes >= 20)
+ modes_per_line = 3;
+
+ for (col = 0; col < modes_per_line; col++)
+ puts("Mode: Resolution: Type: ");
+ putchar('\n');
+
+ col = 0;
+ ch = '0';
+ for (card = video_cards; card < video_cards_end; card++) {
+ mi = card->modes;
+ for (i = 0; i < card->nmodes; i++, mi++) {
+ char resbuf[32];
+ int visible = mi->x && mi->y;
+ u16 mode_id = mi->mode ? mi->mode :
+ (mi->y << 8)+mi->x;
+
+ if (!visible)
+ continue; /* Hidden mode */
+
+ if (mi->depth)
+ sprintf(resbuf, "%dx%d", mi->y, mi->depth);
+ else
+ sprintf(resbuf, "%d", mi->y);
+
+ printf("%c %03X %4dx%-7s %-6s",
+ ch, mode_id, mi->x, resbuf, card->card_name);
+ col++;
+ if (col >= modes_per_line) {
+ putchar('\n');
+ col = 0;
+ }
+
+ if (ch == '9')
+ ch = 'a';
+ else if (ch == 'z' || ch == ' ')
+ ch = ' '; /* Out of keys... */
+ else
+ ch++;
+ }
+ }
+ if (col)
+ putchar('\n');
+}
+
+#define H(x) ((x)-'a'+10)
+#define SCAN ((H('s')<<12)+(H('c')<<8)+(H('a')<<4)+H('n'))
+
+static unsigned int mode_menu(void)
+{
+ int key;
+ unsigned int sel;
+
+ puts("Press <ENTER> to see video modes available, "
+ "<SPACE> to continue, or wait 30 sec\n");
+
+ kbd_flush();
+ while (1) {
+ key = getchar_timeout();
+ if (key == ' ' || key == 0)
+ return VIDEO_CURRENT_MODE; /* Default */
+ if (key == '\r')
+ break;
+ putchar('\a'); /* Beep! */
+ }
+
+
+ for (;;) {
+ display_menu();
+
+ puts("Enter a video mode or \"scan\" to scan for "
+ "additional modes: ");
+ sel = get_entry();
+ if (sel != SCAN)
+ return sel;
+
+ probe_cards(1);
+ }
+}
+
+/* Save screen content to the heap */
+static struct saved_screen {
+ int x, y;
+ int curx, cury;
+ u16 *data;
+} saved;
+
+static void save_screen(void)
+{
+ /* Should be called after store_mode_params() */
+ saved.x = boot_params.screen_info.orig_video_cols;
+ saved.y = boot_params.screen_info.orig_video_lines;
+ saved.curx = boot_params.screen_info.orig_x;
+ saved.cury = boot_params.screen_info.orig_y;
+
+ if (!heap_free(saved.x*saved.y*sizeof(u16)+512))
+ return; /* Not enough heap to save the screen */
+
+ saved.data = GET_HEAP(u16, saved.x*saved.y);
+
+ set_fs(video_segment);
+ copy_from_fs(saved.data, 0, saved.x*saved.y*sizeof(u16));
+}
+
+static void restore_screen(void)
+{
+ /* Should be called after store_mode_params() */
+ int xs = boot_params.screen_info.orig_video_cols;
+ int ys = boot_params.screen_info.orig_video_lines;
+ int y;
+ addr_t dst = 0;
+ u16 *src = saved.data;
+ struct biosregs ireg;
+
+ if (graphic_mode)
+ return; /* Can't restore onto a graphic mode */
+
+ if (!src)
+ return; /* No saved screen contents */
+
+ /* Restore screen contents */
+
+ set_fs(video_segment);
+ for (y = 0; y < ys; y++) {
+ int npad;
+
+ if (y < saved.y) {
+ int copy = (xs < saved.x) ? xs : saved.x;
+ copy_to_fs(dst, src, copy*sizeof(u16));
+ dst += copy*sizeof(u16);
+ src += saved.x;
+ npad = (xs < saved.x) ? 0 : xs-saved.x;
+ } else {
+ npad = xs;
+ }
+
+ /* Writes "npad" blank characters to
+ video_segment:dst and advances dst */
+ asm volatile("pushw %%es ; "
+ "movw %2,%%es ; "
+ "shrw %%cx ; "
+ "jnc 1f ; "
+ "stosw \n\t"
+ "1: rep;stosl ; "
+ "popw %%es"
+ : "+D" (dst), "+c" (npad)
+ : "bdS" (video_segment),
+ "a" (0x07200720));
+ }
+
+ /* Restore cursor position */
+ if (saved.curx >= xs)
+ saved.curx = xs-1;
+ if (saved.cury >= ys)
+ saved.cury = ys-1;
+
+ initregs(&ireg);
+ ireg.ah = 0x02; /* Set cursor position */
+ ireg.dh = saved.cury;
+ ireg.dl = saved.curx;
+ intcall(0x10, &ireg, NULL);
+
+ store_cursor_position();
+}
+
+void set_video(void)
+{
+ u16 mode = boot_params.hdr.vid_mode;
+
+ RESET_HEAP();
+
+ store_mode_params();
+ save_screen();
+ probe_cards(0);
+
+ for (;;) {
+ if (mode == ASK_VGA)
+ mode = mode_menu();
+
+ if (!set_mode(mode))
+ break;
+
+ printf("Undefined video mode number: %x\n", mode);
+ mode = ASK_VGA;
+ }
+ boot_params.hdr.vid_mode = mode;
+ vesa_store_edid();
+ store_mode_params();
+
+ if (do_restore)
+ restore_screen();
+}
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
new file mode 100644
index 000000000..04bde0bb2
--- /dev/null
+++ b/arch/x86/boot/video.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Header file for the real-mode video probing code
+ */
+
+#ifndef BOOT_VIDEO_H
+#define BOOT_VIDEO_H
+
+#include <linux/types.h>
+
+/*
+ * This code uses an extended set of video mode numbers. These include:
+ * Aliases for standard modes
+ * NORMAL_VGA (-1)
+ * EXTENDED_VGA (-2)
+ * ASK_VGA (-3)
+ * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
+ * of compatibility when extending the table. These are between 0x00 and 0xff.
+ */
+#define VIDEO_FIRST_MENU 0x0000
+
+/* Standard BIOS video modes (BIOS number + 0x0100) */
+#define VIDEO_FIRST_BIOS 0x0100
+
+/* VESA BIOS video modes (VESA number + 0x0200) */
+#define VIDEO_FIRST_VESA 0x0200
+
+/* Video7 special modes (BIOS number + 0x0900) */
+#define VIDEO_FIRST_V7 0x0900
+
+/* Special video modes */
+#define VIDEO_FIRST_SPECIAL 0x0f00
+#define VIDEO_80x25 0x0f00
+#define VIDEO_8POINT 0x0f01
+#define VIDEO_80x43 0x0f02
+#define VIDEO_80x28 0x0f03
+#define VIDEO_CURRENT_MODE 0x0f04
+#define VIDEO_80x30 0x0f05
+#define VIDEO_80x34 0x0f06
+#define VIDEO_80x60 0x0f07
+#define VIDEO_GFX_HACK 0x0f08
+#define VIDEO_LAST_SPECIAL 0x0f09
+
+/* Video modes given by resolution */
+#define VIDEO_FIRST_RESOLUTION 0x1000
+
+/* The "recalculate timings" flag */
+#define VIDEO_RECALC 0x8000
+
+void store_screen(void);
+#define DO_STORE() store_screen()
+
+/*
+ * Mode table structures
+ */
+
+struct mode_info {
+ u16 mode; /* Mode number (vga= style) */
+ u16 x, y; /* Width, height */
+ u16 depth; /* Bits per pixel, 0 for text mode */
+};
+
+struct card_info {
+ const char *card_name;
+ int (*set_mode)(struct mode_info *mode);
+ int (*probe)(void);
+ struct mode_info *modes;
+ int nmodes; /* Number of probed modes so far */
+ int unsafe; /* Probing is unsafe, only do after "scan" */
+ u16 xmode_first; /* Unprobed modes to try to call anyway */
+ u16 xmode_n; /* Size of unprobed mode range */
+};
+
+#define __videocard struct card_info __section(".videocards") __attribute__((used))
+extern struct card_info video_cards[], video_cards_end[];
+
+int mode_defined(u16 mode); /* video.c */
+
+/* Basic video information */
+#define ADAPTER_CGA 0 /* CGA/MDA/HGC */
+#define ADAPTER_EGA 1
+#define ADAPTER_VGA 2
+
+extern int adapter;
+extern int force_x, force_y; /* Don't query the BIOS for cols/rows */
+extern int do_restore; /* Restore screen contents */
+extern int graphic_mode; /* Graphics mode with linear frame buffer */
+
+/* Accessing VGA indexed registers */
+static inline u8 in_idx(u16 port, u8 index)
+{
+ outb(index, port);
+ return inb(port+1);
+}
+
+static inline void out_idx(u8 v, u16 port, u8 index)
+{
+ outw(index+(v << 8), port);
+}
+
+/* Writes a value to an indexed port and then reads the port again */
+static inline u8 tst_idx(u8 v, u16 port, u8 index)
+{
+ out_idx(port, index, v);
+ return in_idx(port, index);
+}
+
+/* Get the I/O port of the VGA CRTC */
+u16 vga_crtc(void); /* video-vga.c */
+
+#endif /* BOOT_VIDEO_H */