summaryrefslogtreecommitdiffstats
path: root/arch/x86/boot
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/boot')
-rw-r--r--arch/x86/boot/.gitignore12
-rw-r--r--arch/x86/boot/Makefile156
-rw-r--r--arch/x86/boot/a20.c165
-rw-r--r--arch/x86/boot/apm.c75
-rw-r--r--arch/x86/boot/bioscall.S82
-rw-r--r--arch/x86/boot/bitops.h46
-rw-r--r--arch/x86/boot/boot.h358
-rw-r--r--arch/x86/boot/cmdline.c158
-rw-r--r--arch/x86/boot/code16gcc.h12
-rw-r--r--arch/x86/boot/compressed/.gitignore6
-rw-r--r--arch/x86/boot/compressed/Makefile160
-rw-r--r--arch/x86/boot/compressed/cmdline.c34
-rw-r--r--arch/x86/boot/compressed/cpuflags.c13
-rw-r--r--arch/x86/boot/compressed/early_serial_console.c5
-rw-r--r--arch/x86/boot/compressed/eboot.c934
-rw-r--r--arch/x86/boot/compressed/eboot.h33
-rw-r--r--arch/x86/boot/compressed/efi_stub_32.S87
-rw-r--r--arch/x86/boot/compressed/efi_stub_64.S5
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S197
-rw-r--r--arch/x86/boot/compressed/error.c24
-rw-r--r--arch/x86/boot/compressed/error.h10
-rw-r--r--arch/x86/boot/compressed/head_32.S283
-rw-r--r--arch/x86/boot/compressed/head_64.S721
-rw-r--r--arch/x86/boot/compressed/kaslr.c867
-rw-r--r--arch/x86/boot/compressed/kaslr_64.c153
-rw-r--r--arch/x86/boot/compressed/mem_encrypt.S104
-rw-r--r--arch/x86/boot/compressed/misc.c429
-rw-r--r--arch/x86/boot/compressed/misc.h116
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c82
-rw-r--r--arch/x86/boot/compressed/pgtable.h20
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c211
-rw-r--r--arch/x86/boot/compressed/string.c75
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S76
-rw-r--r--arch/x86/boot/copy.S67
-rw-r--r--arch/x86/boot/cpu.c101
-rw-r--r--arch/x86/boot/cpucheck.c229
-rw-r--r--arch/x86/boot/cpuflags.c129
-rw-r--r--arch/x86/boot/cpuflags.h21
-rw-r--r--arch/x86/boot/ctype.h21
-rw-r--r--arch/x86/boot/early_serial_console.c154
-rw-r--r--arch/x86/boot/edd.c182
-rw-r--r--arch/x86/boot/genimage.sh130
-rw-r--r--arch/x86/boot/header.S636
-rw-r--r--arch/x86/boot/install.sh59
-rw-r--r--arch/x86/boot/main.c182
-rw-r--r--arch/x86/boot/memory.c136
-rw-r--r--arch/x86/boot/mkcpustr.c52
-rw-r--r--arch/x86/boot/mtools.conf.in17
-rw-r--r--arch/x86/boot/pm.c126
-rw-r--r--arch/x86/boot/pmjump.S77
-rw-r--r--arch/x86/boot/printf.c309
-rw-r--r--arch/x86/boot/regs.c30
-rw-r--r--arch/x86/boot/setup.ld64
-rw-r--r--arch/x86/boot/string.c197
-rw-r--r--arch/x86/boot/string.h32
-rw-r--r--arch/x86/boot/tools/.gitignore1
-rw-r--r--arch/x86/boot/tools/build.c442
-rw-r--r--arch/x86/boot/tty.c139
-rw-r--r--arch/x86/boot/version.c21
-rw-r--r--arch/x86/boot/vesa.h72
-rw-r--r--arch/x86/boot/video-bios.c128
-rw-r--r--arch/x86/boot/video-mode.c173
-rw-r--r--arch/x86/boot/video-vesa.c281
-rw-r--r--arch/x86/boot/video-vga.c288
-rw-r--r--arch/x86/boot/video.c345
-rw-r--r--arch/x86/boot/video.h120
66 files changed, 10670 insertions, 0 deletions
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
new file mode 100644
index 000000000..09d25dd09
--- /dev/null
+++ b/arch/x86/boot/.gitignore
@@ -0,0 +1,12 @@
+bootsect
+bzImage
+cpustr.h
+mkcpustr
+voffset.h
+zoffset.h
+setup
+setup.bin
+setup.elf
+fdimage
+mtools.conf
+image.iso
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
new file mode 100644
index 000000000..6539c50fb
--- /dev/null
+++ b/arch/x86/boot/Makefile
@@ -0,0 +1,156 @@
+#
+# arch/x86/boot/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+# Changed by many, many contributors over the years.
+#
+
+KASAN_SANITIZE := n
+OBJECT_FILES_NON_STANDARD := y
+
+# Kernel does not boot with kcov instrumentation here.
+# One of the problems observed was insertion of __sanitizer_cov_trace_pc()
+# callback into middle of per-cpu data enabling code. Thus the callback observed
+# inconsistent state and crashed. We are interested mostly in syscall coverage,
+# so boot code is not interesting anyway.
+KCOV_INSTRUMENT := n
+
+# If you want to preset the SVGA mode, uncomment the next line and
+# set SVGA_MODE to whatever number you want.
+# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
+# The number is the same as you would ordinarily press at bootup.
+
+SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
+
+targets := vmlinux.bin setup.bin setup.elf bzImage
+targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
+subdir- := compressed
+
+setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
+setup-y += early_serial_console.o edd.o header.o main.o memory.o
+setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o
+setup-y += video-mode.o version.o
+setup-$(CONFIG_X86_APM_BOOT) += apm.o
+
+# The link order of the video-*.o modules can matter. In particular,
+# video-vga.o *must* be listed first, followed by video-vesa.o.
+# Hardware-specific drivers should follow in the order they should be
+# probed, and video-bios.o should typically be last.
+setup-y += video-vga.o
+setup-y += video-vesa.o
+setup-y += video-bios.o
+
+targets += $(setup-y)
+hostprogs-y := tools/build
+hostprogs-$(CONFIG_X86_FEATURE_NAMES) += mkcpustr
+
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include \
+ -include include/generated/autoconf.h \
+ -D__EXPORTED_HEADERS__
+
+ifdef CONFIG_X86_FEATURE_NAMES
+$(obj)/cpu.o: $(obj)/cpustr.h
+
+quiet_cmd_cpustr = CPUSTR $@
+ cmd_cpustr = $(obj)/mkcpustr > $@
+targets += cpustr.h
+$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
+ $(call if_changed,cpustr)
+endif
+clean-files += cpustr.h
+
+# ---------------------------------------------------------------------------
+
+KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP
+KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+
+$(obj)/bzImage: asflags-y := $(SVGA_MODE)
+
+quiet_cmd_image = BUILD $@
+silent_redirect_image = >/dev/null
+cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
+ $(obj)/zoffset.h $@ $($(quiet)redirect_image)
+
+$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
+ $(call if_changed,image)
+ @$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')'
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
+$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
+ $(call if_changed,objcopy)
+
+SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
+
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+
+quiet_cmd_zoffset = ZOFFSET $@
+ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
+
+targets += zoffset.h
+$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
+ $(call if_changed,zoffset)
+
+
+AFLAGS_header.o += -I$(objtree)/$(obj)
+$(obj)/header.o: $(obj)/zoffset.h
+
+LDFLAGS_setup.elf := -m elf_i386 -T
+$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
+ $(call if_changed,ld)
+
+OBJCOPYFLAGS_setup.bin := -O binary
+$(obj)/setup.bin: $(obj)/setup.elf FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/compressed/vmlinux: FORCE
+ $(Q)$(MAKE) $(build)=$(obj)/compressed $@
+
+# Set this if you want to pass append arguments to the
+# bzdisk/fdimage/isoimage kernel
+FDARGS =
+# Set this if you want an initrd included with the
+# bzdisk/fdimage/isoimage kernel
+FDINITRD =
+
+image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
+
+$(obj)/mtools.conf: $(src)/mtools.conf.in
+ sed -e 's|@OBJ@|$(obj)|g' < $< > $@
+
+quiet_cmd_genimage = GENIMAGE $3
+cmd_genimage = sh $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
+ $(obj)/mtools.conf '$(image_cmdline)' $(FDINITRD)
+
+# This requires write access to /dev/fd0
+bzdisk: $(obj)/bzImage $(obj)/mtools.conf
+ $(call cmd,genimage,bzdisk,/dev/fd0)
+
+# These require being root or having syslinux 2.02 or higher installed
+fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf
+ $(call cmd,genimage,fdimage144,$(obj)/fdimage)
+ @$(kecho) 'Kernel: $(obj)/fdimage is ready'
+
+fdimage288: $(obj)/bzImage $(obj)/mtools.conf
+ $(call cmd,genimage,fdimage288,$(obj)/fdimage)
+ @$(kecho) 'Kernel: $(obj)/fdimage is ready'
+
+isoimage: $(obj)/bzImage
+ $(call cmd,genimage,isoimage,$(obj)/image.iso)
+ @$(kecho) 'Kernel: $(obj)/image.iso is ready'
+
+bzlilo: $(obj)/bzImage
+ if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
+ if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
+ cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz
+ cp System.map $(INSTALL_PATH)/
+ if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
+
+install:
+ sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
+ System.map "$(INSTALL_PATH)"
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
new file mode 100644
index 000000000..64a31a6d7
--- /dev/null
+++ b/arch/x86/boot/a20.c
@@ -0,0 +1,165 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Enable A20 gate (return -1 on failure)
+ */
+
+#include "boot.h"
+
+#define MAX_8042_LOOPS 100000
+#define MAX_8042_FF 32
+
+static int empty_8042(void)
+{
+ u8 status;
+ int loops = MAX_8042_LOOPS;
+ int ffs = MAX_8042_FF;
+
+ while (loops--) {
+ io_delay();
+
+ status = inb(0x64);
+ if (status == 0xff) {
+ /* FF is a plausible, but very unlikely status */
+ if (!--ffs)
+ return -1; /* Assume no KBC present */
+ }
+ if (status & 1) {
+ /* Read and discard input data */
+ io_delay();
+ (void)inb(0x60);
+ } else if (!(status & 2)) {
+ /* Buffers empty, finished! */
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+/* Returns nonzero if the A20 line is enabled. The memory address
+ used as a test is the int $0x80 vector, which should be safe. */
+
+#define A20_TEST_ADDR (4*0x80)
+#define A20_TEST_SHORT 32
+#define A20_TEST_LONG 2097152 /* 2^21 */
+
+static int a20_test(int loops)
+{
+ int ok = 0;
+ int saved, ctr;
+
+ set_fs(0x0000);
+ set_gs(0xffff);
+
+ saved = ctr = rdfs32(A20_TEST_ADDR);
+
+ while (loops--) {
+ wrfs32(++ctr, A20_TEST_ADDR);
+ io_delay(); /* Serialize and make delay constant */
+ ok = rdgs32(A20_TEST_ADDR+0x10) ^ ctr;
+ if (ok)
+ break;
+ }
+
+ wrfs32(saved, A20_TEST_ADDR);
+ return ok;
+}
+
+/* Quick test to see if A20 is already enabled */
+static int a20_test_short(void)
+{
+ return a20_test(A20_TEST_SHORT);
+}
+
+/* Longer test that actually waits for A20 to come on line; this
+ is useful when dealing with the KBC or other slow external circuitry. */
+static int a20_test_long(void)
+{
+ return a20_test(A20_TEST_LONG);
+}
+
+static void enable_a20_bios(void)
+{
+ struct biosregs ireg;
+
+ initregs(&ireg);
+ ireg.ax = 0x2401;
+ intcall(0x15, &ireg, NULL);
+}
+
+static void enable_a20_kbc(void)
+{
+ empty_8042();
+
+ outb(0xd1, 0x64); /* Command write */
+ empty_8042();
+
+ outb(0xdf, 0x60); /* A20 on */
+ empty_8042();
+
+ outb(0xff, 0x64); /* Null command, but UHCI wants it */
+ empty_8042();
+}
+
+static void enable_a20_fast(void)
+{
+ u8 port_a;
+
+ port_a = inb(0x92); /* Configuration port A */
+ port_a |= 0x02; /* Enable A20 */
+ port_a &= ~0x01; /* Do not reset machine */
+ outb(port_a, 0x92);
+}
+
+/*
+ * Actual routine to enable A20; return 0 on ok, -1 on failure
+ */
+
+#define A20_ENABLE_LOOPS 255 /* Number of times to try */
+
+int enable_a20(void)
+{
+ int loops = A20_ENABLE_LOOPS;
+ int kbc_err;
+
+ while (loops--) {
+ /* First, check to see if A20 is already enabled
+ (legacy free, etc.) */
+ if (a20_test_short())
+ return 0;
+
+ /* Next, try the BIOS (INT 0x15, AX=0x2401) */
+ enable_a20_bios();
+ if (a20_test_short())
+ return 0;
+
+ /* Try enabling A20 through the keyboard controller */
+ kbc_err = empty_8042();
+
+ if (a20_test_short())
+ return 0; /* BIOS worked, but with delayed reaction */
+
+ if (!kbc_err) {
+ enable_a20_kbc();
+ if (a20_test_long())
+ return 0;
+ }
+
+ /* Finally, try enabling the "fast A20 gate" */
+ enable_a20_fast();
+ if (a20_test_long())
+ return 0;
+ }
+
+ return -1;
+}
diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
new file mode 100644
index 000000000..ee274834e
--- /dev/null
+++ b/arch/x86/boot/apm.c
@@ -0,0 +1,75 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * Original APM BIOS checking by Stephen Rothwell, May 1994
+ * (sfr@canb.auug.org.au)
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Get APM BIOS information
+ */
+
+#include "boot.h"
+
+int query_apm_bios(void)
+{
+ struct biosregs ireg, oreg;
+
+ /* APM BIOS installation check */
+ initregs(&ireg);
+ ireg.ah = 0x53;
+ intcall(0x15, &ireg, &oreg);
+
+ if (oreg.flags & X86_EFLAGS_CF)
+ return -1; /* No APM BIOS */
+
+ if (oreg.bx != 0x504d) /* "PM" signature */
+ return -1;
+
+ if (!(oreg.cx & 0x02)) /* 32 bits supported? */
+ return -1;
+
+ /* Disconnect first, just in case */
+ ireg.al = 0x04;
+ intcall(0x15, &ireg, NULL);
+
+ /* 32-bit connect */
+ ireg.al = 0x03;
+ intcall(0x15, &ireg, &oreg);
+
+ boot_params.apm_bios_info.cseg = oreg.ax;
+ boot_params.apm_bios_info.offset = oreg.ebx;
+ boot_params.apm_bios_info.cseg_16 = oreg.cx;
+ boot_params.apm_bios_info.dseg = oreg.dx;
+ boot_params.apm_bios_info.cseg_len = oreg.si;
+ boot_params.apm_bios_info.cseg_16_len = oreg.hsi;
+ boot_params.apm_bios_info.dseg_len = oreg.di;
+
+ if (oreg.flags & X86_EFLAGS_CF)
+ return -1;
+
+ /* Redo the installation check as the 32-bit connect;
+ some BIOSes return different flags this way... */
+
+ ireg.al = 0x00;
+ intcall(0x15, &ireg, &oreg);
+
+ if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) {
+ /* Failure with 32-bit connect, try to disconect and ignore */
+ ireg.al = 0x04;
+ intcall(0x15, &ireg, NULL);
+ return -1;
+ }
+
+ boot_params.apm_bios_info.version = oreg.ax;
+ boot_params.apm_bios_info.flags = oreg.cx;
+ return 0;
+}
+
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
new file mode 100644
index 000000000..d401b4a26
--- /dev/null
+++ b/arch/x86/boot/bioscall.S
@@ -0,0 +1,82 @@
+/* -----------------------------------------------------------------------
+ *
+ * Copyright 2009-2014 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2 or (at your
+ * option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * "Glove box" for BIOS calls. Avoids the constant problems with BIOSes
+ * touching registers they shouldn't be.
+ */
+
+ .code16
+ .section ".inittext","ax"
+ .globl intcall
+ .type intcall, @function
+intcall:
+ /* Self-modify the INT instruction. Ugly, but works. */
+ cmpb %al, 3f
+ je 1f
+ movb %al, 3f
+ jmp 1f /* Synchronize pipeline */
+1:
+ /* Save state */
+ pushfl
+ pushw %fs
+ pushw %gs
+ pushal
+
+ /* Copy input state to stack frame */
+ subw $44, %sp
+ movw %dx, %si
+ movw %sp, %di
+ movw $11, %cx
+ rep; movsd
+
+ /* Pop full state from the stack */
+ popal
+ popw %gs
+ popw %fs
+ popw %es
+ popw %ds
+ popfl
+
+ /* Actual INT */
+ .byte 0xcd /* INT opcode */
+3: .byte 0
+
+ /* Push full state to the stack */
+ pushfl
+ pushw %ds
+ pushw %es
+ pushw %fs
+ pushw %gs
+ pushal
+
+ /* Re-establish C environment invariants */
+ cld
+ movzwl %sp, %esp
+ movw %cs, %ax
+ movw %ax, %ds
+ movw %ax, %es
+
+ /* Copy output state from stack frame */
+ movw 68(%esp), %di /* Original %cx == 3rd argument */
+ andw %di, %di
+ jz 4f
+ movw %sp, %si
+ movw $11, %cx
+ rep; movsd
+4: addw $44, %sp
+
+ /* Restore state and return */
+ popal
+ popw %gs
+ popw %fs
+ popfl
+ retl
+ .size intcall, .-intcall
diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h
new file mode 100644
index 000000000..2e1382486
--- /dev/null
+++ b/arch/x86/boot/bitops.h
@@ -0,0 +1,46 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Very simple bitops for the boot code.
+ */
+
+#ifndef BOOT_BITOPS_H
+#define BOOT_BITOPS_H
+#define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */
+
+#include <linux/types.h>
+#include <asm/asm.h>
+
+static inline bool constant_test_bit(int nr, const void *addr)
+{
+ const u32 *p = (const u32 *)addr;
+ return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
+}
+static inline bool variable_test_bit(int nr, const void *addr)
+{
+ bool v;
+ const u32 *p = (const u32 *)addr;
+
+ asm("btl %2,%1" CC_SET(c) : CC_OUT(c) (v) : "m" (*p), "Ir" (nr));
+ return v;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+static inline void set_bit(int nr, void *addr)
+{
+ asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr));
+}
+
+#endif /* BOOT_BITOPS_H */
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
new file mode 100644
index 000000000..ef5a9cc66
--- /dev/null
+++ b/arch/x86/boot/boot.h
@@ -0,0 +1,358 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Header file for the real-mode kernel code
+ */
+
+#ifndef BOOT_BOOT_H
+#define BOOT_BOOT_H
+
+#define STACK_SIZE 1024 /* Minimum number of bytes for stack */
+
+#ifndef __ASSEMBLY__
+
+#include <stdarg.h>
+#include <linux/types.h>
+#include <linux/edd.h>
+#include <asm/setup.h>
+#include <asm/asm.h>
+#include "bitops.h"
+#include "ctype.h"
+#include "cpuflags.h"
+
+/* Useful macros */
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+extern struct setup_header hdr;
+extern struct boot_params boot_params;
+
+#define cpu_relax() asm volatile("rep; nop")
+
+/* Basic port I/O */
+static inline void outb(u8 v, u16 port)
+{
+ asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
+}
+static inline u8 inb(u16 port)
+{
+ u8 v;
+ asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
+ return v;
+}
+
+static inline void outw(u16 v, u16 port)
+{
+ asm volatile("outw %0,%1" : : "a" (v), "dN" (port));
+}
+static inline u16 inw(u16 port)
+{
+ u16 v;
+ asm volatile("inw %1,%0" : "=a" (v) : "dN" (port));
+ return v;
+}
+
+static inline void outl(u32 v, u16 port)
+{
+ asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
+}
+static inline u32 inl(u16 port)
+{
+ u32 v;
+ asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
+ return v;
+}
+
+static inline void io_delay(void)
+{
+ const u16 DELAY_PORT = 0x80;
+ asm volatile("outb %%al,%0" : : "dN" (DELAY_PORT));
+}
+
+/* These functions are used to reference data in other segments. */
+
+static inline u16 ds(void)
+{
+ u16 seg;
+ asm("movw %%ds,%0" : "=rm" (seg));
+ return seg;
+}
+
+static inline void set_fs(u16 seg)
+{
+ asm volatile("movw %0,%%fs" : : "rm" (seg));
+}
+static inline u16 fs(void)
+{
+ u16 seg;
+ asm volatile("movw %%fs,%0" : "=rm" (seg));
+ return seg;
+}
+
+static inline void set_gs(u16 seg)
+{
+ asm volatile("movw %0,%%gs" : : "rm" (seg));
+}
+static inline u16 gs(void)
+{
+ u16 seg;
+ asm volatile("movw %%gs,%0" : "=rm" (seg));
+ return seg;
+}
+
+typedef unsigned int addr_t;
+
+static inline u8 rdfs8(addr_t addr)
+{
+ u8 v;
+ asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
+ return v;
+}
+static inline u16 rdfs16(addr_t addr)
+{
+ u16 v;
+ asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
+ return v;
+}
+static inline u32 rdfs32(addr_t addr)
+{
+ u32 v;
+ asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
+ return v;
+}
+
+static inline void wrfs8(u8 v, addr_t addr)
+{
+ asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
+}
+static inline void wrfs16(u16 v, addr_t addr)
+{
+ asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
+}
+static inline void wrfs32(u32 v, addr_t addr)
+{
+ asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
+}
+
+static inline u8 rdgs8(addr_t addr)
+{
+ u8 v;
+ asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
+ return v;
+}
+static inline u16 rdgs16(addr_t addr)
+{
+ u16 v;
+ asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
+ return v;
+}
+static inline u32 rdgs32(addr_t addr)
+{
+ u32 v;
+ asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
+ return v;
+}
+
+static inline void wrgs8(u8 v, addr_t addr)
+{
+ asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
+}
+static inline void wrgs16(u16 v, addr_t addr)
+{
+ asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
+}
+static inline void wrgs32(u32 v, addr_t addr)
+{
+ asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
+}
+
+/* Note: these only return true/false, not a signed return value! */
+static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len)
+{
+ bool diff;
+ asm volatile("fs; repe; cmpsb" CC_SET(nz)
+ : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ return diff;
+}
+static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
+{
+ bool diff;
+ asm volatile("gs; repe; cmpsb" CC_SET(nz)
+ : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ return diff;
+}
+
+/* Heap -- available for dynamic lists. */
+extern char _end[];
+extern char *HEAP;
+extern char *heap_end;
+#define RESET_HEAP() ((void *)( HEAP = _end ))
+static inline char *__get_heap(size_t s, size_t a, size_t n)
+{
+ char *tmp;
+
+ HEAP = (char *)(((size_t)HEAP+(a-1)) & ~(a-1));
+ tmp = HEAP;
+ HEAP += s*n;
+ return tmp;
+}
+#define GET_HEAP(type, n) \
+ ((type *)__get_heap(sizeof(type),__alignof__(type),(n)))
+
+static inline bool heap_free(size_t n)
+{
+ return (int)(heap_end-HEAP) >= (int)n;
+}
+
+/* copy.S */
+
+void copy_to_fs(addr_t dst, void *src, size_t len);
+void *copy_from_fs(void *dst, addr_t src, size_t len);
+void copy_to_gs(addr_t dst, void *src, size_t len);
+void *copy_from_gs(void *dst, addr_t src, size_t len);
+
+/* a20.c */
+int enable_a20(void);
+
+/* apm.c */
+int query_apm_bios(void);
+
+/* bioscall.c */
+struct biosregs {
+ union {
+ struct {
+ u32 edi;
+ u32 esi;
+ u32 ebp;
+ u32 _esp;
+ u32 ebx;
+ u32 edx;
+ u32 ecx;
+ u32 eax;
+ u32 _fsgs;
+ u32 _dses;
+ u32 eflags;
+ };
+ struct {
+ u16 di, hdi;
+ u16 si, hsi;
+ u16 bp, hbp;
+ u16 _sp, _hsp;
+ u16 bx, hbx;
+ u16 dx, hdx;
+ u16 cx, hcx;
+ u16 ax, hax;
+ u16 gs, fs;
+ u16 es, ds;
+ u16 flags, hflags;
+ };
+ struct {
+ u8 dil, dih, edi2, edi3;
+ u8 sil, sih, esi2, esi3;
+ u8 bpl, bph, ebp2, ebp3;
+ u8 _spl, _sph, _esp2, _esp3;
+ u8 bl, bh, ebx2, ebx3;
+ u8 dl, dh, edx2, edx3;
+ u8 cl, ch, ecx2, ecx3;
+ u8 al, ah, eax2, eax3;
+ };
+ };
+};
+void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
+
+/* cmdline.c */
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize);
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option);
+static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
+{
+ unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+ if (cmd_line_ptr >= 0x100000)
+ return -1; /* inaccessible */
+
+ return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize);
+}
+
+static inline int cmdline_find_option_bool(const char *option)
+{
+ unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+ if (cmd_line_ptr >= 0x100000)
+ return -1; /* inaccessible */
+
+ return __cmdline_find_option_bool(cmd_line_ptr, option);
+}
+
+/* cpu.c, cpucheck.c */
+int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
+int check_knl_erratum(void);
+int validate_cpu(void);
+
+/* early_serial_console.c */
+extern int early_serial_base;
+void console_init(void);
+
+/* edd.c */
+void query_edd(void);
+
+/* header.S */
+void __attribute__((noreturn)) die(void);
+
+/* memory.c */
+int detect_memory(void);
+
+/* pm.c */
+void __attribute__((noreturn)) go_to_protected_mode(void);
+
+/* pmjump.S */
+void __attribute__((noreturn))
+ protected_mode_jump(u32 entrypoint, u32 bootparams);
+
+/* printf.c */
+int sprintf(char *buf, const char *fmt, ...);
+int vsprintf(char *buf, const char *fmt, va_list args);
+int printf(const char *fmt, ...);
+
+/* regs.c */
+void initregs(struct biosregs *regs);
+
+/* string.c */
+int strcmp(const char *str1, const char *str2);
+int strncmp(const char *cs, const char *ct, size_t count);
+size_t strnlen(const char *s, size_t maxlen);
+unsigned int atou(const char *s);
+unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
+size_t strlen(const char *s);
+char *strchr(const char *s, int c);
+
+/* tty.c */
+void puts(const char *);
+void putchar(int);
+int getchar(void);
+void kbd_flush(void);
+int getchar_timeout(void);
+
+/* video.c */
+void set_video(void);
+
+/* video-mode.c */
+int set_mode(u16 mode);
+int mode_defined(u16 mode);
+void probe_cards(int unsafe);
+
+/* video-vesa.c */
+void vesa_store_edid(void);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* BOOT_BOOT_H */
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
new file mode 100644
index 000000000..625d21b0c
--- /dev/null
+++ b/arch/x86/boot/cmdline.c
@@ -0,0 +1,158 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Simple command-line parser for early boot.
+ */
+
+#include "boot.h"
+
+static inline int myisspace(u8 c)
+{
+ return c <= ' '; /* Close enough approximation */
+}
+
+/*
+ * Find a non-boolean option, that is, "option=argument". In accordance
+ * with standard Linux practice, if this option is repeated, this returns
+ * the last instance on the command line.
+ *
+ * Returns the length of the argument (regardless of if it was
+ * truncated to fit in the buffer), or -1 on not found.
+ */
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize)
+{
+ addr_t cptr;
+ char c;
+ int len = -1;
+ const char *opptr = NULL;
+ char *bufptr = buffer;
+ enum {
+ st_wordstart, /* Start of word/after whitespace */
+ st_wordcmp, /* Comparing this word */
+ st_wordskip, /* Miscompare, skip */
+ st_bufcpy /* Copying this to buffer */
+ } state = st_wordstart;
+
+ if (!cmdline_ptr)
+ return -1; /* No command line */
+
+ cptr = cmdline_ptr & 0xf;
+ set_fs(cmdline_ptr >> 4);
+
+ while (cptr < 0x10000 && (c = rdfs8(cptr++))) {
+ switch (state) {
+ case st_wordstart:
+ if (myisspace(c))
+ break;
+
+ /* else */
+ state = st_wordcmp;
+ opptr = option;
+ /* fall through */
+
+ case st_wordcmp:
+ if (c == '=' && !*opptr) {
+ len = 0;
+ bufptr = buffer;
+ state = st_bufcpy;
+ } else if (myisspace(c)) {
+ state = st_wordstart;
+ } else if (c != *opptr++) {
+ state = st_wordskip;
+ }
+ break;
+
+ case st_wordskip:
+ if (myisspace(c))
+ state = st_wordstart;
+ break;
+
+ case st_bufcpy:
+ if (myisspace(c)) {
+ state = st_wordstart;
+ } else {
+ if (len < bufsize-1)
+ *bufptr++ = c;
+ len++;
+ }
+ break;
+ }
+ }
+
+ if (bufsize)
+ *bufptr = '\0';
+
+ return len;
+}
+
+/*
+ * Find a boolean option (like quiet,noapic,nosmp....)
+ *
+ * Returns the position of that option (starts counting with 1)
+ * or 0 on not found
+ */
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
+{
+ addr_t cptr;
+ char c;
+ int pos = 0, wstart = 0;
+ const char *opptr = NULL;
+ enum {
+ st_wordstart, /* Start of word/after whitespace */
+ st_wordcmp, /* Comparing this word */
+ st_wordskip, /* Miscompare, skip */
+ } state = st_wordstart;
+
+ if (!cmdline_ptr)
+ return -1; /* No command line */
+
+ cptr = cmdline_ptr & 0xf;
+ set_fs(cmdline_ptr >> 4);
+
+ while (cptr < 0x10000) {
+ c = rdfs8(cptr++);
+ pos++;
+
+ switch (state) {
+ case st_wordstart:
+ if (!c)
+ return 0;
+ else if (myisspace(c))
+ break;
+
+ state = st_wordcmp;
+ opptr = option;
+ wstart = pos;
+ /* fall through */
+
+ case st_wordcmp:
+ if (!*opptr)
+ if (!c || myisspace(c))
+ return wstart;
+ else
+ state = st_wordskip;
+ else if (!c)
+ return 0;
+ else if (c != *opptr++)
+ state = st_wordskip;
+ break;
+
+ case st_wordskip:
+ if (!c)
+ return 0;
+ else if (myisspace(c))
+ state = st_wordstart;
+ break;
+ }
+ }
+
+ return 0; /* Buffer overrun */
+}
diff --git a/arch/x86/boot/code16gcc.h b/arch/x86/boot/code16gcc.h
new file mode 100644
index 000000000..e19fd7536
--- /dev/null
+++ b/arch/x86/boot/code16gcc.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#
+# code16gcc.h
+#
+# This file is added to the assembler via -Wa when compiling 16-bit C code.
+# This is done this way instead via asm() to make sure gcc does not reorder
+# things around us.
+#
+# gcc 4.9+ has a real -m16 option so we can drop this hack long term.
+#
+
+ .code16gcc
diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore
new file mode 100644
index 000000000..4a46fab71
--- /dev/null
+++ b/arch/x86/boot/compressed/.gitignore
@@ -0,0 +1,6 @@
+relocs
+vmlinux.bin.all
+vmlinux.relocs
+vmlinux.lds
+mkpiggy
+piggy.S
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
new file mode 100644
index 000000000..5642f025b
--- /dev/null
+++ b/arch/x86/boot/compressed/Makefile
@@ -0,0 +1,160 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# linux/arch/x86/boot/compressed/Makefile
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+# vmlinuz is:
+# decompression code (*.o)
+# asm globals (piggy.S), including:
+# vmlinux.bin.(gz|bz2|lzma|...)
+#
+# vmlinux.bin is:
+# vmlinux stripped of debugging and comments
+# vmlinux.bin.all is:
+# vmlinux.bin + vmlinux.relocs
+# vmlinux.bin.(gz|bz2|lzma|...) is:
+# (see scripts/Makefile.lib size_append)
+# compressed vmlinux.bin.all + u32 size of vmlinux.bin.all
+
+KASAN_SANITIZE := n
+OBJECT_FILES_NON_STANDARD := y
+
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
+targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
+ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
+
+KBUILD_CFLAGS := -m$(BITS) -O2
+KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+cflags-$(CONFIG_X86_32) := -march=i386
+cflags-$(CONFIG_X86_64) := -mcmodel=small
+KBUILD_CFLAGS += $(cflags-y)
+KBUILD_CFLAGS += -mno-mmx -mno-sse
+KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
+KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += -Wno-pointer-sign
+# Disable relocation relaxation in case the link is not PIE.
+KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no)
+
+KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+GCOV_PROFILE := n
+UBSAN_SANITIZE :=n
+
+KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
+# Compressed kernel should be built as PIE since it may be loaded at any
+# address by the bootloader.
+ifeq ($(CONFIG_X86_32),y)
+KBUILD_LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
+else
+# To build 64-bit compressed kernel as PIE, we disable relocation
+# overflow check to avoid relocation overflow error with a new linker
+# command-line option, -z noreloc-overflow.
+KBUILD_LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
+ && echo "-z noreloc-overflow -pie --no-dynamic-linker")
+endif
+LDFLAGS_vmlinux := -T
+
+hostprogs-y := mkpiggy
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
+
+quiet_cmd_voffset = VOFFSET $@
+ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
+
+targets += ../voffset.h
+
+$(obj)/../voffset.h: vmlinux FORCE
+ $(call if_changed,voffset)
+
+$(obj)/misc.o: $(obj)/../voffset.h
+
+vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
+ $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
+ $(obj)/piggy.o $(obj)/cpuflags.o
+
+vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
+vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
+ifdef CONFIG_X86_64
+ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o
+ vmlinux-objs-y += $(obj)/mem_encrypt.o
+ vmlinux-objs-y += $(obj)/pgtable_64.o
+endif
+
+$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
+
+vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
+ $(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
+
+# The compressed kernel is built with -fPIC/-fPIE so that a boot loader
+# can place it anywhere in memory and it will still run. However, since
+# it is executed as-is without any ELF relocation processing performed
+# (and has already had all relocation sections stripped from the binary),
+# none of the code can use data relocations (e.g. static assignments of
+# pointer values), since they will be meaningless at runtime. This check
+# will refuse to link the vmlinux if any of these relocations are found.
+quiet_cmd_check_data_rel = DATAREL $@
+define cmd_check_data_rel
+ for obj in $(filter %.o,$^); do \
+ $(READELF) -S $$obj | grep -qF .rel.local && { \
+ echo "error: $$obj has data relocations!" >&2; \
+ exit 1; \
+ } || true; \
+ done
+endef
+
+# We need to run two commands under "if_changed", so merge them into a
+# single invocation.
+quiet_cmd_check-and-link-vmlinux = LD $@
+ cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld)
+
+$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
+ $(call if_changed,check-and-link-vmlinux)
+
+OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
+$(obj)/vmlinux.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs
+
+CMD_RELOCS = arch/x86/tools/relocs
+quiet_cmd_relocs = RELOCS $@
+ cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
+$(obj)/vmlinux.relocs: vmlinux FORCE
+ $(call if_changed,relocs)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
+
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,lzma)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,xzkern)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,lzo)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,lz4)
+
+suffix-$(CONFIG_KERNEL_GZIP) := gz
+suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZMA) := lzma
+suffix-$(CONFIG_KERNEL_XZ) := xz
+suffix-$(CONFIG_KERNEL_LZO) := lzo
+suffix-$(CONFIG_KERNEL_LZ4) := lz4
+
+quiet_cmd_mkpiggy = MKPIGGY $@
+ cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
+
+targets += piggy.S
+$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
+ $(call if_changed,mkpiggy)
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
new file mode 100644
index 000000000..af6cda0b7
--- /dev/null
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "misc.h"
+
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE || CONFIG_X86_5LEVEL
+
+static unsigned long fs;
+static inline void set_fs(unsigned long seg)
+{
+ fs = seg << 4; /* shift it back */
+}
+typedef unsigned long addr_t;
+static inline char rdfs8(addr_t addr)
+{
+ return *((char *)(fs + addr));
+}
+#include "../cmdline.c"
+unsigned long get_cmd_line_ptr(void)
+{
+ unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr;
+
+ cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32;
+
+ return cmd_line_ptr;
+}
+int cmdline_find_option(const char *option, char *buffer, int bufsize)
+{
+ return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize);
+}
+int cmdline_find_option_bool(const char *option)
+{
+ return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
+}
+
+#endif
diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c
new file mode 100644
index 000000000..6448a8196
--- /dev/null
+++ b/arch/x86/boot/compressed/cpuflags.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifdef CONFIG_RANDOMIZE_BASE
+
+#include "../cpuflags.c"
+
+bool has_cpuflag(int flag)
+{
+ get_cpuflags();
+
+ return test_bit(flag, cpu.flags);
+}
+
+#endif
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c
new file mode 100644
index 000000000..261e81fb9
--- /dev/null
+++ b/arch/x86/boot/compressed/early_serial_console.c
@@ -0,0 +1,5 @@
+#include "misc.h"
+
+int early_serial_base;
+
+#include "../early_serial_console.c"
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
new file mode 100644
index 000000000..544ac4faf
--- /dev/null
+++ b/arch/x86/boot/compressed/eboot.c
@@ -0,0 +1,934 @@
+
+/* -----------------------------------------------------------------------
+ *
+ * Copyright 2011 Intel Corporation; author Matt Fleming
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <linux/efi.h>
+#include <linux/pci.h>
+
+#include <asm/efi.h>
+#include <asm/e820/types.h>
+#include <asm/setup.h>
+#include <asm/desc.h>
+
+#include "../string.h"
+#include "eboot.h"
+
+static efi_system_table_t *sys_table;
+
+static struct efi_config *efi_early;
+
+__pure const struct efi_config *__efi_early(void)
+{
+ return efi_early;
+}
+
+#define BOOT_SERVICES(bits) \
+static void setup_boot_services##bits(struct efi_config *c) \
+{ \
+ efi_system_table_##bits##_t *table; \
+ \
+ table = (typeof(table))sys_table; \
+ \
+ c->runtime_services = table->runtime; \
+ c->boot_services = table->boottime; \
+ c->text_output = table->con_out; \
+}
+BOOT_SERVICES(32);
+BOOT_SERVICES(64);
+
+void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
+{
+ efi_call_proto(efi_simple_text_output_protocol, output_string,
+ efi_early->text_output, str);
+}
+
+static efi_status_t
+preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
+{
+ struct pci_setup_rom *rom = NULL;
+ efi_status_t status;
+ unsigned long size;
+ uint64_t romsize;
+ void *romimage;
+
+ /*
+ * Some firmware images contain EFI function pointers at the place where
+ * the romimage and romsize fields are supposed to be. Typically the EFI
+ * code is mapped at high addresses, translating to an unrealistically
+ * large romsize. The UEFI spec limits the size of option ROMs to 16
+ * MiB so we reject any ROMs over 16 MiB in size to catch this.
+ */
+ romimage = (void *)(unsigned long)efi_table_attr(efi_pci_io_protocol,
+ romimage, pci);
+ romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci);
+ if (!romimage || !romsize || romsize > SZ_16M)
+ return EFI_INVALID_PARAMETER;
+
+ size = romsize + sizeof(*rom);
+
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to allocate memory for 'rom'\n");
+ return status;
+ }
+
+ memset(rom, 0, sizeof(*rom));
+
+ rom->data.type = SETUP_PCI;
+ rom->data.len = size - sizeof(struct setup_data);
+ rom->data.next = 0;
+ rom->pcilen = pci->romsize;
+ *__rom = rom;
+
+ status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
+ EfiPciIoWidthUint16, PCI_VENDOR_ID, 1,
+ &rom->vendor);
+
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to read rom->vendor\n");
+ goto free_struct;
+ }
+
+ status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
+ EfiPciIoWidthUint16, PCI_DEVICE_ID, 1,
+ &rom->devid);
+
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to read rom->devid\n");
+ goto free_struct;
+ }
+
+ status = efi_call_proto(efi_pci_io_protocol, get_location, pci,
+ &rom->segment, &rom->bus, &rom->device,
+ &rom->function);
+
+ if (status != EFI_SUCCESS)
+ goto free_struct;
+
+ memcpy(rom->romdata, romimage, romsize);
+ return status;
+
+free_struct:
+ efi_call_early(free_pool, rom);
+ return status;
+}
+
+/*
+ * There's no way to return an informative status from this function,
+ * because any analysis (and printing of error messages) needs to be
+ * done directly at the EFI function call-site.
+ *
+ * For example, EFI_INVALID_PARAMETER could indicate a bug or maybe we
+ * just didn't find any PCI devices, but there's no way to tell outside
+ * the context of the call.
+ */
+static void setup_efi_pci(struct boot_params *params)
+{
+ efi_status_t status;
+ void **pci_handle = NULL;
+ efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
+ unsigned long size = 0;
+ unsigned long nr_pci;
+ struct setup_data *data;
+ int i;
+
+ status = efi_call_early(locate_handle,
+ EFI_LOCATE_BY_PROTOCOL,
+ &pci_proto, NULL, &size, pci_handle);
+
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ status = efi_call_early(allocate_pool,
+ EFI_LOADER_DATA,
+ size, (void **)&pci_handle);
+
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to allocate memory for 'pci_handle'\n");
+ return;
+ }
+
+ status = efi_call_early(locate_handle,
+ EFI_LOCATE_BY_PROTOCOL, &pci_proto,
+ NULL, &size, pci_handle);
+ }
+
+ if (status != EFI_SUCCESS)
+ goto free_handle;
+
+ data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+ while (data && data->next)
+ data = (struct setup_data *)(unsigned long)data->next;
+
+ nr_pci = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
+ for (i = 0; i < nr_pci; i++) {
+ efi_pci_io_protocol_t *pci = NULL;
+ struct pci_setup_rom *rom;
+
+ status = efi_call_early(handle_protocol,
+ efi_is_64bit() ? ((u64 *)pci_handle)[i]
+ : ((u32 *)pci_handle)[i],
+ &pci_proto, (void **)&pci);
+ if (status != EFI_SUCCESS || !pci)
+ continue;
+
+ status = preserve_pci_rom_image(pci, &rom);
+ if (status != EFI_SUCCESS)
+ continue;
+
+ if (data)
+ data->next = (unsigned long)rom;
+ else
+ params->hdr.setup_data = (unsigned long)rom;
+
+ data = (struct setup_data *)rom;
+ }
+
+free_handle:
+ efi_call_early(free_pool, pci_handle);
+}
+
+static void retrieve_apple_device_properties(struct boot_params *boot_params)
+{
+ efi_guid_t guid = APPLE_PROPERTIES_PROTOCOL_GUID;
+ struct setup_data *data, *new;
+ efi_status_t status;
+ u32 size = 0;
+ void *p;
+
+ status = efi_call_early(locate_protocol, &guid, NULL, &p);
+ if (status != EFI_SUCCESS)
+ return;
+
+ if (efi_table_attr(apple_properties_protocol, version, p) != 0x10000) {
+ efi_printk(sys_table, "Unsupported properties proto version\n");
+ return;
+ }
+
+ efi_call_proto(apple_properties_protocol, get_all, p, NULL, &size);
+ if (!size)
+ return;
+
+ do {
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+ size + sizeof(struct setup_data), &new);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to allocate memory for 'properties'\n");
+ return;
+ }
+
+ status = efi_call_proto(apple_properties_protocol, get_all, p,
+ new->data, &size);
+
+ if (status == EFI_BUFFER_TOO_SMALL)
+ efi_call_early(free_pool, new);
+ } while (status == EFI_BUFFER_TOO_SMALL);
+
+ new->type = SETUP_APPLE_PROPERTIES;
+ new->len = size;
+ new->next = 0;
+
+ data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
+ if (!data) {
+ boot_params->hdr.setup_data = (unsigned long)new;
+ } else {
+ while (data->next)
+ data = (struct setup_data *)(unsigned long)data->next;
+ data->next = (unsigned long)new;
+ }
+}
+
+static const efi_char16_t apple[] = L"Apple";
+
+static void setup_quirks(struct boot_params *boot_params)
+{
+ efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
+ efi_table_attr(efi_system_table, fw_vendor, sys_table);
+
+ if (!memcmp(fw_vendor, apple, sizeof(apple))) {
+ if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
+ retrieve_apple_device_properties(boot_params);
+ }
+}
+
+/*
+ * See if we have Universal Graphics Adapter (UGA) protocol
+ */
+static efi_status_t
+setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
+{
+ efi_status_t status;
+ u32 width, height;
+ void **uga_handle = NULL;
+ efi_uga_draw_protocol_t *uga = NULL, *first_uga;
+ unsigned long nr_ugas;
+ int i;
+
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+ size, (void **)&uga_handle);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ status = efi_call_early(locate_handle,
+ EFI_LOCATE_BY_PROTOCOL,
+ uga_proto, NULL, &size, uga_handle);
+ if (status != EFI_SUCCESS)
+ goto free_handle;
+
+ height = 0;
+ width = 0;
+
+ first_uga = NULL;
+ nr_ugas = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
+ for (i = 0; i < nr_ugas; i++) {
+ efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
+ u32 w, h, depth, refresh;
+ void *pciio;
+ unsigned long handle = efi_is_64bit() ? ((u64 *)uga_handle)[i]
+ : ((u32 *)uga_handle)[i];
+
+ status = efi_call_early(handle_protocol, handle,
+ uga_proto, (void **)&uga);
+ if (status != EFI_SUCCESS)
+ continue;
+
+ pciio = NULL;
+ efi_call_early(handle_protocol, handle, &pciio_proto, &pciio);
+
+ status = efi_call_proto(efi_uga_draw_protocol, get_mode, uga,
+ &w, &h, &depth, &refresh);
+ if (status == EFI_SUCCESS && (!first_uga || pciio)) {
+ width = w;
+ height = h;
+
+ /*
+ * Once we've found a UGA supporting PCIIO,
+ * don't bother looking any further.
+ */
+ if (pciio)
+ break;
+
+ first_uga = uga;
+ }
+ }
+
+ if (!width && !height)
+ goto free_handle;
+
+ /* EFI framebuffer */
+ si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+ si->lfb_depth = 32;
+ si->lfb_width = width;
+ si->lfb_height = height;
+
+ si->red_size = 8;
+ si->red_pos = 16;
+ si->green_size = 8;
+ si->green_pos = 8;
+ si->blue_size = 8;
+ si->blue_pos = 0;
+ si->rsvd_size = 8;
+ si->rsvd_pos = 24;
+
+free_handle:
+ efi_call_early(free_pool, uga_handle);
+
+ return status;
+}
+
+void setup_graphics(struct boot_params *boot_params)
+{
+ efi_guid_t graphics_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+ struct screen_info *si;
+ efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
+ efi_status_t status;
+ unsigned long size;
+ void **gop_handle = NULL;
+ void **uga_handle = NULL;
+
+ si = &boot_params->screen_info;
+ memset(si, 0, sizeof(*si));
+
+ size = 0;
+ status = efi_call_early(locate_handle,
+ EFI_LOCATE_BY_PROTOCOL,
+ &graphics_proto, NULL, &size, gop_handle);
+ if (status == EFI_BUFFER_TOO_SMALL)
+ status = efi_setup_gop(NULL, si, &graphics_proto, size);
+
+ if (status != EFI_SUCCESS) {
+ size = 0;
+ status = efi_call_early(locate_handle,
+ EFI_LOCATE_BY_PROTOCOL,
+ &uga_proto, NULL, &size, uga_handle);
+ if (status == EFI_BUFFER_TOO_SMALL)
+ setup_uga(si, &uga_proto, size);
+ }
+}
+
+/*
+ * Because the x86 boot code expects to be passed a boot_params we
+ * need to create one ourselves (usually the bootloader would create
+ * one for us).
+ *
+ * The caller is responsible for filling out ->code32_start in the
+ * returned boot_params.
+ */
+struct boot_params *make_boot_params(struct efi_config *c)
+{
+ struct boot_params *boot_params;
+ struct apm_bios_info *bi;
+ struct setup_header *hdr;
+ efi_loaded_image_t *image;
+ void *options, *handle;
+ efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
+ int options_size = 0;
+ efi_status_t status;
+ char *cmdline_ptr;
+ u16 *s2;
+ u8 *s1;
+ int i;
+ unsigned long ramdisk_addr;
+ unsigned long ramdisk_size;
+
+ efi_early = c;
+ sys_table = (efi_system_table_t *)(unsigned long)efi_early->table;
+ handle = (void *)(unsigned long)efi_early->image_handle;
+
+ /* Check if we were booted by the EFI firmware */
+ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ return NULL;
+
+ if (efi_is_64bit())
+ setup_boot_services64(efi_early);
+ else
+ setup_boot_services32(efi_early);
+
+ status = efi_call_early(handle_protocol, handle,
+ &proto, (void *)&image);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
+ return NULL;
+ }
+
+ status = efi_low_alloc(sys_table, 0x4000, 1,
+ (unsigned long *)&boot_params);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to allocate lowmem for boot params\n");
+ return NULL;
+ }
+
+ memset(boot_params, 0x0, 0x4000);
+
+ hdr = &boot_params->hdr;
+ bi = &boot_params->apm_bios_info;
+
+ /* Copy the second sector to boot_params */
+ memcpy(&hdr->jump, image->image_base + 512, 512);
+
+ /*
+ * Fill out some of the header fields ourselves because the
+ * EFI firmware loader doesn't load the first sector.
+ */
+ hdr->root_flags = 1;
+ hdr->vid_mode = 0xffff;
+ hdr->boot_flag = 0xAA55;
+
+ hdr->type_of_loader = 0x21;
+
+ /* Convert unicode cmdline to ascii */
+ cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
+ if (!cmdline_ptr)
+ goto fail;
+
+ hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
+ /* Fill in upper bits of command line address, NOP on 32 bit */
+ boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
+
+ hdr->ramdisk_image = 0;
+ hdr->ramdisk_size = 0;
+
+ /* Clear APM BIOS info */
+ memset(bi, 0, sizeof(*bi));
+
+ status = efi_parse_options(cmdline_ptr);
+ if (status != EFI_SUCCESS)
+ goto fail2;
+
+ status = handle_cmdline_files(sys_table, image,
+ (char *)(unsigned long)hdr->cmd_line_ptr,
+ "initrd=", hdr->initrd_addr_max,
+ &ramdisk_addr, &ramdisk_size);
+
+ if (status != EFI_SUCCESS &&
+ hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) {
+ efi_printk(sys_table, "Trying to load files to higher address\n");
+ status = handle_cmdline_files(sys_table, image,
+ (char *)(unsigned long)hdr->cmd_line_ptr,
+ "initrd=", -1UL,
+ &ramdisk_addr, &ramdisk_size);
+ }
+
+ if (status != EFI_SUCCESS)
+ goto fail2;
+ hdr->ramdisk_image = ramdisk_addr & 0xffffffff;
+ hdr->ramdisk_size = ramdisk_size & 0xffffffff;
+ boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32;
+ boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32;
+
+ return boot_params;
+
+fail2:
+ efi_free(sys_table, options_size, hdr->cmd_line_ptr);
+fail:
+ efi_free(sys_table, 0x4000, (unsigned long)boot_params);
+
+ return NULL;
+}
+
+static void add_e820ext(struct boot_params *params,
+ struct setup_data *e820ext, u32 nr_entries)
+{
+ struct setup_data *data;
+ efi_status_t status;
+ unsigned long size;
+
+ e820ext->type = SETUP_E820_EXT;
+ e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
+ e820ext->next = 0;
+
+ data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+ while (data && data->next)
+ data = (struct setup_data *)(unsigned long)data->next;
+
+ if (data)
+ data->next = (unsigned long)e820ext;
+ else
+ params->hdr.setup_data = (unsigned long)e820ext;
+}
+
+static efi_status_t
+setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_size)
+{
+ struct boot_e820_entry *entry = params->e820_table;
+ struct efi_info *efi = &params->efi_info;
+ struct boot_e820_entry *prev = NULL;
+ u32 nr_entries;
+ u32 nr_desc;
+ int i;
+
+ nr_entries = 0;
+ nr_desc = efi->efi_memmap_size / efi->efi_memdesc_size;
+
+ for (i = 0; i < nr_desc; i++) {
+ efi_memory_desc_t *d;
+ unsigned int e820_type = 0;
+ unsigned long m = efi->efi_memmap;
+
+#ifdef CONFIG_X86_64
+ m |= (u64)efi->efi_memmap_hi << 32;
+#endif
+
+ d = efi_early_memdesc_ptr(m, efi->efi_memdesc_size, i);
+ switch (d->type) {
+ case EFI_RESERVED_TYPE:
+ case EFI_RUNTIME_SERVICES_CODE:
+ case EFI_RUNTIME_SERVICES_DATA:
+ case EFI_MEMORY_MAPPED_IO:
+ case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
+ case EFI_PAL_CODE:
+ e820_type = E820_TYPE_RESERVED;
+ break;
+
+ case EFI_UNUSABLE_MEMORY:
+ e820_type = E820_TYPE_UNUSABLE;
+ break;
+
+ case EFI_ACPI_RECLAIM_MEMORY:
+ e820_type = E820_TYPE_ACPI;
+ break;
+
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+ e820_type = E820_TYPE_RAM;
+ break;
+
+ case EFI_ACPI_MEMORY_NVS:
+ e820_type = E820_TYPE_NVS;
+ break;
+
+ case EFI_PERSISTENT_MEMORY:
+ e820_type = E820_TYPE_PMEM;
+ break;
+
+ default:
+ continue;
+ }
+
+ /* Merge adjacent mappings */
+ if (prev && prev->type == e820_type &&
+ (prev->addr + prev->size) == d->phys_addr) {
+ prev->size += d->num_pages << 12;
+ continue;
+ }
+
+ if (nr_entries == ARRAY_SIZE(params->e820_table)) {
+ u32 need = (nr_desc - i) * sizeof(struct e820_entry) +
+ sizeof(struct setup_data);
+
+ if (!e820ext || e820ext_size < need)
+ return EFI_BUFFER_TOO_SMALL;
+
+ /* boot_params map full, switch to e820 extended */
+ entry = (struct boot_e820_entry *)e820ext->data;
+ }
+
+ entry->addr = d->phys_addr;
+ entry->size = d->num_pages << PAGE_SHIFT;
+ entry->type = e820_type;
+ prev = entry++;
+ nr_entries++;
+ }
+
+ if (nr_entries > ARRAY_SIZE(params->e820_table)) {
+ u32 nr_e820ext = nr_entries - ARRAY_SIZE(params->e820_table);
+
+ add_e820ext(params, e820ext, nr_e820ext);
+ nr_entries -= nr_e820ext;
+ }
+
+ params->e820_entries = (u8)nr_entries;
+
+ return EFI_SUCCESS;
+}
+
+static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
+ u32 *e820ext_size)
+{
+ efi_status_t status;
+ unsigned long size;
+
+ size = sizeof(struct setup_data) +
+ sizeof(struct e820_entry) * nr_desc;
+
+ if (*e820ext) {
+ efi_call_early(free_pool, *e820ext);
+ *e820ext = NULL;
+ *e820ext_size = 0;
+ }
+
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+ size, (void **)e820ext);
+ if (status == EFI_SUCCESS)
+ *e820ext_size = size;
+
+ return status;
+}
+
+static efi_status_t allocate_e820(struct boot_params *params,
+ struct setup_data **e820ext,
+ u32 *e820ext_size)
+{
+ unsigned long map_size, desc_size, buff_size;
+ struct efi_boot_memmap boot_map;
+ efi_memory_desc_t *map;
+ efi_status_t status;
+ __u32 nr_desc;
+
+ boot_map.map = &map;
+ boot_map.map_size = &map_size;
+ boot_map.desc_size = &desc_size;
+ boot_map.desc_ver = NULL;
+ boot_map.key_ptr = NULL;
+ boot_map.buff_size = &buff_size;
+
+ status = efi_get_memory_map(sys_table, &boot_map);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ nr_desc = buff_size / desc_size;
+
+ if (nr_desc > ARRAY_SIZE(params->e820_table)) {
+ u32 nr_e820ext = nr_desc - ARRAY_SIZE(params->e820_table);
+
+ status = alloc_e820ext(nr_e820ext, e820ext, e820ext_size);
+ if (status != EFI_SUCCESS)
+ return status;
+ }
+
+ return EFI_SUCCESS;
+}
+
+struct exit_boot_struct {
+ struct boot_params *boot_params;
+ struct efi_info *efi;
+};
+
+static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
+ struct efi_boot_memmap *map,
+ void *priv)
+{
+ const char *signature;
+ __u32 nr_desc;
+ efi_status_t status;
+ struct exit_boot_struct *p = priv;
+
+ signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
+ : EFI32_LOADER_SIGNATURE;
+ memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
+
+ p->efi->efi_systab = (unsigned long)sys_table_arg;
+ p->efi->efi_memdesc_size = *map->desc_size;
+ p->efi->efi_memdesc_version = *map->desc_ver;
+ p->efi->efi_memmap = (unsigned long)*map->map;
+ p->efi->efi_memmap_size = *map->map_size;
+
+#ifdef CONFIG_X86_64
+ p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32;
+ p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
+#endif
+
+ return EFI_SUCCESS;
+}
+
+static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
+{
+ unsigned long map_sz, key, desc_size, buff_size;
+ efi_memory_desc_t *mem_map;
+ struct setup_data *e820ext = NULL;
+ __u32 e820ext_size = 0;
+ efi_status_t status;
+ __u32 desc_version;
+ struct efi_boot_memmap map;
+ struct exit_boot_struct priv;
+
+ map.map = &mem_map;
+ map.map_size = &map_sz;
+ map.desc_size = &desc_size;
+ map.desc_ver = &desc_version;
+ map.key_ptr = &key;
+ map.buff_size = &buff_size;
+ priv.boot_params = boot_params;
+ priv.efi = &boot_params->efi_info;
+
+ status = allocate_e820(boot_params, &e820ext, &e820ext_size);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ /* Might as well exit boot services now */
+ status = efi_exit_boot_services(sys_table, handle, &map, &priv,
+ exit_boot_func);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ /* Historic? */
+ boot_params->alt_mem_k = 32 * 1024;
+
+ status = setup_e820(boot_params, e820ext, e820ext_size);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ return EFI_SUCCESS;
+}
+
+/*
+ * On success we return a pointer to a boot_params structure, and NULL
+ * on failure.
+ */
+struct boot_params *
+efi_main(struct efi_config *c, struct boot_params *boot_params)
+{
+ struct desc_ptr *gdt = NULL;
+ efi_loaded_image_t *image;
+ struct setup_header *hdr = &boot_params->hdr;
+ efi_status_t status;
+ struct desc_struct *desc;
+ void *handle;
+ efi_system_table_t *_table;
+ unsigned long cmdline_paddr;
+
+ efi_early = c;
+
+ _table = (efi_system_table_t *)(unsigned long)efi_early->table;
+ handle = (void *)(unsigned long)efi_early->image_handle;
+
+ sys_table = _table;
+
+ /* Check if we were booted by the EFI firmware */
+ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ goto fail;
+
+ if (efi_is_64bit())
+ setup_boot_services64(efi_early);
+ else
+ setup_boot_services32(efi_early);
+
+ /*
+ * make_boot_params() may have been called before efi_main(), in which
+ * case this is the second time we parse the cmdline. This is ok,
+ * parsing the cmdline multiple times does not have side-effects.
+ */
+ cmdline_paddr = ((u64)hdr->cmd_line_ptr |
+ ((u64)boot_params->ext_cmd_line_ptr << 32));
+ efi_parse_options((char *)cmdline_paddr);
+
+ /*
+ * If the boot loader gave us a value for secure_boot then we use that,
+ * otherwise we ask the BIOS.
+ */
+ if (boot_params->secure_boot == efi_secureboot_mode_unset)
+ boot_params->secure_boot = efi_get_secureboot(sys_table);
+
+ /* Ask the firmware to clear memory on unclean shutdown */
+ efi_enable_reset_attack_mitigation(sys_table);
+ efi_retrieve_tpm2_eventlog(sys_table);
+
+ setup_graphics(boot_params);
+
+ setup_efi_pci(boot_params);
+
+ setup_quirks(boot_params);
+
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+ sizeof(*gdt), (void **)&gdt);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to allocate memory for 'gdt' structure\n");
+ goto fail;
+ }
+
+ gdt->size = 0x800;
+ status = efi_low_alloc(sys_table, gdt->size, 8,
+ (unsigned long *)&gdt->address);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n");
+ goto fail;
+ }
+
+ /*
+ * If the kernel isn't already loaded at the preferred load
+ * address, relocate it.
+ */
+ if (hdr->pref_address != hdr->code32_start) {
+ unsigned long bzimage_addr = hdr->code32_start;
+ status = efi_relocate_kernel(sys_table, &bzimage_addr,
+ hdr->init_size, hdr->init_size,
+ hdr->pref_address,
+ hdr->kernel_alignment);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "efi_relocate_kernel() failed!\n");
+ goto fail;
+ }
+
+ hdr->pref_address = hdr->code32_start;
+ hdr->code32_start = bzimage_addr;
+ }
+
+ status = exit_boot(boot_params, handle);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "exit_boot() failed!\n");
+ goto fail;
+ }
+
+ memset((char *)gdt->address, 0x0, gdt->size);
+ desc = (struct desc_struct *)gdt->address;
+
+ /* The first GDT is a dummy. */
+ desc++;
+
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ /* __KERNEL32_CS */
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0xf;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+
+ desc++;
+ } else {
+ /* Second entry is unused on 32-bit */
+ desc++;
+ }
+
+ /* __KERNEL_CS */
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0xf;
+ desc->avl = 0;
+
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ desc->l = 1;
+ desc->d = 0;
+ } else {
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ }
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+ desc++;
+
+ /* __KERNEL_DS */
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0xf;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+ desc++;
+
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ /* Task segment value */
+ desc->limit0 = 0x0000;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_TSS;
+ desc->s = 0;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0x0;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = 0;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+ desc++;
+ }
+
+ asm volatile("cli");
+ asm volatile ("lgdt %0" : : "m" (*gdt));
+
+ return boot_params;
+fail:
+ efi_printk(sys_table, "efi_main() failed!\n");
+
+ return NULL;
+}
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
new file mode 100644
index 000000000..8297387c4
--- /dev/null
+++ b/arch/x86/boot/compressed/eboot.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_EBOOT_H
+#define BOOT_COMPRESSED_EBOOT_H
+
+#define SEG_TYPE_DATA (0 << 3)
+#define SEG_TYPE_READ_WRITE (1 << 1)
+#define SEG_TYPE_CODE (1 << 3)
+#define SEG_TYPE_EXEC_READ (1 << 1)
+#define SEG_TYPE_TSS ((1 << 3) | (1 << 0))
+#define SEG_OP_SIZE_32BIT (1 << 0)
+#define SEG_GRANULARITY_4KB (1 << 0)
+
+#define DESC_TYPE_CODE_DATA (1 << 0)
+
+typedef struct {
+ u32 get_mode;
+ u32 set_mode;
+ u32 blt;
+} efi_uga_draw_protocol_32_t;
+
+typedef struct {
+ u64 get_mode;
+ u64 set_mode;
+ u64 blt;
+} efi_uga_draw_protocol_64_t;
+
+typedef struct {
+ void *get_mode;
+ void *set_mode;
+ void *blt;
+} efi_uga_draw_protocol_t;
+
+#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S
new file mode 100644
index 000000000..257e341fd
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_stub_32.S
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * EFI call stub for IA32.
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off. Note that this implementation is different from the one in
+ * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
+ * mode at this point.
+ */
+
+#include <linux/linkage.h>
+#include <asm/page_types.h>
+
+/*
+ * efi_call_phys(void *, ...) is a function with variable parameters.
+ * All the callers of this function assure that all the parameters are 4-bytes.
+ */
+
+/*
+ * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
+ * So we'd better save all of them at the beginning of this function and restore
+ * at the end no matter how many we use, because we can not assure EFI runtime
+ * service functions will comply with gcc calling convention, too.
+ */
+
+.text
+ENTRY(efi_call_phys)
+ /*
+ * 0. The function can only be called in Linux kernel. So CS has been
+ * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
+ * the values of these registers are the same. And, the corresponding
+ * GDT entries are identical. So I will do nothing about segment reg
+ * and GDT, but change GDT base register in prelog and epilog.
+ */
+
+ /*
+ * 1. Because we haven't been relocated by this point we need to
+ * use relative addressing.
+ */
+ call 1f
+1: popl %edx
+ subl $1b, %edx
+
+ /*
+ * 2. Now on the top of stack is the return
+ * address in the caller of efi_call_phys(), then parameter 1,
+ * parameter 2, ..., param n. To make things easy, we save the return
+ * address of efi_call_phys in a global variable.
+ */
+ popl %ecx
+ movl %ecx, saved_return_addr(%edx)
+ /* get the function pointer into ECX*/
+ popl %ecx
+ movl %ecx, efi_rt_function_ptr(%edx)
+
+ /*
+ * 3. Call the physical function.
+ */
+ call *%ecx
+
+ /*
+ * 4. Balance the stack. And because EAX contain the return value,
+ * we'd better not clobber it. We need to calculate our address
+ * again because %ecx and %edx are not preserved across EFI function
+ * calls.
+ */
+ call 1f
+1: popl %edx
+ subl $1b, %edx
+
+ movl efi_rt_function_ptr(%edx), %ecx
+ pushl %ecx
+
+ /*
+ * 10. Push the saved return address onto the stack and return.
+ */
+ movl saved_return_addr(%edx), %ecx
+ pushl %ecx
+ ret
+ENDPROC(efi_call_phys)
+.previous
+
+.data
+saved_return_addr:
+ .long 0
+efi_rt_function_ptr:
+ .long 0
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
new file mode 100644
index 000000000..99494dff2
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_stub_64.S
@@ -0,0 +1,5 @@
+#include <asm/segment.h>
+#include <asm/msr.h>
+#include <asm/processor-flags.h>
+
+#include "../../platform/efi/efi_stub_64.S"
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
new file mode 100644
index 000000000..bff9ab7c6
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
+ *
+ * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+ *
+ * Because this thunking occurs before ExitBootServices() we have to
+ * restore the firmware's 32-bit GDT before we make EFI serivce calls,
+ * since the firmware's 32-bit IDT is still currently installed and it
+ * needs to be able to service interrupts.
+ *
+ * On the plus side, we don't have to worry about mangling 64-bit
+ * addresses into 32-bits because we're executing with an identify
+ * mapped pagetable and haven't transitioned to 64-bit virtual addresses
+ * yet.
+ */
+
+#include <linux/linkage.h>
+#include <asm/msr.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+ .code64
+ .text
+ENTRY(efi64_thunk)
+ push %rbp
+ push %rbx
+
+ subq $8, %rsp
+ leaq efi_exit32(%rip), %rax
+ movl %eax, 4(%rsp)
+ leaq efi_gdt64(%rip), %rax
+ movl %eax, (%rsp)
+ movl %eax, 2(%rax) /* Fixup the gdt base address */
+
+ movl %ds, %eax
+ push %rax
+ movl %es, %eax
+ push %rax
+ movl %ss, %eax
+ push %rax
+
+ /*
+ * Convert x86-64 ABI params to i386 ABI
+ */
+ subq $32, %rsp
+ movl %esi, 0x0(%rsp)
+ movl %edx, 0x4(%rsp)
+ movl %ecx, 0x8(%rsp)
+ movq %r8, %rsi
+ movl %esi, 0xc(%rsp)
+ movq %r9, %rsi
+ movl %esi, 0x10(%rsp)
+
+ sgdt save_gdt(%rip)
+
+ leaq 1f(%rip), %rbx
+ movq %rbx, func_rt_ptr(%rip)
+
+ /*
+ * Switch to gdt with 32-bit segments. This is the firmware GDT
+ * that was installed when the kernel started executing. This
+ * pointer was saved at the EFI stub entry point in head_64.S.
+ */
+ leaq efi32_boot_gdt(%rip), %rax
+ lgdt (%rax)
+
+ pushq $__KERNEL_CS
+ leaq efi_enter32(%rip), %rax
+ pushq %rax
+ lretq
+
+1: addq $32, %rsp
+
+ lgdt save_gdt(%rip)
+
+ pop %rbx
+ movl %ebx, %ss
+ pop %rbx
+ movl %ebx, %es
+ pop %rbx
+ movl %ebx, %ds
+
+ /*
+ * Convert 32-bit status code into 64-bit.
+ */
+ test %rax, %rax
+ jz 1f
+ movl %eax, %ecx
+ andl $0x0fffffff, %ecx
+ andl $0xf0000000, %eax
+ shl $32, %rax
+ or %rcx, %rax
+1:
+ addq $8, %rsp
+ pop %rbx
+ pop %rbp
+ ret
+ENDPROC(efi64_thunk)
+
+ENTRY(efi_exit32)
+ movq func_rt_ptr(%rip), %rax
+ push %rax
+ mov %rdi, %rax
+ ret
+ENDPROC(efi_exit32)
+
+ .code32
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+ENTRY(efi_enter32)
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+
+ /* Reload pgtables */
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /* Disable long mode via EFER */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btrl $_EFER_LME, %eax
+ wrmsr
+
+ call *%edi
+
+ /* We must preserve return value */
+ movl %eax, %edi
+
+ /*
+ * Some firmware will return with interrupts enabled. Be sure to
+ * disable them before we switch GDTs.
+ */
+ cli
+
+ movl 56(%esp), %eax
+ movl %eax, 2(%eax)
+ lgdtl (%eax)
+
+ movl %cr4, %eax
+ btsl $(X86_CR4_PAE_BIT), %eax
+ movl %eax, %cr4
+
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ xorl %eax, %eax
+ lldt %ax
+
+ movl 60(%esp), %eax
+ pushl $__KERNEL_CS
+ pushl %eax
+
+ /* Enable paging */
+ movl %cr0, %eax
+ btsl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+ lret
+ENDPROC(efi_enter32)
+
+ .data
+ .balign 8
+ .global efi32_boot_gdt
+efi32_boot_gdt: .word 0
+ .quad 0
+
+save_gdt: .word 0
+ .quad 0
+func_rt_ptr: .quad 0
+
+ .global efi_gdt64
+efi_gdt64:
+ .word efi_gdt64_end - efi_gdt64
+ .long 0 /* Filled out by user */
+ .word 0
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x00af9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+ .quad 0x0080890000000000 /* TS descriptor */
+ .quad 0x0000000000000000 /* TS continued */
+efi_gdt64_end:
diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c
new file mode 100644
index 000000000..c881878e5
--- /dev/null
+++ b/arch/x86/boot/compressed/error.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Callers outside of misc.c need access to the error reporting routines,
+ * but the *_putstr() functions need to stay in misc.c because of how
+ * memcpy() and memmove() are defined for the compressed boot environment.
+ */
+#include "misc.h"
+#include "error.h"
+
+void warn(char *m)
+{
+ error_putstr("\n\n");
+ error_putstr(m);
+ error_putstr("\n\n");
+}
+
+void error(char *m)
+{
+ warn(m);
+ error_putstr(" -- System halted");
+
+ while (1)
+ asm("hlt");
+}
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
new file mode 100644
index 000000000..1de582118
--- /dev/null
+++ b/arch/x86/boot/compressed/error.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_ERROR_H
+#define BOOT_COMPRESSED_ERROR_H
+
+#include <linux/compiler.h>
+
+void warn(char *m);
+void error(char *m) __noreturn;
+
+#endif /* BOOT_COMPRESSED_ERROR_H */
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
new file mode 100644
index 000000000..c6c4b877f
--- /dev/null
+++ b/arch/x86/boot/compressed/head_32.S
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * linux/boot/head.S
+ *
+ * Copyright (C) 1991, 1992, 1993 Linus Torvalds
+ */
+
+/*
+ * head.S contains the 32-bit startup code.
+ *
+ * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
+ * the page directory will exist. The startup code will be overwritten by
+ * the page directory. [According to comments etc elsewhere on a compressed
+ * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
+ *
+ * Page 0 is deliberately kept safe, since System Management Mode code in
+ * laptops may need to access the BIOS data stored there. This is also
+ * useful for future device drivers that either access the BIOS via VM86
+ * mode.
+ */
+
+/*
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ */
+ .text
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page_types.h>
+#include <asm/boot.h>
+#include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
+
+/*
+ * The 32-bit x86 assembler in binutils 2.26 will generate R_386_GOT32X
+ * relocation to get the symbol address in PIC. When the compressed x86
+ * kernel isn't built as PIC, the linker optimizes R_386_GOT32X
+ * relocations to their fixed symbol addresses. However, when the
+ * compressed x86 kernel is loaded at a different address, it leads
+ * to the following load failure:
+ *
+ * Failed to allocate space for phdrs
+ *
+ * during the decompression stage.
+ *
+ * If the compressed x86 kernel is relocatable at run-time, it should be
+ * compiled with -fPIE, instead of -fPIC, if possible and should be built as
+ * Position Independent Executable (PIE) so that linker won't optimize
+ * R_386_GOT32X relocation to its fixed symbol address. Older
+ * linkers generate R_386_32 relocations against locally defined symbols,
+ * _bss, _ebss, _got, _egot and _end, in PIE. It isn't wrong, just less
+ * optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle
+ * R_386_32 relocations when relocating the kernel. To generate
+ * R_386_RELATIVE relocations, we mark _bss, _ebss, _got, _egot and _end as
+ * hidden:
+ */
+ .hidden _bss
+ .hidden _ebss
+ .hidden _got
+ .hidden _egot
+ .hidden _end
+
+ __HEAD
+ENTRY(startup_32)
+ cld
+ /*
+ * Test KEEP_SEGMENTS flag to see if the bootloader is asking
+ * us to not reload segments
+ */
+ testb $KEEP_SEGMENTS, BP_loadflags(%esi)
+ jnz 1f
+
+ cli
+ movl $__BOOT_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
+1:
+
+/*
+ * Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at. This can only be done
+ * with a short local call on x86. Nothing else will tell us what
+ * address we are running at. The reserved chunk of the real-mode
+ * data at 0x1e4 (defined as a scratch field) are used as the stack
+ * for this calculation. Only 4 bytes are needed.
+ */
+ leal (BP_scratch+4)(%esi), %esp
+ call 1f
+1: popl %ebp
+ subl $1b, %ebp
+
+/*
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
+ * contains the address where we should move the kernel image temporarily
+ * for safe in-place decompression.
+ */
+
+#ifdef CONFIG_RELOCATABLE
+ movl %ebp, %ebx
+ movl BP_kernel_alignment(%esi), %eax
+ decl %eax
+ addl %eax, %ebx
+ notl %eax
+ andl %eax, %ebx
+ cmpl $LOAD_PHYSICAL_ADDR, %ebx
+ jae 1f
+#endif
+ movl $LOAD_PHYSICAL_ADDR, %ebx
+1:
+
+ /* Target address to relocate to for decompression */
+ movl BP_init_size(%esi), %eax
+ subl $_end, %eax
+ addl %eax, %ebx
+
+ /* Set up the stack */
+ leal boot_stack_end(%ebx), %esp
+
+ /* Zero EFLAGS */
+ pushl $0
+ popfl
+
+/*
+ * Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+ pushl %esi
+ leal (_bss-4)(%ebp), %esi
+ leal (_bss-4)(%ebx), %edi
+ movl $(_bss - startup_32), %ecx
+ shrl $2, %ecx
+ std
+ rep movsl
+ cld
+ popl %esi
+
+/*
+ * Jump to the relocated address.
+ */
+ leal relocated(%ebx), %eax
+ jmp *%eax
+ENDPROC(startup_32)
+
+#ifdef CONFIG_EFI_STUB
+/*
+ * We don't need the return address, so set up the stack so efi_main() can find
+ * its arguments.
+ */
+ENTRY(efi_pe_entry)
+ add $0x4, %esp
+
+ call 1f
+1: popl %esi
+ subl $1b, %esi
+
+ popl %ecx
+ movl %ecx, efi32_config(%esi) /* Handle */
+ popl %ecx
+ movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */
+
+ /* Relocate efi_config->call() */
+ leal efi32_config(%esi), %eax
+ add %esi, 40(%eax)
+ pushl %eax
+
+ call make_boot_params
+ cmpl $0, %eax
+ je fail
+ movl %esi, BP_code32_start(%eax)
+ popl %ecx
+ pushl %eax
+ pushl %ecx
+ jmp 2f /* Skip efi_config initialization */
+ENDPROC(efi_pe_entry)
+
+ENTRY(efi32_stub_entry)
+ add $0x4, %esp
+ popl %ecx
+ popl %edx
+
+ call 1f
+1: popl %esi
+ subl $1b, %esi
+
+ movl %ecx, efi32_config(%esi) /* Handle */
+ movl %edx, efi32_config+8(%esi) /* EFI System table pointer */
+
+ /* Relocate efi_config->call() */
+ leal efi32_config(%esi), %eax
+ add %esi, 40(%eax)
+ pushl %eax
+2:
+ call efi_main
+ cmpl $0, %eax
+ movl %eax, %esi
+ jne 2f
+fail:
+ /* EFI init failed, so hang. */
+ hlt
+ jmp fail
+2:
+ movl BP_code32_start(%esi), %eax
+ leal startup_32(%eax), %eax
+ jmp *%eax
+ENDPROC(efi32_stub_entry)
+#endif
+
+ .text
+relocated:
+
+/*
+ * Clear BSS (stack is currently empty)
+ */
+ xorl %eax, %eax
+ leal _bss(%ebx), %edi
+ leal _ebss(%ebx), %ecx
+ subl %edi, %ecx
+ shrl $2, %ecx
+ rep stosl
+
+/*
+ * Adjust our own GOT
+ */
+ leal _got(%ebx), %edx
+ leal _egot(%ebx), %ecx
+1:
+ cmpl %ecx, %edx
+ jae 2f
+ addl %ebx, (%edx)
+ addl $4, %edx
+ jmp 1b
+2:
+
+/*
+ * Do the extraction, and jump to the new kernel..
+ */
+ /* push arguments for extract_kernel: */
+ pushl $z_output_len /* decompressed length, end of relocs */
+
+ movl BP_init_size(%esi), %eax
+ subl $_end, %eax
+ movl %ebx, %ebp
+ subl %eax, %ebp
+ pushl %ebp /* output address */
+
+ pushl $z_input_len /* input_len */
+ leal input_data(%ebx), %eax
+ pushl %eax /* input_data */
+ leal boot_heap(%ebx), %eax
+ pushl %eax /* heap area */
+ pushl %esi /* real mode pointer */
+ call extract_kernel /* returns kernel location in %eax */
+ addl $24, %esp
+
+/*
+ * Jump to the extracted kernel.
+ */
+ xorl %ebx, %ebx
+ jmp *%eax
+
+#ifdef CONFIG_EFI_STUB
+ .data
+efi32_config:
+ .fill 5,8,0
+ .long efi_call_phys
+ .long 0
+ .byte 0
+#endif
+
+/*
+ * Stack and heap for uncompression
+ */
+ .bss
+ .balign 4
+boot_heap:
+ .fill BOOT_HEAP_SIZE, 1, 0
+boot_stack:
+ .fill BOOT_STACK_SIZE, 1, 0
+boot_stack_end:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
new file mode 100644
index 000000000..474733f8b
--- /dev/null
+++ b/arch/x86/boot/compressed/head_64.S
@@ -0,0 +1,721 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * linux/boot/head.S
+ *
+ * Copyright (C) 1991, 1992, 1993 Linus Torvalds
+ */
+
+/*
+ * head.S contains the 32-bit startup code.
+ *
+ * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
+ * the page directory will exist. The startup code will be overwritten by
+ * the page directory. [According to comments etc elsewhere on a compressed
+ * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
+ *
+ * Page 0 is deliberately kept safe, since System Management Mode code in
+ * laptops may need to access the BIOS data stored there. This is also
+ * useful for future device drivers that either access the BIOS via VM86
+ * mode.
+ */
+
+/*
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ */
+ .code32
+ .text
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/boot.h>
+#include <asm/msr.h>
+#include <asm/processor-flags.h>
+#include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
+#include "pgtable.h"
+
+/*
+ * Locally defined symbols should be marked hidden:
+ */
+ .hidden _bss
+ .hidden _ebss
+ .hidden _got
+ .hidden _egot
+ .hidden _end
+
+ __HEAD
+ .code32
+ENTRY(startup_32)
+ /*
+ * 32bit entry is 0 and it is ABI so immutable!
+ * If we come here directly from a bootloader,
+ * kernel(text+data+bss+brk) ramdisk, zero_page, command line
+ * all need to be under the 4G limit.
+ */
+ cld
+ /*
+ * Test KEEP_SEGMENTS flag to see if the bootloader is asking
+ * us to not reload segments
+ */
+ testb $KEEP_SEGMENTS, BP_loadflags(%esi)
+ jnz 1f
+
+ cli
+ movl $(__BOOT_DS), %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+1:
+
+/*
+ * Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at. This can only be done
+ * with a short local call on x86. Nothing else will tell us what
+ * address we are running at. The reserved chunk of the real-mode
+ * data at 0x1e4 (defined as a scratch field) are used as the stack
+ * for this calculation. Only 4 bytes are needed.
+ */
+ leal (BP_scratch+4)(%esi), %esp
+ call 1f
+1: popl %ebp
+ subl $1b, %ebp
+
+/* setup a stack and make sure cpu supports long mode. */
+ movl $boot_stack_end, %eax
+ addl %ebp, %eax
+ movl %eax, %esp
+
+ call verify_cpu
+ testl %eax, %eax
+ jnz no_longmode
+
+/*
+ * Compute the delta between where we were compiled to run at
+ * and where the code will actually run at.
+ *
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
+ * contains the address where we should move the kernel image temporarily
+ * for safe in-place decompression.
+ */
+
+#ifdef CONFIG_RELOCATABLE
+ movl %ebp, %ebx
+ movl BP_kernel_alignment(%esi), %eax
+ decl %eax
+ addl %eax, %ebx
+ notl %eax
+ andl %eax, %ebx
+ cmpl $LOAD_PHYSICAL_ADDR, %ebx
+ jae 1f
+#endif
+ movl $LOAD_PHYSICAL_ADDR, %ebx
+1:
+
+ /* Target address to relocate to for decompression */
+ movl BP_init_size(%esi), %eax
+ subl $_end, %eax
+ addl %eax, %ebx
+
+/*
+ * Prepare for entering 64 bit mode
+ */
+
+ /* Load new GDT with the 64bit segments using 32bit descriptor */
+ addl %ebp, gdt+2(%ebp)
+ lgdt gdt(%ebp)
+
+ /* Enable PAE mode */
+ movl %cr4, %eax
+ orl $X86_CR4_PAE, %eax
+ movl %eax, %cr4
+
+ /*
+ * Build early 4G boot pagetable
+ */
+ /*
+ * If SEV is active then set the encryption mask in the page tables.
+ * This will insure that when the kernel is copied and decompressed
+ * it will be done so encrypted.
+ */
+ call get_sev_encryption_bit
+ xorl %edx, %edx
+ testl %eax, %eax
+ jz 1f
+ subl $32, %eax /* Encryption bit is always above bit 31 */
+ bts %eax, %edx /* Set encryption mask for page tables */
+1:
+
+ /* Initialize Page tables to 0 */
+ leal pgtable(%ebx), %edi
+ xorl %eax, %eax
+ movl $(BOOT_INIT_PGT_SIZE/4), %ecx
+ rep stosl
+
+ /* Build Level 4 */
+ leal pgtable + 0(%ebx), %edi
+ leal 0x1007 (%edi), %eax
+ movl %eax, 0(%edi)
+ addl %edx, 4(%edi)
+
+ /* Build Level 3 */
+ leal pgtable + 0x1000(%ebx), %edi
+ leal 0x1007(%edi), %eax
+ movl $4, %ecx
+1: movl %eax, 0x00(%edi)
+ addl %edx, 0x04(%edi)
+ addl $0x00001000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Build Level 2 */
+ leal pgtable + 0x2000(%ebx), %edi
+ movl $0x00000183, %eax
+ movl $2048, %ecx
+1: movl %eax, 0(%edi)
+ addl %edx, 4(%edi)
+ addl $0x00200000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Enable the boot page tables */
+ leal pgtable(%ebx), %eax
+ movl %eax, %cr3
+
+ /* Enable Long mode in EFER (Extended Feature Enable Register) */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ /* After gdt is loaded */
+ xorl %eax, %eax
+ lldt %ax
+ movl $__BOOT_TSS, %eax
+ ltr %ax
+
+ /*
+ * Setup for the jump to 64bit mode
+ *
+ * When the jump is performend we will be in long mode but
+ * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
+ * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
+ * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+ * We place all of the values on our mini stack so lret can
+ * used to perform that far jump.
+ */
+ pushl $__KERNEL_CS
+ leal startup_64(%ebp), %eax
+#ifdef CONFIG_EFI_MIXED
+ movl efi32_config(%ebp), %ebx
+ cmp $0, %ebx
+ jz 1f
+ leal handover_entry(%ebp), %eax
+1:
+#endif
+ pushl %eax
+
+ /* Enter paged protected Mode, activating Long Mode */
+ movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
+ movl %eax, %cr0
+
+ /* Jump from 32bit compatibility mode into 64bit mode. */
+ lret
+ENDPROC(startup_32)
+
+#ifdef CONFIG_EFI_MIXED
+ .org 0x190
+ENTRY(efi32_stub_entry)
+ add $0x4, %esp /* Discard return address */
+ popl %ecx
+ popl %edx
+ popl %esi
+
+ leal (BP_scratch+4)(%esi), %esp
+ call 1f
+1: pop %ebp
+ subl $1b, %ebp
+
+ movl %ecx, efi32_config(%ebp)
+ movl %edx, efi32_config+8(%ebp)
+ sgdtl efi32_boot_gdt(%ebp)
+
+ leal efi32_config(%ebp), %eax
+ movl %eax, efi_config(%ebp)
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ jmp startup_32
+ENDPROC(efi32_stub_entry)
+#endif
+
+ .code64
+ .org 0x200
+ENTRY(startup_64)
+ /*
+ * 64bit entry is 0x200 and it is ABI so immutable!
+ * We come here either from startup_32 or directly from a
+ * 64bit bootloader.
+ * If we come here from a bootloader, kernel(text+data+bss+brk),
+ * ramdisk, zero_page, command line could be above 4G.
+ * We depend on an identity mapped page table being provided
+ * that maps our entire kernel(text+data+bss+brk), zero page
+ * and command line.
+ */
+
+ /* Setup data segments. */
+ xorl %eax, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+ movl %eax, %fs
+ movl %eax, %gs
+
+ /*
+ * Compute the decompressed kernel start address. It is where
+ * we were loaded at aligned to a 2M boundary. %rbp contains the
+ * decompressed kernel start address.
+ *
+ * If it is a relocatable kernel then decompress and run the kernel
+ * from load address aligned to 2MB addr, otherwise decompress and
+ * run the kernel from LOAD_PHYSICAL_ADDR
+ *
+ * We cannot rely on the calculation done in 32-bit mode, since we
+ * may have been invoked via the 64-bit entry point.
+ */
+
+ /* Start with the delta to where the kernel will run at. */
+#ifdef CONFIG_RELOCATABLE
+ leaq startup_32(%rip) /* - $startup_32 */, %rbp
+ movl BP_kernel_alignment(%rsi), %eax
+ decl %eax
+ addq %rax, %rbp
+ notq %rax
+ andq %rax, %rbp
+ cmpq $LOAD_PHYSICAL_ADDR, %rbp
+ jae 1f
+#endif
+ movq $LOAD_PHYSICAL_ADDR, %rbp
+1:
+
+ /* Target address to relocate to for decompression */
+ movl BP_init_size(%rsi), %ebx
+ subl $_end, %ebx
+ addq %rbp, %rbx
+
+ /* Set up the stack */
+ leaq boot_stack_end(%rbx), %rsp
+
+ /*
+ * paging_prepare() and cleanup_trampoline() below can have GOT
+ * references. Adjust the table with address we are running at.
+ *
+ * Zero RAX for adjust_got: the GOT was not adjusted before;
+ * there's no adjustment to undo.
+ */
+ xorq %rax, %rax
+
+ /*
+ * Calculate the address the binary is loaded at and use it as
+ * a GOT adjustment.
+ */
+ call 1f
+1: popq %rdi
+ subq $1b, %rdi
+
+ call adjust_got
+
+ /*
+ * At this point we are in long mode with 4-level paging enabled,
+ * but we might want to enable 5-level paging or vice versa.
+ *
+ * The problem is that we cannot do it directly. Setting or clearing
+ * CR4.LA57 in long mode would trigger #GP. So we need to switch off
+ * long mode and paging first.
+ *
+ * We also need a trampoline in lower memory to switch over from
+ * 4- to 5-level paging for cases when the bootloader puts the kernel
+ * above 4G, but didn't enable 5-level paging for us.
+ *
+ * The same trampoline can be used to switch from 5- to 4-level paging
+ * mode, like when starting 4-level paging kernel via kexec() when
+ * original kernel worked in 5-level paging mode.
+ *
+ * For the trampoline, we need the top page table to reside in lower
+ * memory as we don't have a way to load 64-bit values into CR3 in
+ * 32-bit mode.
+ *
+ * We go though the trampoline even if we don't have to: if we're
+ * already in a desired paging mode. This way the trampoline code gets
+ * tested on every boot.
+ */
+
+ /* Make sure we have GDT with 32-bit code segment */
+ leaq gdt(%rip), %rax
+ movq %rax, gdt64+2(%rip)
+ lgdt gdt64(%rip)
+
+ /*
+ * paging_prepare() sets up the trampoline and checks if we need to
+ * enable 5-level paging.
+ *
+ * Address of the trampoline is returned in RAX.
+ * Non zero RDX on return means we need to enable 5-level paging.
+ *
+ * RSI holds real mode data and needs to be preserved across
+ * this function call.
+ */
+ pushq %rsi
+ movq %rsi, %rdi /* real mode address */
+ call paging_prepare
+ popq %rsi
+
+ /* Save the trampoline address in RCX */
+ movq %rax, %rcx
+
+ /*
+ * Load the address of trampoline_return() into RDI.
+ * It will be used by the trampoline to return to the main code.
+ */
+ leaq trampoline_return(%rip), %rdi
+
+ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+ pushq $__KERNEL32_CS
+ leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
+ pushq %rax
+ lretq
+trampoline_return:
+ /* Restore the stack, the 32-bit trampoline uses its own stack */
+ leaq boot_stack_end(%rbx), %rsp
+
+ /*
+ * cleanup_trampoline() would restore trampoline memory.
+ *
+ * RDI is address of the page table to use instead of page table
+ * in trampoline memory (if required).
+ *
+ * RSI holds real mode data and needs to be preserved across
+ * this function call.
+ */
+ pushq %rsi
+ leaq top_pgtable(%rbx), %rdi
+ call cleanup_trampoline
+ popq %rsi
+
+ /* Zero EFLAGS */
+ pushq $0
+ popfq
+
+ /*
+ * Previously we've adjusted the GOT with address the binary was
+ * loaded at. Now we need to re-adjust for relocation address.
+ *
+ * Calculate the address the binary is loaded at, so that we can
+ * undo the previous GOT adjustment.
+ */
+ call 1f
+1: popq %rax
+ subq $1b, %rax
+
+ /* The new adjustment is the relocation address */
+ movq %rbx, %rdi
+ call adjust_got
+
+/*
+ * Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+ pushq %rsi
+ leaq (_bss-8)(%rip), %rsi
+ leaq (_bss-8)(%rbx), %rdi
+ movq $_bss /* - $startup_32 */, %rcx
+ shrq $3, %rcx
+ std
+ rep movsq
+ cld
+ popq %rsi
+
+/*
+ * Jump to the relocated address.
+ */
+ leaq relocated(%rbx), %rax
+ jmp *%rax
+
+#ifdef CONFIG_EFI_STUB
+
+/* The entry point for the PE/COFF executable is efi_pe_entry. */
+ENTRY(efi_pe_entry)
+ movq %rcx, efi64_config(%rip) /* Handle */
+ movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */
+
+ leaq efi64_config(%rip), %rax
+ movq %rax, efi_config(%rip)
+
+ call 1f
+1: popq %rbp
+ subq $1b, %rbp
+
+ /*
+ * Relocate efi_config->call().
+ */
+ addq %rbp, efi64_config+40(%rip)
+
+ movq %rax, %rdi
+ call make_boot_params
+ cmpq $0,%rax
+ je fail
+ mov %rax, %rsi
+ leaq startup_32(%rip), %rax
+ movl %eax, BP_code32_start(%rsi)
+ jmp 2f /* Skip the relocation */
+
+handover_entry:
+ call 1f
+1: popq %rbp
+ subq $1b, %rbp
+
+ /*
+ * Relocate efi_config->call().
+ */
+ movq efi_config(%rip), %rax
+ addq %rbp, 40(%rax)
+2:
+ movq efi_config(%rip), %rdi
+ call efi_main
+ movq %rax,%rsi
+ cmpq $0,%rax
+ jne 2f
+fail:
+ /* EFI init failed, so hang. */
+ hlt
+ jmp fail
+2:
+ movl BP_code32_start(%esi), %eax
+ leaq startup_64(%rax), %rax
+ jmp *%rax
+ENDPROC(efi_pe_entry)
+
+ .org 0x390
+ENTRY(efi64_stub_entry)
+ movq %rdi, efi64_config(%rip) /* Handle */
+ movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */
+
+ leaq efi64_config(%rip), %rax
+ movq %rax, efi_config(%rip)
+
+ movq %rdx, %rsi
+ jmp handover_entry
+ENDPROC(efi64_stub_entry)
+#endif
+
+ .text
+relocated:
+
+/*
+ * Clear BSS (stack is currently empty)
+ */
+ xorl %eax, %eax
+ leaq _bss(%rip), %rdi
+ leaq _ebss(%rip), %rcx
+ subq %rdi, %rcx
+ shrq $3, %rcx
+ rep stosq
+
+/*
+ * Do the extraction, and jump to the new kernel..
+ */
+ pushq %rsi /* Save the real mode argument */
+ movq %rsi, %rdi /* real mode address */
+ leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
+ leaq input_data(%rip), %rdx /* input_data */
+ movl $z_input_len, %ecx /* input_len */
+ movq %rbp, %r8 /* output target address */
+ movq $z_output_len, %r9 /* decompressed length, end of relocs */
+ call extract_kernel /* returns kernel location in %rax */
+ popq %rsi
+
+/*
+ * Jump to the decompressed kernel.
+ */
+ jmp *%rax
+
+/*
+ * Adjust the global offset table
+ *
+ * RAX is the previous adjustment of the table to undo (use 0 if it's the
+ * first time we touch GOT).
+ * RDI is the new adjustment to apply.
+ */
+adjust_got:
+ /* Walk through the GOT adding the address to the entries */
+ leaq _got(%rip), %rdx
+ leaq _egot(%rip), %rcx
+1:
+ cmpq %rcx, %rdx
+ jae 2f
+ subq %rax, (%rdx) /* Undo previous adjustment */
+ addq %rdi, (%rdx) /* Apply the new adjustment */
+ addq $8, %rdx
+ jmp 1b
+2:
+ ret
+
+ .code32
+/*
+ * This is the 32-bit trampoline that will be copied over to low memory.
+ *
+ * RDI contains the return address (might be above 4G).
+ * ECX contains the base address of the trampoline memory.
+ * Non zero RDX on return means we need to enable 5-level paging.
+ */
+ENTRY(trampoline_32bit_src)
+ /* Set up data and stack segments */
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
+ movl %eax, %ss
+
+ /* Set up new stack */
+ leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /* Check what paging mode we want to be in after the trampoline */
+ cmpl $0, %edx
+ jz 1f
+
+ /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
+ movl %cr4, %eax
+ testl $X86_CR4_LA57, %eax
+ jnz 3f
+ jmp 2f
+1:
+ /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
+ movl %cr4, %eax
+ testl $X86_CR4_LA57, %eax
+ jz 3f
+2:
+ /* Point CR3 to the trampoline's new top level page table */
+ leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
+ movl %eax, %cr3
+3:
+ /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
+ pushl %ecx
+ pushl %edx
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+ popl %edx
+ popl %ecx
+
+ /* Enable PAE and LA57 (if required) paging modes */
+ movl $X86_CR4_PAE, %eax
+ cmpl $0, %edx
+ jz 1f
+ orl $X86_CR4_LA57, %eax
+1:
+ movl %eax, %cr4
+
+ /* Calculate address of paging_enabled() once we are executing in the trampoline */
+ leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
+
+ /* Prepare the stack for far return to Long Mode */
+ pushl $__KERNEL_CS
+ pushl %eax
+
+ /* Enable paging again */
+ movl $(X86_CR0_PG | X86_CR0_PE), %eax
+ movl %eax, %cr0
+
+ lret
+
+ .code64
+paging_enabled:
+ /* Return from the trampoline */
+ jmp *%rdi
+
+ /*
+ * The trampoline code has a size limit.
+ * Make sure we fail to compile if the trampoline code grows
+ * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
+ */
+ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
+
+ .code32
+no_longmode:
+ /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
+1:
+ hlt
+ jmp 1b
+
+#include "../../kernel/verify_cpu.S"
+
+ .data
+gdt64:
+ .word gdt_end - gdt
+ .long 0
+ .word 0
+ .quad 0
+gdt:
+ .word gdt_end - gdt
+ .long gdt
+ .word 0
+ .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
+ .quad 0x00af9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+ .quad 0x0080890000000000 /* TS descriptor */
+ .quad 0x0000000000000000 /* TS continued */
+gdt_end:
+
+#ifdef CONFIG_EFI_STUB
+efi_config:
+ .quad 0
+
+#ifdef CONFIG_EFI_MIXED
+ .global efi32_config
+efi32_config:
+ .fill 5,8,0
+ .quad efi64_thunk
+ .byte 0
+#endif
+
+ .global efi64_config
+efi64_config:
+ .fill 5,8,0
+ .quad efi_call
+ .byte 1
+#endif /* CONFIG_EFI_STUB */
+
+/*
+ * Stack and heap for uncompression
+ */
+ .bss
+ .balign 4
+boot_heap:
+ .fill BOOT_HEAP_SIZE, 1, 0
+boot_stack:
+ .fill BOOT_STACK_SIZE, 1, 0
+boot_stack_end:
+
+/*
+ * Space for page tables (not in .bss so not zeroed)
+ */
+ .section ".pgtable","a",@nobits
+ .balign 4096
+pgtable:
+ .fill BOOT_PGT_SIZE, 1, 0
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+top_pgtable:
+ .fill PAGE_SIZE, 1, 0
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
new file mode 100644
index 000000000..d1e19f358
--- /dev/null
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -0,0 +1,867 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * kaslr.c
+ *
+ * This contains the routines needed to generate a reasonable level of
+ * entropy to choose a randomized kernel base address offset in support
+ * of Kernel Address Space Layout Randomization (KASLR). Additionally
+ * handles walking the physical memory maps (and tracking memory regions
+ * to avoid) in order to select a physical memory location that can
+ * contain the entire properly aligned running kernel image.
+ *
+ */
+
+/*
+ * isspace() in linux/ctype.h is expected by next_args() to filter
+ * out "space/lf/tab". While boot/ctype.h conflicts with linux/ctype.h,
+ * since isdigit() is implemented in both of them. Hence disable it
+ * here.
+ */
+#define BOOT_CTYPE_H
+
+/*
+ * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
+ * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
+ * which is meaningless and will cause compiling error in some cases.
+ */
+#define __DISABLE_EXPORTS
+
+#include "misc.h"
+#include "error.h"
+#include "../string.h"
+
+#include <generated/compile.h>
+#include <linux/module.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+#include <linux/ctype.h>
+#include <linux/efi.h>
+#include <generated/utsrelease.h>
+#include <asm/efi.h>
+
+/* Macros used by the included decompressor code below. */
+#define STATIC
+#include <linux/decompress/mm.h>
+
+#ifdef CONFIG_X86_5LEVEL
+unsigned int __pgtable_l5_enabled;
+unsigned int pgdir_shift __ro_after_init = 39;
+unsigned int ptrs_per_p4d __ro_after_init = 1;
+#endif
+
+extern unsigned long get_cmd_line_ptr(void);
+
+/* Used by PAGE_KERN* macros: */
+pteval_t __default_kernel_pte_mask __read_mostly = ~0;
+
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+ LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+static unsigned long rotate_xor(unsigned long hash, const void *area,
+ size_t size)
+{
+ size_t i;
+ unsigned long *ptr = (unsigned long *)area;
+
+ for (i = 0; i < size / sizeof(hash); i++) {
+ /* Rotate by odd number of bits and XOR. */
+ hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+ hash ^= ptr[i];
+ }
+
+ return hash;
+}
+
+/* Attempt to create a simple but unpredictable starting entropy. */
+static unsigned long get_boot_seed(void)
+{
+ unsigned long hash = 0;
+
+ hash = rotate_xor(hash, build_str, sizeof(build_str));
+ hash = rotate_xor(hash, boot_params, sizeof(*boot_params));
+
+ return hash;
+}
+
+#define KASLR_COMPRESSED_BOOT
+#include "../../lib/kaslr.c"
+
+struct mem_vector {
+ unsigned long long start;
+ unsigned long long size;
+};
+
+/* Only supporting at most 4 unusable memmap regions with kaslr */
+#define MAX_MEMMAP_REGIONS 4
+
+static bool memmap_too_large;
+
+
+/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
+static unsigned long long mem_limit = ULLONG_MAX;
+
+
+enum mem_avoid_index {
+ MEM_AVOID_ZO_RANGE = 0,
+ MEM_AVOID_INITRD,
+ MEM_AVOID_CMDLINE,
+ MEM_AVOID_BOOTPARAMS,
+ MEM_AVOID_MEMMAP_BEGIN,
+ MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
+ MEM_AVOID_MAX,
+};
+
+static struct mem_vector mem_avoid[MEM_AVOID_MAX];
+
+static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
+{
+ /* Item one is entirely before item two. */
+ if (one->start + one->size <= two->start)
+ return false;
+ /* Item one is entirely after item two. */
+ if (one->start >= two->start + two->size)
+ return false;
+ return true;
+}
+
+char *skip_spaces(const char *str)
+{
+ while (isspace(*str))
+ ++str;
+ return (char *)str;
+}
+#include "../../../../lib/ctype.c"
+#include "../../../../lib/cmdline.c"
+
+static int
+parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
+{
+ char *oldp;
+
+ if (!p)
+ return -EINVAL;
+
+ /* We don't care about this option here */
+ if (!strncmp(p, "exactmap", 8))
+ return -EINVAL;
+
+ oldp = p;
+ *size = memparse(p, &p);
+ if (p == oldp)
+ return -EINVAL;
+
+ switch (*p) {
+ case '#':
+ case '$':
+ case '!':
+ *start = memparse(p + 1, &p);
+ return 0;
+ case '@':
+ /* memmap=nn@ss specifies usable region, should be skipped */
+ *size = 0;
+ /* Fall through */
+ default:
+ /*
+ * If w/o offset, only size specified, memmap=nn[KMG] has the
+ * same behaviour as mem=nn[KMG]. It limits the max address
+ * system can use. Region above the limit should be avoided.
+ */
+ *start = 0;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void mem_avoid_memmap(char *str)
+{
+ static int i;
+
+ if (i >= MAX_MEMMAP_REGIONS)
+ return;
+
+ while (str && (i < MAX_MEMMAP_REGIONS)) {
+ int rc;
+ unsigned long long start, size;
+ char *k = strchr(str, ',');
+
+ if (k)
+ *k++ = 0;
+
+ rc = parse_memmap(str, &start, &size);
+ if (rc < 0)
+ break;
+ str = k;
+
+ if (start == 0) {
+ /* Store the specified memory limit if size > 0 */
+ if (size > 0)
+ mem_limit = size;
+
+ continue;
+ }
+
+ mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
+ mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
+ i++;
+ }
+
+ /* More than 4 memmaps, fail kaslr */
+ if ((i >= MAX_MEMMAP_REGIONS) && str)
+ memmap_too_large = true;
+}
+
+/* Store the number of 1GB huge pages which users specified: */
+static unsigned long max_gb_huge_pages;
+
+static void parse_gb_huge_pages(char *param, char *val)
+{
+ static bool gbpage_sz;
+ char *p;
+
+ if (!strcmp(param, "hugepagesz")) {
+ p = val;
+ if (memparse(p, &p) != PUD_SIZE) {
+ gbpage_sz = false;
+ return;
+ }
+
+ if (gbpage_sz)
+ warn("Repeatedly set hugeTLB page size of 1G!\n");
+ gbpage_sz = true;
+ return;
+ }
+
+ if (!strcmp(param, "hugepages") && gbpage_sz) {
+ p = val;
+ max_gb_huge_pages = simple_strtoull(p, &p, 0);
+ return;
+ }
+}
+
+
+static int handle_mem_options(void)
+{
+ char *args = (char *)get_cmd_line_ptr();
+ size_t len = strlen((char *)args);
+ char *tmp_cmdline;
+ char *param, *val;
+ u64 mem_size;
+
+ if (!strstr(args, "memmap=") && !strstr(args, "mem=") &&
+ !strstr(args, "hugepages"))
+ return 0;
+
+ tmp_cmdline = malloc(len + 1);
+ if (!tmp_cmdline)
+ error("Failed to allocate space for tmp_cmdline");
+
+ memcpy(tmp_cmdline, args, len);
+ tmp_cmdline[len] = 0;
+ args = tmp_cmdline;
+
+ /* Chew leading spaces */
+ args = skip_spaces(args);
+
+ while (*args) {
+ args = next_arg(args, &param, &val);
+ /* Stop at -- */
+ if (!val && strcmp(param, "--") == 0) {
+ warn("Only '--' specified in cmdline");
+ free(tmp_cmdline);
+ return -1;
+ }
+
+ if (!strcmp(param, "memmap")) {
+ mem_avoid_memmap(val);
+ } else if (strstr(param, "hugepages")) {
+ parse_gb_huge_pages(param, val);
+ } else if (!strcmp(param, "mem")) {
+ char *p = val;
+
+ if (!strcmp(p, "nopentium"))
+ continue;
+ mem_size = memparse(p, &p);
+ if (mem_size == 0) {
+ free(tmp_cmdline);
+ return -EINVAL;
+ }
+ mem_limit = mem_size;
+ }
+ }
+
+ free(tmp_cmdline);
+ return 0;
+}
+
+/*
+ * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
+ * The mem_avoid array is used to store the ranges that need to be avoided
+ * when KASLR searches for an appropriate random address. We must avoid any
+ * regions that are unsafe to overlap with during decompression, and other
+ * things like the initrd, cmdline and boot_params. This comment seeks to
+ * explain mem_avoid as clearly as possible since incorrect mem_avoid
+ * memory ranges lead to really hard to debug boot failures.
+ *
+ * The initrd, cmdline, and boot_params are trivial to identify for
+ * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and
+ * MEM_AVOID_BOOTPARAMS respectively below.
+ *
+ * What is not obvious how to avoid is the range of memory that is used
+ * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover
+ * the compressed kernel (ZO) and its run space, which is used to extract
+ * the uncompressed kernel (VO) and relocs.
+ *
+ * ZO's full run size sits against the end of the decompression buffer, so
+ * we can calculate where text, data, bss, etc of ZO are positioned more
+ * easily.
+ *
+ * For additional background, the decompression calculations can be found
+ * in header.S, and the memory diagram is based on the one found in misc.c.
+ *
+ * The following conditions are already enforced by the image layouts and
+ * associated code:
+ * - input + input_size >= output + output_size
+ * - kernel_total_size <= init_size
+ * - kernel_total_size <= output_size (see Note below)
+ * - output + init_size >= output + output_size
+ *
+ * (Note that kernel_total_size and output_size have no fundamental
+ * relationship, but output_size is passed to choose_random_location
+ * as a maximum of the two. The diagram is showing a case where
+ * kernel_total_size is larger than output_size, but this case is
+ * handled by bumping output_size.)
+ *
+ * The above conditions can be illustrated by a diagram:
+ *
+ * 0 output input input+input_size output+init_size
+ * | | | | |
+ * | | | | |
+ * |-----|--------|--------|--------------|-----------|--|-------------|
+ * | | |
+ * | | |
+ * output+init_size-ZO_INIT_SIZE output+output_size output+kernel_total_size
+ *
+ * [output, output+init_size) is the entire memory range used for
+ * extracting the compressed image.
+ *
+ * [output, output+kernel_total_size) is the range needed for the
+ * uncompressed kernel (VO) and its run size (bss, brk, etc).
+ *
+ * [output, output+output_size) is VO plus relocs (i.e. the entire
+ * uncompressed payload contained by ZO). This is the area of the buffer
+ * written to during decompression.
+ *
+ * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case
+ * range of the copied ZO and decompression code. (i.e. the range
+ * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.)
+ *
+ * [input, input+input_size) is the original copied compressed image (ZO)
+ * (i.e. it does not include its run size). This range must be avoided
+ * because it contains the data used for decompression.
+ *
+ * [input+input_size, output+init_size) is [_text, _end) for ZO. This
+ * range includes ZO's heap and stack, and must be avoided since it
+ * performs the decompression.
+ *
+ * Since the above two ranges need to be avoided and they are adjacent,
+ * they can be merged, resulting in: [input, output+init_size) which
+ * becomes the MEM_AVOID_ZO_RANGE below.
+ */
+static void mem_avoid_init(unsigned long input, unsigned long input_size,
+ unsigned long output)
+{
+ unsigned long init_size = boot_params->hdr.init_size;
+ u64 initrd_start, initrd_size;
+ u64 cmd_line, cmd_line_size;
+ char *ptr;
+
+ /*
+ * Avoid the region that is unsafe to overlap during
+ * decompression.
+ */
+ mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
+ mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
+ add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start,
+ mem_avoid[MEM_AVOID_ZO_RANGE].size);
+
+ /* Avoid initrd. */
+ initrd_start = (u64)boot_params->ext_ramdisk_image << 32;
+ initrd_start |= boot_params->hdr.ramdisk_image;
+ initrd_size = (u64)boot_params->ext_ramdisk_size << 32;
+ initrd_size |= boot_params->hdr.ramdisk_size;
+ mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
+ mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
+ /* No need to set mapping for initrd, it will be handled in VO. */
+
+ /* Avoid kernel command line. */
+ cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32;
+ cmd_line |= boot_params->hdr.cmd_line_ptr;
+ /* Calculate size of cmd_line. */
+ ptr = (char *)(unsigned long)cmd_line;
+ for (cmd_line_size = 0; ptr[cmd_line_size++];)
+ ;
+ mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
+ mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
+ add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
+ mem_avoid[MEM_AVOID_CMDLINE].size);
+
+ /* Avoid boot parameters. */
+ mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
+ mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
+ add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
+ mem_avoid[MEM_AVOID_BOOTPARAMS].size);
+
+ /* We don't need to set a mapping for setup_data. */
+
+ /* Mark the memmap regions we need to avoid */
+ handle_mem_options();
+
+#ifdef CONFIG_X86_VERBOSE_BOOTUP
+ /* Make sure video RAM can be used. */
+ add_identity_map(0, PMD_SIZE);
+#endif
+}
+
+/*
+ * Does this memory vector overlap a known avoided area? If so, record the
+ * overlap region with the lowest address.
+ */
+static bool mem_avoid_overlap(struct mem_vector *img,
+ struct mem_vector *overlap)
+{
+ int i;
+ struct setup_data *ptr;
+ unsigned long earliest = img->start + img->size;
+ bool is_overlapping = false;
+
+ for (i = 0; i < MEM_AVOID_MAX; i++) {
+ if (mem_overlaps(img, &mem_avoid[i]) &&
+ mem_avoid[i].start < earliest) {
+ *overlap = mem_avoid[i];
+ earliest = overlap->start;
+ is_overlapping = true;
+ }
+ }
+
+ /* Avoid all entries in the setup_data linked list. */
+ ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
+ while (ptr) {
+ struct mem_vector avoid;
+
+ avoid.start = (unsigned long)ptr;
+ avoid.size = sizeof(*ptr) + ptr->len;
+
+ if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
+ *overlap = avoid;
+ earliest = overlap->start;
+ is_overlapping = true;
+ }
+
+ ptr = (struct setup_data *)(unsigned long)ptr->next;
+ }
+
+ return is_overlapping;
+}
+
+struct slot_area {
+ unsigned long addr;
+ int num;
+};
+
+#define MAX_SLOT_AREA 100
+
+static struct slot_area slot_areas[MAX_SLOT_AREA];
+
+static unsigned long slot_max;
+
+static unsigned long slot_area_index;
+
+static void store_slot_info(struct mem_vector *region, unsigned long image_size)
+{
+ struct slot_area slot_area;
+
+ if (slot_area_index == MAX_SLOT_AREA)
+ return;
+
+ slot_area.addr = region->start;
+ slot_area.num = (region->size - image_size) /
+ CONFIG_PHYSICAL_ALIGN + 1;
+
+ if (slot_area.num > 0) {
+ slot_areas[slot_area_index++] = slot_area;
+ slot_max += slot_area.num;
+ }
+}
+
+/*
+ * Skip as many 1GB huge pages as possible in the passed region
+ * according to the number which users specified:
+ */
+static void
+process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
+{
+ unsigned long addr, size = 0;
+ struct mem_vector tmp;
+ int i = 0;
+
+ if (!max_gb_huge_pages) {
+ store_slot_info(region, image_size);
+ return;
+ }
+
+ addr = ALIGN(region->start, PUD_SIZE);
+ /* Did we raise the address above the passed in memory entry? */
+ if (addr < region->start + region->size)
+ size = region->size - (addr - region->start);
+
+ /* Check how many 1GB huge pages can be filtered out: */
+ while (size > PUD_SIZE && max_gb_huge_pages) {
+ size -= PUD_SIZE;
+ max_gb_huge_pages--;
+ i++;
+ }
+
+ /* No good 1GB huge pages found: */
+ if (!i) {
+ store_slot_info(region, image_size);
+ return;
+ }
+
+ /*
+ * Skip those 'i'*1GB good huge pages, and continue checking and
+ * processing the remaining head or tail part of the passed region
+ * if available.
+ */
+
+ if (addr >= region->start + image_size) {
+ tmp.start = region->start;
+ tmp.size = addr - region->start;
+ store_slot_info(&tmp, image_size);
+ }
+
+ size = region->size - (addr - region->start) - i * PUD_SIZE;
+ if (size >= image_size) {
+ tmp.start = addr + i * PUD_SIZE;
+ tmp.size = size;
+ store_slot_info(&tmp, image_size);
+ }
+}
+
+static unsigned long slots_fetch_random(void)
+{
+ unsigned long slot;
+ int i;
+
+ /* Handle case of no slots stored. */
+ if (slot_max == 0)
+ return 0;
+
+ slot = kaslr_get_random_long("Physical") % slot_max;
+
+ for (i = 0; i < slot_area_index; i++) {
+ if (slot >= slot_areas[i].num) {
+ slot -= slot_areas[i].num;
+ continue;
+ }
+ return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
+ }
+
+ if (i == slot_area_index)
+ debug_putstr("slots_fetch_random() failed!?\n");
+ return 0;
+}
+
+static void process_mem_region(struct mem_vector *entry,
+ unsigned long minimum,
+ unsigned long image_size)
+{
+ struct mem_vector region, overlap;
+ struct slot_area slot_area;
+ unsigned long start_orig, end;
+ struct mem_vector cur_entry;
+
+ /* On 32-bit, ignore entries entirely above our maximum. */
+ if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE)
+ return;
+
+ /* Ignore entries entirely below our minimum. */
+ if (entry->start + entry->size < minimum)
+ return;
+
+ /* Ignore entries above memory limit */
+ end = min(entry->size + entry->start, mem_limit);
+ if (entry->start >= end)
+ return;
+ cur_entry.start = entry->start;
+ cur_entry.size = end - entry->start;
+
+ region.start = cur_entry.start;
+ region.size = cur_entry.size;
+
+ /* Give up if slot area array is full. */
+ while (slot_area_index < MAX_SLOT_AREA) {
+ start_orig = region.start;
+
+ /* Potentially raise address to minimum location. */
+ if (region.start < minimum)
+ region.start = minimum;
+
+ /* Potentially raise address to meet alignment needs. */
+ region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
+
+ /* Did we raise the address above the passed in memory entry? */
+ if (region.start > cur_entry.start + cur_entry.size)
+ return;
+
+ /* Reduce size by any delta from the original address. */
+ region.size -= region.start - start_orig;
+
+ /* On 32-bit, reduce region size to fit within max size. */
+ if (IS_ENABLED(CONFIG_X86_32) &&
+ region.start + region.size > KERNEL_IMAGE_SIZE)
+ region.size = KERNEL_IMAGE_SIZE - region.start;
+
+ /* Return if region can't contain decompressed kernel */
+ if (region.size < image_size)
+ return;
+
+ /* If nothing overlaps, store the region and return. */
+ if (!mem_avoid_overlap(&region, &overlap)) {
+ process_gb_huge_pages(&region, image_size);
+ return;
+ }
+
+ /* Store beginning of region if holds at least image_size. */
+ if (overlap.start > region.start + image_size) {
+ struct mem_vector beginning;
+
+ beginning.start = region.start;
+ beginning.size = overlap.start - region.start;
+ process_gb_huge_pages(&beginning, image_size);
+ }
+
+ /* Return if overlap extends to or past end of region. */
+ if (overlap.start + overlap.size >= region.start + region.size)
+ return;
+
+ /* Clip off the overlapping region and start over. */
+ region.size -= overlap.start - region.start + overlap.size;
+ region.start = overlap.start + overlap.size;
+ }
+}
+
+#ifdef CONFIG_EFI
+/*
+ * Returns true if mirror region found (and must have been processed
+ * for slots adding)
+ */
+static bool
+process_efi_entries(unsigned long minimum, unsigned long image_size)
+{
+ struct efi_info *e = &boot_params->efi_info;
+ bool efi_mirror_found = false;
+ struct mem_vector region;
+ efi_memory_desc_t *md;
+ unsigned long pmap;
+ char *signature;
+ u32 nr_desc;
+ int i;
+
+ signature = (char *)&e->efi_loader_signature;
+ if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
+ strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
+ return false;
+
+#ifdef CONFIG_X86_32
+ /* Can't handle data above 4GB at this time */
+ if (e->efi_memmap_hi) {
+ warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
+ return false;
+ }
+ pmap = e->efi_memmap;
+#else
+ pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
+#endif
+
+ nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
+ for (i = 0; i < nr_desc; i++) {
+ md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
+ if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
+ efi_mirror_found = true;
+ break;
+ }
+ }
+
+ for (i = 0; i < nr_desc; i++) {
+ md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
+
+ /*
+ * Here we are more conservative in picking free memory than
+ * the EFI spec allows:
+ *
+ * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
+ * free memory and thus available to place the kernel image into,
+ * but in practice there's firmware where using that memory leads
+ * to crashes.
+ *
+ * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
+ */
+ if (md->type != EFI_CONVENTIONAL_MEMORY)
+ continue;
+
+ if (efi_mirror_found &&
+ !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
+ continue;
+
+ region.start = md->phys_addr;
+ region.size = md->num_pages << EFI_PAGE_SHIFT;
+ process_mem_region(&region, minimum, image_size);
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted EFI scan (slot_areas full)!\n");
+ break;
+ }
+ }
+ return true;
+}
+#else
+static inline bool
+process_efi_entries(unsigned long minimum, unsigned long image_size)
+{
+ return false;
+}
+#endif
+
+static void process_e820_entries(unsigned long minimum,
+ unsigned long image_size)
+{
+ int i;
+ struct mem_vector region;
+ struct boot_e820_entry *entry;
+
+ /* Verify potential e820 positions, appending to slots list. */
+ for (i = 0; i < boot_params->e820_entries; i++) {
+ entry = &boot_params->e820_table[i];
+ /* Skip non-RAM entries. */
+ if (entry->type != E820_TYPE_RAM)
+ continue;
+ region.start = entry->addr;
+ region.size = entry->size;
+ process_mem_region(&region, minimum, image_size);
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted e820 scan (slot_areas full)!\n");
+ break;
+ }
+ }
+}
+
+static unsigned long find_random_phys_addr(unsigned long minimum,
+ unsigned long image_size)
+{
+ /* Check if we had too many memmaps. */
+ if (memmap_too_large) {
+ debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
+ return 0;
+ }
+
+ /* Make sure minimum is aligned. */
+ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+
+ if (process_efi_entries(minimum, image_size))
+ return slots_fetch_random();
+
+ process_e820_entries(minimum, image_size);
+ return slots_fetch_random();
+}
+
+static unsigned long find_random_virt_addr(unsigned long minimum,
+ unsigned long image_size)
+{
+ unsigned long slots, random_addr;
+
+ /* Make sure minimum is aligned. */
+ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+ /* Align image_size for easy slot calculations. */
+ image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN);
+
+ /*
+ * There are how many CONFIG_PHYSICAL_ALIGN-sized slots
+ * that can hold image_size within the range of minimum to
+ * KERNEL_IMAGE_SIZE?
+ */
+ slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
+ CONFIG_PHYSICAL_ALIGN + 1;
+
+ random_addr = kaslr_get_random_long("Virtual") % slots;
+
+ return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
+}
+
+/*
+ * Since this function examines addresses much more numerically,
+ * it takes the input and output pointers as 'unsigned long'.
+ */
+void choose_random_location(unsigned long input,
+ unsigned long input_size,
+ unsigned long *output,
+ unsigned long output_size,
+ unsigned long *virt_addr)
+{
+ unsigned long random_addr, min_addr;
+
+ if (cmdline_find_option_bool("nokaslr")) {
+ warn("KASLR disabled: 'nokaslr' on cmdline.");
+ return;
+ }
+
+#ifdef CONFIG_X86_5LEVEL
+ if (__read_cr4() & X86_CR4_LA57) {
+ __pgtable_l5_enabled = 1;
+ pgdir_shift = 48;
+ ptrs_per_p4d = 512;
+ }
+#endif
+
+ boot_params->hdr.loadflags |= KASLR_FLAG;
+
+ /* Prepare to add new identity pagetables on demand. */
+ initialize_identity_maps();
+
+ /* Record the various known unsafe memory ranges. */
+ mem_avoid_init(input, input_size, *output);
+
+ /*
+ * Low end of the randomization range should be the
+ * smaller of 512M or the initial kernel image
+ * location:
+ */
+ min_addr = min(*output, 512UL << 20);
+
+ /* Walk available memory entries to find a random address. */
+ random_addr = find_random_phys_addr(min_addr, output_size);
+ if (!random_addr) {
+ warn("Physical KASLR disabled: no suitable memory region!");
+ } else {
+ /* Update the new physical address location. */
+ if (*output != random_addr) {
+ add_identity_map(random_addr, output_size);
+ *output = random_addr;
+ }
+
+ /*
+ * This loads the identity mapping page table.
+ * This should only be done if a new physical address
+ * is found for the kernel, otherwise we should keep
+ * the old page table to make it be like the "nokaslr"
+ * case.
+ */
+ finalize_identity_maps();
+ }
+
+
+ /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
+ if (IS_ENABLED(CONFIG_X86_64))
+ random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
+ *virt_addr = random_addr;
+}
diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c
new file mode 100644
index 000000000..9557c5a15
--- /dev/null
+++ b/arch/x86/boot/compressed/kaslr_64.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code is used on x86_64 to create page table identity mappings on
+ * demand by building up a new set of page tables (or appending to the
+ * existing ones), and then switching over to them when ready.
+ *
+ * Copyright (C) 2015-2016 Yinghai Lu
+ * Copyright (C) 2016 Kees Cook
+ */
+
+/*
+ * Since we're dealing with identity mappings, physical and virtual
+ * addresses are the same, so override these defines which are ultimately
+ * used by the headers in misc.h.
+ */
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x)))
+
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
+#include "misc.h"
+
+/* These actually do the work of building the kernel identity maps. */
+#include <asm/init.h>
+#include <asm/pgtable.h>
+/* Use the static base for this part of the boot process */
+#undef __PAGE_OFFSET
+#define __PAGE_OFFSET __PAGE_OFFSET_BASE
+#include "../../mm/ident_map.c"
+
+/* Used to track our page table allocation area. */
+struct alloc_pgt_data {
+ unsigned char *pgt_buf;
+ unsigned long pgt_buf_size;
+ unsigned long pgt_buf_offset;
+};
+
+/*
+ * Allocates space for a page table entry, using struct alloc_pgt_data
+ * above. Besides the local callers, this is used as the allocation
+ * callback in mapping_info below.
+ */
+static void *alloc_pgt_page(void *context)
+{
+ struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
+ unsigned char *entry;
+
+ /* Validate there is space available for a new page. */
+ if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
+ debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
+ debug_putaddr(pages->pgt_buf_offset);
+ debug_putaddr(pages->pgt_buf_size);
+ return NULL;
+ }
+
+ entry = pages->pgt_buf + pages->pgt_buf_offset;
+ pages->pgt_buf_offset += PAGE_SIZE;
+
+ return entry;
+}
+
+/* Used to track our allocated page tables. */
+static struct alloc_pgt_data pgt_data;
+
+/* The top level page table entry pointer. */
+static unsigned long top_level_pgt;
+
+phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
+
+/*
+ * Mapping information structure passed to kernel_ident_mapping_init().
+ * Due to relocation, pointers must be assigned at run time not build time.
+ */
+static struct x86_mapping_info mapping_info;
+
+/* Locates and clears a region for a new top level page table. */
+void initialize_identity_maps(void)
+{
+ /* If running as an SEV guest, the encryption mask is required. */
+ set_sev_encryption_mask();
+
+ /* Exclude the encryption mask from __PHYSICAL_MASK */
+ physical_mask &= ~sme_me_mask;
+
+ /* Init mapping_info with run-time function/buffer pointers. */
+ mapping_info.alloc_pgt_page = alloc_pgt_page;
+ mapping_info.context = &pgt_data;
+ mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
+ mapping_info.kernpg_flag = _KERNPG_TABLE;
+
+ /*
+ * It should be impossible for this not to already be true,
+ * but since calling this a second time would rewind the other
+ * counters, let's just make sure this is reset too.
+ */
+ pgt_data.pgt_buf_offset = 0;
+
+ /*
+ * If we came here via startup_32(), cr3 will be _pgtable already
+ * and we must append to the existing area instead of entirely
+ * overwriting it.
+ *
+ * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
+ * the top-level page table is allocated separately.
+ *
+ * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
+ * cases. On 4-level paging it's equal to 'top_level_pgt'.
+ */
+ top_level_pgt = read_cr3_pa();
+ if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
+ debug_putstr("booted via startup_32()\n");
+ pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
+ pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
+ memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+ } else {
+ debug_putstr("booted via startup_64()\n");
+ pgt_data.pgt_buf = _pgtable;
+ pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
+ memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+ top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
+ }
+}
+
+/*
+ * Adds the specified range to what will become the new identity mappings.
+ * Once all ranges have been added, the new mapping is activated by calling
+ * finalize_identity_maps() below.
+ */
+void add_identity_map(unsigned long start, unsigned long size)
+{
+ unsigned long end = start + size;
+
+ /* Align boundary to 2M. */
+ start = round_down(start, PMD_SIZE);
+ end = round_up(end, PMD_SIZE);
+ if (start >= end)
+ return;
+
+ /* Build the mapping. */
+ kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
+ start, end);
+}
+
+/*
+ * This switches the page tables to the new level4 that has been built
+ * via calls to add_identity_map() above. If booted via startup_32(),
+ * this is effectively a no-op.
+ */
+void finalize_identity_maps(void)
+{
+ write_cr3(top_level_pgt);
+}
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
new file mode 100644
index 000000000..a480356e0
--- /dev/null
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -0,0 +1,104 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/processor-flags.h>
+#include <asm/msr.h>
+#include <asm/asm-offsets.h>
+
+ .text
+ .code32
+ENTRY(get_sev_encryption_bit)
+ xor %eax, %eax
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ push %ebx
+ push %ecx
+ push %edx
+
+ /* Check if running under a hypervisor */
+ movl $1, %eax
+ cpuid
+ bt $31, %ecx /* Check the hypervisor bit */
+ jnc .Lno_sev
+
+ movl $0x80000000, %eax /* CPUID to check the highest leaf */
+ cpuid
+ cmpl $0x8000001f, %eax /* See if 0x8000001f is available */
+ jb .Lno_sev
+
+ /*
+ * Check for the SEV feature:
+ * CPUID Fn8000_001F[EAX] - Bit 1
+ * CPUID Fn8000_001F[EBX] - Bits 5:0
+ * Pagetable bit position used to indicate encryption
+ */
+ movl $0x8000001f, %eax
+ cpuid
+ bt $1, %eax /* Check if SEV is available */
+ jnc .Lno_sev
+
+ movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */
+ rdmsr
+ bt $MSR_AMD64_SEV_ENABLED_BIT, %eax /* Check if SEV is active */
+ jnc .Lno_sev
+
+ movl %ebx, %eax
+ andl $0x3f, %eax /* Return the encryption bit location */
+ jmp .Lsev_exit
+
+.Lno_sev:
+ xor %eax, %eax
+
+.Lsev_exit:
+ pop %edx
+ pop %ecx
+ pop %ebx
+
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
+
+ ret
+ENDPROC(get_sev_encryption_bit)
+
+ .code64
+ENTRY(set_sev_encryption_mask)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ push %rbp
+ push %rdx
+
+ movq %rsp, %rbp /* Save current stack pointer */
+
+ call get_sev_encryption_bit /* Get the encryption bit position */
+ testl %eax, %eax
+ jz .Lno_sev_mask
+
+ bts %rax, sme_me_mask(%rip) /* Create the encryption mask */
+
+.Lno_sev_mask:
+ movq %rbp, %rsp /* Restore original stack pointer */
+
+ pop %rdx
+ pop %rbp
+#endif
+
+ xor %rax, %rax
+ ret
+ENDPROC(set_sev_encryption_mask)
+
+ .data
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ .balign 8
+GLOBAL(sme_me_mask)
+ .quad 0
+#endif
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
new file mode 100644
index 000000000..0387d7a96
--- /dev/null
+++ b/arch/x86/boot/compressed/misc.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * misc.c
+ *
+ * This is a collection of several routines used to extract the kernel
+ * which includes KASLR relocation, decompression, ELF parsing, and
+ * relocation processing. Additionally included are the screen and serial
+ * output functions and related debugging support functions.
+ *
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ * puts by Nick Holloway 1993, better puts by Martin Mares 1995
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ */
+
+#include "misc.h"
+#include "error.h"
+#include "pgtable.h"
+#include "../string.h"
+#include "../voffset.h"
+#include <asm/bootparam_utils.h>
+
+/*
+ * WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically at
+ * run time, but no relocation processing is performed. This means that
+ * it is not safe to place pointers in static structures.
+ */
+
+/* Macros used by the included decompressor code below. */
+#define STATIC static
+
+/*
+ * Use normal definitions of mem*() from string.c. There are already
+ * included header files which expect a definition of memset() and by
+ * the time we define memset macro, it is too late.
+ */
+#undef memcpy
+#undef memset
+#define memzero(s, n) memset((s), 0, (n))
+#define memmove memmove
+
+/* Functions used by the included decompressor code below. */
+void *memmove(void *dest, const void *src, size_t n);
+
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+struct boot_params *boot_params;
+
+memptr free_mem_ptr;
+memptr free_mem_end_ptr;
+
+static char *vidmem;
+static int vidport;
+static int lines, cols;
+
+#ifdef CONFIG_KERNEL_GZIP
+#include "../../../../lib/decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_BZIP2
+#include "../../../../lib/decompress_bunzip2.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZMA
+#include "../../../../lib/decompress_unlzma.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+/*
+ * NOTE: When adding a new decompressor, please update the analysis in
+ * ../header.S.
+ */
+
+static void scroll(void)
+{
+ int i;
+
+ memmove(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
+ for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2)
+ vidmem[i] = ' ';
+}
+
+#define XMTRDY 0x20
+
+#define TXR 0 /* Transmit register (WRITE) */
+#define LSR 5 /* Line Status */
+static void serial_putchar(int ch)
+{
+ unsigned timeout = 0xffff;
+
+ while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
+ cpu_relax();
+
+ outb(ch, early_serial_base + TXR);
+}
+
+void __putstr(const char *s)
+{
+ int x, y, pos;
+ char c;
+
+ if (early_serial_base) {
+ const char *str = s;
+ while (*str) {
+ if (*str == '\n')
+ serial_putchar('\r');
+ serial_putchar(*str++);
+ }
+ }
+
+ if (lines == 0 || cols == 0)
+ return;
+
+ x = boot_params->screen_info.orig_x;
+ y = boot_params->screen_info.orig_y;
+
+ while ((c = *s++) != '\0') {
+ if (c == '\n') {
+ x = 0;
+ if (++y >= lines) {
+ scroll();
+ y--;
+ }
+ } else {
+ vidmem[(x + cols * y) * 2] = c;
+ if (++x >= cols) {
+ x = 0;
+ if (++y >= lines) {
+ scroll();
+ y--;
+ }
+ }
+ }
+ }
+
+ boot_params->screen_info.orig_x = x;
+ boot_params->screen_info.orig_y = y;
+
+ pos = (x + cols * y) * 2; /* Update cursor position */
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
+}
+
+void __puthex(unsigned long value)
+{
+ char alpha[2] = "0";
+ int bits;
+
+ for (bits = sizeof(value) * 8 - 4; bits >= 0; bits -= 4) {
+ unsigned long digit = (value >> bits) & 0xf;
+
+ if (digit < 0xA)
+ alpha[0] = '0' + digit;
+ else
+ alpha[0] = 'a' + (digit - 0xA);
+
+ __putstr(alpha);
+ }
+}
+
+#if CONFIG_X86_NEED_RELOCS
+static void handle_relocations(void *output, unsigned long output_len,
+ unsigned long virt_addr)
+{
+ int *reloc;
+ unsigned long delta, map, ptr;
+ unsigned long min_addr = (unsigned long)output;
+ unsigned long max_addr = min_addr + (VO___bss_start - VO__text);
+
+ /*
+ * Calculate the delta between where vmlinux was linked to load
+ * and where it was actually loaded.
+ */
+ delta = min_addr - LOAD_PHYSICAL_ADDR;
+
+ /*
+ * The kernel contains a table of relocation addresses. Those
+ * addresses have the final load address of the kernel in virtual
+ * memory. We are currently working in the self map. So we need to
+ * create an adjustment for kernel memory addresses to the self map.
+ * This will involve subtracting out the base address of the kernel.
+ */
+ map = delta - __START_KERNEL_map;
+
+ /*
+ * 32-bit always performs relocations. 64-bit relocations are only
+ * needed if KASLR has chosen a different starting address offset
+ * from __START_KERNEL_map.
+ */
+ if (IS_ENABLED(CONFIG_X86_64))
+ delta = virt_addr - LOAD_PHYSICAL_ADDR;
+
+ if (!delta) {
+ debug_putstr("No relocation needed... ");
+ return;
+ }
+ debug_putstr("Performing relocations... ");
+
+ /*
+ * Process relocations: 32 bit relocations first then 64 bit after.
+ * Three sets of binary relocations are added to the end of the kernel
+ * before compression. Each relocation table entry is the kernel
+ * address of the location which needs to be updated stored as a
+ * 32-bit value which is sign extended to 64 bits.
+ *
+ * Format is:
+ *
+ * kernel bits...
+ * 0 - zero terminator for 64 bit relocations
+ * 64 bit relocation repeated
+ * 0 - zero terminator for inverse 32 bit relocations
+ * 32 bit inverse relocation repeated
+ * 0 - zero terminator for 32 bit relocations
+ * 32 bit relocation repeated
+ *
+ * So we work backwards from the end of the decompressed image.
+ */
+ for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) {
+ long extended = *reloc;
+ extended += map;
+
+ ptr = (unsigned long)extended;
+ if (ptr < min_addr || ptr > max_addr)
+ error("32-bit relocation outside of kernel!\n");
+
+ *(uint32_t *)ptr += delta;
+ }
+#ifdef CONFIG_X86_64
+ while (*--reloc) {
+ long extended = *reloc;
+ extended += map;
+
+ ptr = (unsigned long)extended;
+ if (ptr < min_addr || ptr > max_addr)
+ error("inverse 32-bit relocation outside of kernel!\n");
+
+ *(int32_t *)ptr -= delta;
+ }
+ for (reloc--; *reloc; reloc--) {
+ long extended = *reloc;
+ extended += map;
+
+ ptr = (unsigned long)extended;
+ if (ptr < min_addr || ptr > max_addr)
+ error("64-bit relocation outside of kernel!\n");
+
+ *(uint64_t *)ptr += delta;
+ }
+#endif
+}
+#else
+static inline void handle_relocations(void *output, unsigned long output_len,
+ unsigned long virt_addr)
+{ }
+#endif
+
+static void parse_elf(void *output)
+{
+#ifdef CONFIG_X86_64
+ Elf64_Ehdr ehdr;
+ Elf64_Phdr *phdrs, *phdr;
+#else
+ Elf32_Ehdr ehdr;
+ Elf32_Phdr *phdrs, *phdr;
+#endif
+ void *dest;
+ int i;
+
+ memcpy(&ehdr, output, sizeof(ehdr));
+ if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
+ ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
+ ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
+ ehdr.e_ident[EI_MAG3] != ELFMAG3) {
+ error("Kernel is not a valid ELF file");
+ return;
+ }
+
+ debug_putstr("Parsing ELF... ");
+
+ phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
+ if (!phdrs)
+ error("Failed to allocate space for phdrs");
+
+ memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
+
+ for (i = 0; i < ehdr.e_phnum; i++) {
+ phdr = &phdrs[i];
+
+ switch (phdr->p_type) {
+ case PT_LOAD:
+#ifdef CONFIG_X86_64
+ if ((phdr->p_align % 0x200000) != 0)
+ error("Alignment of LOAD segment isn't multiple of 2MB");
+#endif
+#ifdef CONFIG_RELOCATABLE
+ dest = output;
+ dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
+#else
+ dest = (void *)(phdr->p_paddr);
+#endif
+ memmove(dest, output + phdr->p_offset, phdr->p_filesz);
+ break;
+ default: /* Ignore other PT_* */ break;
+ }
+ }
+
+ free(phdrs);
+}
+
+/*
+ * The compressed kernel image (ZO), has been moved so that its position
+ * is against the end of the buffer used to hold the uncompressed kernel
+ * image (VO) and the execution environment (.bss, .brk), which makes sure
+ * there is room to do the in-place decompression. (See header.S for the
+ * calculations.)
+ *
+ * |-----compressed kernel image------|
+ * V V
+ * 0 extract_offset +INIT_SIZE
+ * |-----------|---------------|-------------------------|--------|
+ * | | | |
+ * VO__text startup_32 of ZO VO__end ZO__end
+ * ^ ^
+ * |-------uncompressed kernel image---------|
+ *
+ */
+asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
+ unsigned char *input_data,
+ unsigned long input_len,
+ unsigned char *output,
+ unsigned long output_len)
+{
+ const unsigned long kernel_total_size = VO__end - VO__text;
+ unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+
+ /* Retain x86 boot parameters pointer passed from startup_32/64. */
+ boot_params = rmode;
+
+ /* Clear flags intended for solely in-kernel use. */
+ boot_params->hdr.loadflags &= ~KASLR_FLAG;
+
+ sanitize_boot_params(boot_params);
+
+ if (boot_params->screen_info.orig_video_mode == 7) {
+ vidmem = (char *) 0xb0000;
+ vidport = 0x3b4;
+ } else {
+ vidmem = (char *) 0xb8000;
+ vidport = 0x3d4;
+ }
+
+ lines = boot_params->screen_info.orig_video_lines;
+ cols = boot_params->screen_info.orig_video_cols;
+
+ console_init();
+ debug_putstr("early console in extract_kernel\n");
+
+ free_mem_ptr = heap; /* Heap */
+ free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
+
+ /* Report initial kernel position details. */
+ debug_putaddr(input_data);
+ debug_putaddr(input_len);
+ debug_putaddr(output);
+ debug_putaddr(output_len);
+ debug_putaddr(kernel_total_size);
+
+#ifdef CONFIG_X86_64
+ /* Report address of 32-bit trampoline */
+ debug_putaddr(trampoline_32bit);
+#endif
+
+ /*
+ * The memory hole needed for the kernel is the larger of either
+ * the entire decompressed kernel plus relocation table, or the
+ * entire decompressed kernel plus .bss and .brk sections.
+ */
+ choose_random_location((unsigned long)input_data, input_len,
+ (unsigned long *)&output,
+ max(output_len, kernel_total_size),
+ &virt_addr);
+
+ /* Validate memory location choices. */
+ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
+ error("Destination physical address inappropriately aligned");
+ if (virt_addr & (MIN_KERNEL_ALIGN - 1))
+ error("Destination virtual address inappropriately aligned");
+#ifdef CONFIG_X86_64
+ if (heap > 0x3fffffffffffUL)
+ error("Destination address too large");
+ if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+ error("Destination virtual address is beyond the kernel mapping area");
+#else
+ if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
+ error("Destination address too large");
+#endif
+#ifndef CONFIG_RELOCATABLE
+ if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
+ error("Destination address does not match LOAD_PHYSICAL_ADDR");
+ if (virt_addr != LOAD_PHYSICAL_ADDR)
+ error("Destination virtual address changed when not relocatable");
+#endif
+
+ debug_putstr("\nDecompressing Linux... ");
+ __decompress(input_data, input_len, NULL, NULL, output, output_len,
+ NULL, error);
+ parse_elf(output);
+ handle_relocations(output, output_len, virt_addr);
+ debug_putstr("done.\nBooting the kernel.\n");
+ return output;
+}
+
+void fortify_panic(const char *name)
+{
+ error("detected buffer overflow");
+}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
new file mode 100644
index 000000000..47fd18db6
--- /dev/null
+++ b/arch/x86/boot/compressed/misc.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_MISC_H
+#define BOOT_COMPRESSED_MISC_H
+
+/*
+ * Special hack: we have to be careful, because no indirections are allowed here,
+ * and paravirt_ops is a kind of one. As it will only run in baremetal anyway,
+ * we just keep it from happening. (This list needs to be extended when new
+ * paravirt and debugging variants are added.)
+ */
+#undef CONFIG_PARAVIRT
+#undef CONFIG_PARAVIRT_SPINLOCKS
+#undef CONFIG_KASAN
+
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
+
+#include <linux/linkage.h>
+#include <linux/screen_info.h>
+#include <linux/elf.h>
+#include <linux/io.h>
+#include <asm/page.h>
+#include <asm/boot.h>
+#include <asm/bootparam.h>
+
+#define BOOT_BOOT_H
+#include "../ctype.h"
+
+#ifdef CONFIG_X86_64
+#define memptr long
+#else
+#define memptr unsigned
+#endif
+
+/* misc.c */
+extern memptr free_mem_ptr;
+extern memptr free_mem_end_ptr;
+extern struct boot_params *boot_params;
+void __putstr(const char *s);
+void __puthex(unsigned long value);
+#define error_putstr(__x) __putstr(__x)
+#define error_puthex(__x) __puthex(__x)
+
+#ifdef CONFIG_X86_VERBOSE_BOOTUP
+
+#define debug_putstr(__x) __putstr(__x)
+#define debug_puthex(__x) __puthex(__x)
+#define debug_putaddr(__x) { \
+ debug_putstr(#__x ": 0x"); \
+ debug_puthex((unsigned long)(__x)); \
+ debug_putstr("\n"); \
+ }
+
+#else
+
+static inline void debug_putstr(const char *s)
+{ }
+static inline void debug_puthex(const char *s)
+{ }
+#define debug_putaddr(x) /* */
+
+#endif
+
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
+/* cmdline.c */
+int cmdline_find_option(const char *option, char *buffer, int bufsize);
+int cmdline_find_option_bool(const char *option);
+#endif
+
+
+#if CONFIG_RANDOMIZE_BASE
+/* kaslr.c */
+void choose_random_location(unsigned long input,
+ unsigned long input_size,
+ unsigned long *output,
+ unsigned long output_size,
+ unsigned long *virt_addr);
+/* cpuflags.c */
+bool has_cpuflag(int flag);
+#else
+static inline void choose_random_location(unsigned long input,
+ unsigned long input_size,
+ unsigned long *output,
+ unsigned long output_size,
+ unsigned long *virt_addr)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void initialize_identity_maps(void);
+void add_identity_map(unsigned long start, unsigned long size);
+void finalize_identity_maps(void);
+extern unsigned char _pgtable[];
+#else
+static inline void initialize_identity_maps(void)
+{ }
+static inline void add_identity_map(unsigned long start, unsigned long size)
+{ }
+static inline void finalize_identity_maps(void)
+{ }
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
+/* early_serial_console.c */
+extern int early_serial_base;
+void console_init(void);
+#else
+static const int early_serial_base;
+static inline void console_init(void)
+{ }
+#endif
+
+void set_sev_encryption_mask(void);
+
+#endif
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
new file mode 100644
index 000000000..72bad2c8d
--- /dev/null
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -0,0 +1,82 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ * H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * -----------------------------------------------------------------------
+ *
+ * Outputs a small assembly wrapper with the appropriate symbols defined.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <tools/le_byteshift.h>
+
+int main(int argc, char *argv[])
+{
+ uint32_t olen;
+ long ilen;
+ FILE *f = NULL;
+ int retval = 1;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
+ goto bail;
+ }
+
+ /* Get the information for the compressed kernel image first */
+
+ f = fopen(argv[1], "r");
+ if (!f) {
+ perror(argv[1]);
+ goto bail;
+ }
+
+
+ if (fseek(f, -4L, SEEK_END)) {
+ perror(argv[1]);
+ }
+
+ if (fread(&olen, sizeof(olen), 1, f) != 1) {
+ perror(argv[1]);
+ goto bail;
+ }
+
+ ilen = ftell(f);
+ olen = get_unaligned_le32(&olen);
+
+ printf(".section \".rodata..compressed\",\"a\",@progbits\n");
+ printf(".globl z_input_len\n");
+ printf("z_input_len = %lu\n", ilen);
+ printf(".globl z_output_len\n");
+ printf("z_output_len = %lu\n", (unsigned long)olen);
+
+ printf(".globl input_data, input_data_end\n");
+ printf("input_data:\n");
+ printf(".incbin \"%s\"\n", argv[1]);
+ printf("input_data_end:\n");
+
+ retval = 0;
+bail:
+ if (f)
+ fclose(f);
+ return retval;
+}
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
new file mode 100644
index 000000000..6ff7e81b5
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -0,0 +1,20 @@
+#ifndef BOOT_COMPRESSED_PAGETABLE_H
+#define BOOT_COMPRESSED_PAGETABLE_H
+
+#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
+
+#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
+
+#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE 0x70
+
+#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
+
+#ifndef __ASSEMBLER__
+
+extern unsigned long *trampoline_32bit;
+
+extern void trampoline_32bit_src(void *return_ptr);
+
+#endif /* __ASSEMBLER__ */
+#endif /* BOOT_COMPRESSED_PAGETABLE_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644
index 000000000..76e1edf5b
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -0,0 +1,211 @@
+#include <linux/efi.h>
+#include <asm/e820/types.h>
+#include <asm/processor.h>
+#include <asm/efi.h>
+#include "pgtable.h"
+#include "../string.h"
+
+/*
+ * __force_order is used by special_insns.h asm code to force instruction
+ * serialization.
+ *
+ * It is not referenced from the code, but GCC < 5 with -fPIE would fail
+ * due to an undefined symbol. Define it to make these ancient GCCs work.
+ */
+unsigned long __force_order;
+
+#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
+#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
+
+struct paging_config {
+ unsigned long trampoline_start;
+ unsigned long l5_required;
+};
+
+/* Buffer to preserve trampoline memory */
+static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
+
+/*
+ * Trampoline address will be printed by extract_kernel() for debugging
+ * purposes.
+ *
+ * Avoid putting the pointer into .bss as it will be cleared between
+ * paging_prepare() and extract_kernel().
+ */
+unsigned long *trampoline_32bit __section(.data);
+
+extern struct boot_params *boot_params;
+int cmdline_find_option_bool(const char *option);
+
+static unsigned long find_trampoline_placement(void)
+{
+ unsigned long bios_start = 0, ebda_start = 0;
+ unsigned long trampoline_start;
+ struct boot_e820_entry *entry;
+ char *signature;
+ int i;
+
+ /*
+ * Find a suitable spot for the trampoline.
+ * This code is based on reserve_bios_regions().
+ */
+
+ /*
+ * EFI systems may not provide legacy ROM. The memory may not be mapped
+ * at all.
+ *
+ * Only look for values in the legacy ROM for non-EFI system.
+ */
+ signature = (char *)&boot_params->efi_info.efi_loader_signature;
+ if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
+ strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) {
+ ebda_start = *(unsigned short *)0x40e << 4;
+ bios_start = *(unsigned short *)0x413 << 10;
+ }
+
+ if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
+ bios_start = BIOS_START_MAX;
+
+ if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
+ bios_start = ebda_start;
+
+ bios_start = round_down(bios_start, PAGE_SIZE);
+
+ /* Find the first usable memory region under bios_start. */
+ for (i = boot_params->e820_entries - 1; i >= 0; i--) {
+ unsigned long new = bios_start;
+
+ entry = &boot_params->e820_table[i];
+
+ /* Skip all entries above bios_start. */
+ if (bios_start <= entry->addr)
+ continue;
+
+ /* Skip non-RAM entries. */
+ if (entry->type != E820_TYPE_RAM)
+ continue;
+
+ /* Adjust bios_start to the end of the entry if needed. */
+ if (bios_start > entry->addr + entry->size)
+ new = entry->addr + entry->size;
+
+ /* Keep bios_start page-aligned. */
+ new = round_down(new, PAGE_SIZE);
+
+ /* Skip the entry if it's too small. */
+ if (new - TRAMPOLINE_32BIT_SIZE < entry->addr)
+ continue;
+
+ /* Protect against underflow. */
+ if (new - TRAMPOLINE_32BIT_SIZE > bios_start)
+ break;
+
+ bios_start = new;
+ break;
+ }
+
+ /* Place the trampoline just below the end of low memory */
+ return bios_start - TRAMPOLINE_32BIT_SIZE;
+}
+
+struct paging_config paging_prepare(void *rmode)
+{
+ struct paging_config paging_config = {};
+
+ /* Initialize boot_params. Required for cmdline_find_option_bool(). */
+ boot_params = rmode;
+
+ /*
+ * Check if LA57 is desired and supported.
+ *
+ * There are several parts to the check:
+ * - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
+ * - if user asked to disable 5-level paging: no5lvl in cmdline
+ * - if the machine supports 5-level paging:
+ * + CPUID leaf 7 is supported
+ * + the leaf has the feature bit set
+ *
+ * That's substitute for boot_cpu_has() in early boot code.
+ */
+ if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
+ !cmdline_find_option_bool("no5lvl") &&
+ native_cpuid_eax(0) >= 7 &&
+ (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
+ paging_config.l5_required = 1;
+ }
+
+ paging_config.trampoline_start = find_trampoline_placement();
+
+ trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
+
+ /* Preserve trampoline memory */
+ memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
+
+ /* Clear trampoline memory first */
+ memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
+
+ /* Copy trampoline code in place */
+ memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+ &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
+
+ /*
+ * The code below prepares page table in trampoline memory.
+ *
+ * The new page table will be used by trampoline code for switching
+ * from 4- to 5-level paging or vice versa.
+ *
+ * If switching is not required, the page table is unused: trampoline
+ * code wouldn't touch CR3.
+ */
+
+ /*
+ * We are not going to use the page table in trampoline memory if we
+ * are already in the desired paging mode.
+ */
+ if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
+ goto out;
+
+ if (paging_config.l5_required) {
+ /*
+ * For 4- to 5-level paging transition, set up current CR3 as
+ * the first and the only entry in a new top-level page table.
+ */
+ trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
+ } else {
+ unsigned long src;
+
+ /*
+ * For 5- to 4-level paging transition, copy page table pointed
+ * by first entry in the current top-level page table as our
+ * new top-level page table.
+ *
+ * We cannot just point to the page table from trampoline as it
+ * may be above 4G.
+ */
+ src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
+ memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
+ (void *)src, PAGE_SIZE);
+ }
+
+out:
+ return paging_config;
+}
+
+void cleanup_trampoline(void *pgtable)
+{
+ void *trampoline_pgtable;
+
+ trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
+
+ /*
+ * Move the top level page table out of trampoline memory,
+ * if it's there.
+ */
+ if ((void *)__native_read_cr3() == trampoline_pgtable) {
+ memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
+ native_write_cr3((unsigned long)pgtable);
+ }
+
+ /* Restore trampoline memory */
+ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
+}
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
new file mode 100644
index 000000000..19dbbcdd1
--- /dev/null
+++ b/arch/x86/boot/compressed/string.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This provides an optimized implementation of memcpy, and a simplified
+ * implementation of memset and memmove. These are used here because the
+ * standard kernel runtime versions are not yet available and we don't
+ * trust the gcc built-in implementations as they may do unexpected things
+ * (e.g. FPU ops) in the minimal decompression stub execution environment.
+ */
+#include "error.h"
+
+#include "../string.c"
+
+#ifdef CONFIG_X86_32
+static void *__memcpy(void *dest, const void *src, size_t n)
+{
+ int d0, d1, d2;
+ asm volatile(
+ "rep ; movsl\n\t"
+ "movl %4,%%ecx\n\t"
+ "rep ; movsb\n\t"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
+ : "memory");
+
+ return dest;
+}
+#else
+static void *__memcpy(void *dest, const void *src, size_t n)
+{
+ long d0, d1, d2;
+ asm volatile(
+ "rep ; movsq\n\t"
+ "movq %4,%%rcx\n\t"
+ "rep ; movsb\n\t"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src)
+ : "memory");
+
+ return dest;
+}
+#endif
+
+void *memset(void *s, int c, size_t n)
+{
+ int i;
+ char *ss = s;
+
+ for (i = 0; i < n; i++)
+ ss[i] = c;
+ return s;
+}
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+ unsigned char *d = dest;
+ const unsigned char *s = src;
+
+ if (d <= s || d - s >= n)
+ return __memcpy(dest, src, n);
+
+ while (n-- > 0)
+ d[n] = s[n];
+
+ return dest;
+}
+
+/* Detect and warn about potential overlaps, but handle them with memmove. */
+void *memcpy(void *dest, const void *src, size_t n)
+{
+ if (dest > src && dest - src < n) {
+ warn("Avoiding potentially unsafe overlapping memcpy()!");
+ return memmove(dest, src, n);
+ }
+ return __memcpy(dest, src, n);
+}
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
new file mode 100644
index 000000000..f491bbde8
--- /dev/null
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+
+#undef i386
+
+#include <asm/cache.h>
+#include <asm/page_types.h>
+
+#ifdef CONFIG_X86_64
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(startup_64)
+#else
+OUTPUT_ARCH(i386)
+ENTRY(startup_32)
+#endif
+
+SECTIONS
+{
+ /* Be careful parts of head_64.S assume startup_32 is at
+ * address 0.
+ */
+ . = 0;
+ .head.text : {
+ _head = . ;
+ HEAD_TEXT
+ _ehead = . ;
+ }
+ .rodata..compressed : {
+ *(.rodata..compressed)
+ }
+ .text : {
+ _text = .; /* Text */
+ *(.text)
+ *(.text.*)
+ _etext = . ;
+ }
+ .rodata : {
+ _rodata = . ;
+ *(.rodata) /* read-only data */
+ *(.rodata.*)
+ _erodata = . ;
+ }
+ .got : {
+ _got = .;
+ KEEP(*(.got.plt))
+ KEEP(*(.got))
+ _egot = .;
+ }
+ .data : {
+ _data = . ;
+ *(.data)
+ *(.data.*)
+ _edata = . ;
+ }
+ . = ALIGN(L1_CACHE_BYTES);
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ . = ALIGN(8); /* For convenience during zeroing */
+ _ebss = .;
+ }
+#ifdef CONFIG_X86_64
+ . = ALIGN(PAGE_SIZE);
+ .pgtable : {
+ _pgtable = . ;
+ *(.pgtable)
+ _epgtable = . ;
+ }
+#endif
+ . = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */
+ _end = .;
+}
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
new file mode 100644
index 000000000..15d9f74b0
--- /dev/null
+++ b/arch/x86/boot/copy.S
@@ -0,0 +1,67 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <linux/linkage.h>
+
+/*
+ * Memory copy routines
+ */
+
+ .code16
+ .text
+
+GLOBAL(memcpy)
+ pushw %si
+ pushw %di
+ movw %ax, %di
+ movw %dx, %si
+ pushw %cx
+ shrw $2, %cx
+ rep; movsl
+ popw %cx
+ andw $3, %cx
+ rep; movsb
+ popw %di
+ popw %si
+ retl
+ENDPROC(memcpy)
+
+GLOBAL(memset)
+ pushw %di
+ movw %ax, %di
+ movzbl %dl, %eax
+ imull $0x01010101,%eax
+ pushw %cx
+ shrw $2, %cx
+ rep; stosl
+ popw %cx
+ andw $3, %cx
+ rep; stosb
+ popw %di
+ retl
+ENDPROC(memset)
+
+GLOBAL(copy_from_fs)
+ pushw %ds
+ pushw %fs
+ popw %ds
+ calll memcpy
+ popw %ds
+ retl
+ENDPROC(copy_from_fs)
+
+GLOBAL(copy_to_fs)
+ pushw %es
+ pushw %fs
+ popw %es
+ calll memcpy
+ popw %es
+ retl
+ENDPROC(copy_to_fs)
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
new file mode 100644
index 000000000..26240dde0
--- /dev/null
+++ b/arch/x86/boot/cpu.c
@@ -0,0 +1,101 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * arch/x86/boot/cpu.c
+ *
+ * Check for obligatory CPU features and abort if the features are not
+ * present.
+ */
+
+#include "boot.h"
+#ifdef CONFIG_X86_FEATURE_NAMES
+#include "cpustr.h"
+#endif
+
+static char *cpu_name(int level)
+{
+ static char buf[6];
+
+ if (level == 64) {
+ return "x86-64";
+ } else {
+ if (level == 15)
+ level = 6;
+ sprintf(buf, "i%d86", level);
+ return buf;
+ }
+}
+
+static void show_cap_strs(u32 *err_flags)
+{
+ int i, j;
+#ifdef CONFIG_X86_FEATURE_NAMES
+ const unsigned char *msg_strs = (const unsigned char *)x86_cap_strs;
+ for (i = 0; i < NCAPINTS; i++) {
+ u32 e = err_flags[i];
+ for (j = 0; j < 32; j++) {
+ if (msg_strs[0] < i ||
+ (msg_strs[0] == i && msg_strs[1] < j)) {
+ /* Skip to the next string */
+ msg_strs += 2;
+ while (*msg_strs++)
+ ;
+ }
+ if (e & 1) {
+ if (msg_strs[0] == i &&
+ msg_strs[1] == j &&
+ msg_strs[2])
+ printf("%s ", msg_strs+2);
+ else
+ printf("%d:%d ", i, j);
+ }
+ e >>= 1;
+ }
+ }
+#else
+ for (i = 0; i < NCAPINTS; i++) {
+ u32 e = err_flags[i];
+ for (j = 0; j < 32; j++) {
+ if (e & 1)
+ printf("%d:%d ", i, j);
+ e >>= 1;
+ }
+ }
+#endif
+}
+
+int validate_cpu(void)
+{
+ u32 *err_flags;
+ int cpu_level, req_level;
+
+ check_cpu(&cpu_level, &req_level, &err_flags);
+
+ if (cpu_level < req_level) {
+ printf("This kernel requires an %s CPU, ",
+ cpu_name(req_level));
+ printf("but only detected an %s CPU.\n",
+ cpu_name(cpu_level));
+ return -1;
+ }
+
+ if (err_flags) {
+ puts("This kernel requires the following features "
+ "not present on the CPU:\n");
+ show_cap_strs(err_flags);
+ putchar('\n');
+ return -1;
+ } else if (check_knl_erratum()) {
+ return -1;
+ } else {
+ return 0;
+ }
+}
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
new file mode 100644
index 000000000..8f0c4c9fc
--- /dev/null
+++ b/arch/x86/boot/cpucheck.c
@@ -0,0 +1,229 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Check for obligatory CPU features and abort if the features are not
+ * present. This code should be compilable as 16-, 32- or 64-bit
+ * code, so be very careful with types and inline assembly.
+ *
+ * This code should not contain any messages; that requires an
+ * additional wrapper.
+ *
+ * As written, this code is not safe for inclusion into the kernel
+ * proper (after FPU initialization, in particular).
+ */
+
+#ifdef _SETUP
+# include "boot.h"
+#endif
+#include <linux/types.h>
+#include <asm/intel-family.h>
+#include <asm/processor-flags.h>
+#include <asm/required-features.h>
+#include <asm/msr-index.h>
+#include "string.h"
+
+static u32 err_flags[NCAPINTS];
+
+static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY;
+
+static const u32 req_flags[NCAPINTS] =
+{
+ REQUIRED_MASK0,
+ REQUIRED_MASK1,
+ 0, /* REQUIRED_MASK2 not implemented in this file */
+ 0, /* REQUIRED_MASK3 not implemented in this file */
+ REQUIRED_MASK4,
+ 0, /* REQUIRED_MASK5 not implemented in this file */
+ REQUIRED_MASK6,
+ 0, /* REQUIRED_MASK7 not implemented in this file */
+ 0, /* REQUIRED_MASK8 not implemented in this file */
+ 0, /* REQUIRED_MASK9 not implemented in this file */
+ 0, /* REQUIRED_MASK10 not implemented in this file */
+ 0, /* REQUIRED_MASK11 not implemented in this file */
+ 0, /* REQUIRED_MASK12 not implemented in this file */
+ 0, /* REQUIRED_MASK13 not implemented in this file */
+ 0, /* REQUIRED_MASK14 not implemented in this file */
+ 0, /* REQUIRED_MASK15 not implemented in this file */
+ REQUIRED_MASK16,
+};
+
+#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
+
+static int is_amd(void)
+{
+ return cpu_vendor[0] == A32('A', 'u', 't', 'h') &&
+ cpu_vendor[1] == A32('e', 'n', 't', 'i') &&
+ cpu_vendor[2] == A32('c', 'A', 'M', 'D');
+}
+
+static int is_centaur(void)
+{
+ return cpu_vendor[0] == A32('C', 'e', 'n', 't') &&
+ cpu_vendor[1] == A32('a', 'u', 'r', 'H') &&
+ cpu_vendor[2] == A32('a', 'u', 'l', 's');
+}
+
+static int is_transmeta(void)
+{
+ return cpu_vendor[0] == A32('G', 'e', 'n', 'u') &&
+ cpu_vendor[1] == A32('i', 'n', 'e', 'T') &&
+ cpu_vendor[2] == A32('M', 'x', '8', '6');
+}
+
+static int is_intel(void)
+{
+ return cpu_vendor[0] == A32('G', 'e', 'n', 'u') &&
+ cpu_vendor[1] == A32('i', 'n', 'e', 'I') &&
+ cpu_vendor[2] == A32('n', 't', 'e', 'l');
+}
+
+/* Returns a bitmask of which words we have error bits in */
+static int check_cpuflags(void)
+{
+ u32 err;
+ int i;
+
+ err = 0;
+ for (i = 0; i < NCAPINTS; i++) {
+ err_flags[i] = req_flags[i] & ~cpu.flags[i];
+ if (err_flags[i])
+ err |= 1 << i;
+ }
+
+ return err;
+}
+
+/*
+ * Returns -1 on error.
+ *
+ * *cpu_level is set to the current CPU level; *req_level to the required
+ * level. x86-64 is considered level 64 for this purpose.
+ *
+ * *err_flags_ptr is set to the flags error array if there are flags missing.
+ */
+int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
+{
+ int err;
+
+ memset(&cpu.flags, 0, sizeof cpu.flags);
+ cpu.level = 3;
+
+ if (has_eflag(X86_EFLAGS_AC))
+ cpu.level = 4;
+
+ get_cpuflags();
+ err = check_cpuflags();
+
+ if (test_bit(X86_FEATURE_LM, cpu.flags))
+ cpu.level = 64;
+
+ if (err == 0x01 &&
+ !(err_flags[0] &
+ ~((1 << X86_FEATURE_XMM)|(1 << X86_FEATURE_XMM2))) &&
+ is_amd()) {
+ /* If this is an AMD and we're only missing SSE+SSE2, try to
+ turn them on */
+
+ u32 ecx = MSR_K7_HWCR;
+ u32 eax, edx;
+
+ asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
+ eax &= ~(1 << 15);
+ asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+
+ get_cpuflags(); /* Make sure it really did something */
+ err = check_cpuflags();
+ } else if (err == 0x01 &&
+ !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) &&
+ is_centaur() && cpu.model >= 6) {
+ /* If this is a VIA C3, we might have to enable CX8
+ explicitly */
+
+ u32 ecx = MSR_VIA_FCR;
+ u32 eax, edx;
+
+ asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
+ eax |= (1<<1)|(1<<7);
+ asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+
+ set_bit(X86_FEATURE_CX8, cpu.flags);
+ err = check_cpuflags();
+ } else if (err == 0x01 && is_transmeta()) {
+ /* Transmeta might have masked feature bits in word 0 */
+
+ u32 ecx = 0x80860004;
+ u32 eax, edx;
+ u32 level = 1;
+
+ asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
+ asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx));
+ asm("cpuid"
+ : "+a" (level), "=d" (cpu.flags[0])
+ : : "ecx", "ebx");
+ asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+
+ err = check_cpuflags();
+ } else if (err == 0x01 &&
+ !(err_flags[0] & ~(1 << X86_FEATURE_PAE)) &&
+ is_intel() && cpu.level == 6 &&
+ (cpu.model == 9 || cpu.model == 13)) {
+ /* PAE is disabled on this Pentium M but can be forced */
+ if (cmdline_find_option_bool("forcepae")) {
+ puts("WARNING: Forcing PAE in CPU flags\n");
+ set_bit(X86_FEATURE_PAE, cpu.flags);
+ err = check_cpuflags();
+ }
+ else {
+ puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n");
+ }
+ }
+ if (!err)
+ err = check_knl_erratum();
+
+ if (err_flags_ptr)
+ *err_flags_ptr = err ? err_flags : NULL;
+ if (cpu_level_ptr)
+ *cpu_level_ptr = cpu.level;
+ if (req_level_ptr)
+ *req_level_ptr = req_level;
+
+ return (cpu.level < req_level || err) ? -1 : 0;
+}
+
+int check_knl_erratum(void)
+{
+ /*
+ * First check for the affected model/family:
+ */
+ if (!is_intel() ||
+ cpu.family != 6 ||
+ cpu.model != INTEL_FAM6_XEON_PHI_KNL)
+ return 0;
+
+ /*
+ * This erratum affects the Accessed/Dirty bits, and can
+ * cause stray bits to be set in !Present PTEs. We have
+ * enough bits in our 64-bit PTEs (which we have on real
+ * 64-bit mode or PAE) to avoid using these troublesome
+ * bits. But, we do not have enough space in our 32-bit
+ * PTEs. So, refuse to run on 32-bit non-PAE kernels.
+ */
+ if (IS_ENABLED(CONFIG_X86_64) || IS_ENABLED(CONFIG_X86_PAE))
+ return 0;
+
+ puts("This 32-bit kernel can not run on this Xeon Phi x200\n"
+ "processor due to a processor erratum. Use a 64-bit\n"
+ "kernel, or enable PAE in this 32-bit kernel.\n\n");
+
+ return -1;
+}
+
+
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
new file mode 100644
index 000000000..a0b75f73d
--- /dev/null
+++ b/arch/x86/boot/cpuflags.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include "bitops.h"
+
+#include <asm/processor-flags.h>
+#include <asm/required-features.h>
+#include <asm/msr-index.h>
+#include "cpuflags.h"
+
+struct cpu_features cpu;
+u32 cpu_vendor[3];
+
+static bool loaded_flags;
+
+static int has_fpu(void)
+{
+ u16 fcw = -1, fsw = -1;
+ unsigned long cr0;
+
+ asm volatile("mov %%cr0,%0" : "=r" (cr0));
+ if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
+ cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
+ asm volatile("mov %0,%%cr0" : : "r" (cr0));
+ }
+
+ asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+ : "+m" (fsw), "+m" (fcw));
+
+ return fsw == 0 && (fcw & 0x103f) == 0x003f;
+}
+
+/*
+ * For building the 16-bit code we want to explicitly specify 32-bit
+ * push/pop operations, rather than just saying 'pushf' or 'popf' and
+ * letting the compiler choose. But this is also included from the
+ * compressed/ directory where it may be 64-bit code, and thus needs
+ * to be 'pushfq' or 'popfq' in that case.
+ */
+#ifdef __x86_64__
+#define PUSHF "pushfq"
+#define POPF "popfq"
+#else
+#define PUSHF "pushfl"
+#define POPF "popfl"
+#endif
+
+int has_eflag(unsigned long mask)
+{
+ unsigned long f0, f1;
+
+ asm volatile(PUSHF " \n\t"
+ PUSHF " \n\t"
+ "pop %0 \n\t"
+ "mov %0,%1 \n\t"
+ "xor %2,%1 \n\t"
+ "push %1 \n\t"
+ POPF " \n\t"
+ PUSHF " \n\t"
+ "pop %1 \n\t"
+ POPF
+ : "=&r" (f0), "=&r" (f1)
+ : "ri" (mask));
+
+ return !!((f0^f1) & mask);
+}
+
+/* Handle x86_32 PIC using ebx. */
+#if defined(__i386__) && defined(__PIC__)
+# define EBX_REG "=r"
+#else
+# define EBX_REG "=b"
+#endif
+
+static inline void cpuid_count(u32 id, u32 count,
+ u32 *a, u32 *b, u32 *c, u32 *d)
+{
+ asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t"
+ "cpuid \n\t"
+ ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t"
+ : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b)
+ : "a" (id), "c" (count)
+ );
+}
+
+#define cpuid(id, a, b, c, d) cpuid_count(id, 0, a, b, c, d)
+
+void get_cpuflags(void)
+{
+ u32 max_intel_level, max_amd_level;
+ u32 tfms;
+ u32 ignored;
+
+ if (loaded_flags)
+ return;
+ loaded_flags = true;
+
+ if (has_fpu())
+ set_bit(X86_FEATURE_FPU, cpu.flags);
+
+ if (has_eflag(X86_EFLAGS_ID)) {
+ cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2],
+ &cpu_vendor[1]);
+
+ if (max_intel_level >= 0x00000001 &&
+ max_intel_level <= 0x0000ffff) {
+ cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
+ &cpu.flags[0]);
+ cpu.level = (tfms >> 8) & 15;
+ cpu.family = cpu.level;
+ cpu.model = (tfms >> 4) & 15;
+ if (cpu.level >= 6)
+ cpu.model += ((tfms >> 16) & 0xf) << 4;
+ }
+
+ if (max_intel_level >= 0x00000007) {
+ cpuid_count(0x00000007, 0, &ignored, &ignored,
+ &cpu.flags[16], &ignored);
+ }
+
+ cpuid(0x80000000, &max_amd_level, &ignored, &ignored,
+ &ignored);
+
+ if (max_amd_level >= 0x80000001 &&
+ max_amd_level <= 0x8000ffff) {
+ cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
+ &cpu.flags[1]);
+ }
+ }
+}
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
new file mode 100644
index 000000000..2e20814d3
--- /dev/null
+++ b/arch/x86/boot/cpuflags.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_CPUFLAGS_H
+#define BOOT_CPUFLAGS_H
+
+#include <asm/cpufeatures.h>
+#include <asm/processor-flags.h>
+
+struct cpu_features {
+ int level; /* Family, or 64 for x86-64 */
+ int family; /* Family, always */
+ int model;
+ u32 flags[NCAPINTS];
+};
+
+extern struct cpu_features cpu;
+extern u32 cpu_vendor[3];
+
+int has_eflag(unsigned long mask);
+void get_cpuflags(void);
+
+#endif
diff --git a/arch/x86/boot/ctype.h b/arch/x86/boot/ctype.h
new file mode 100644
index 000000000..8f5ef2994
--- /dev/null
+++ b/arch/x86/boot/ctype.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_CTYPE_H
+#define BOOT_CTYPE_H
+
+static inline int isdigit(int ch)
+{
+ return (ch >= '0') && (ch <= '9');
+}
+
+static inline int isxdigit(int ch)
+{
+ if (isdigit(ch))
+ return true;
+
+ if ((ch >= 'a') && (ch <= 'f'))
+ return true;
+
+ return (ch >= 'A') && (ch <= 'F');
+}
+
+#endif
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c
new file mode 100644
index 000000000..b25c53527
--- /dev/null
+++ b/arch/x86/boot/early_serial_console.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Serial port routines for use during early boot reporting. This code is
+ * included from both the compressed kernel and the regular kernel.
+ */
+#include "boot.h"
+
+#define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */
+
+#define DLAB 0x80
+
+#define TXR 0 /* Transmit register (WRITE) */
+#define RXR 0 /* Receive register (READ) */
+#define IER 1 /* Interrupt Enable */
+#define IIR 2 /* Interrupt ID */
+#define FCR 2 /* FIFO control */
+#define LCR 3 /* Line control */
+#define MCR 4 /* Modem control */
+#define LSR 5 /* Line Status */
+#define MSR 6 /* Modem Status */
+#define DLL 0 /* Divisor Latch Low */
+#define DLH 1 /* Divisor latch High */
+
+#define DEFAULT_BAUD 9600
+
+static void early_serial_init(int port, int baud)
+{
+ unsigned char c;
+ unsigned divisor;
+
+ outb(0x3, port + LCR); /* 8n1 */
+ outb(0, port + IER); /* no interrupt */
+ outb(0, port + FCR); /* no fifo */
+ outb(0x3, port + MCR); /* DTR + RTS */
+
+ divisor = 115200 / baud;
+ c = inb(port + LCR);
+ outb(c | DLAB, port + LCR);
+ outb(divisor & 0xff, port + DLL);
+ outb((divisor >> 8) & 0xff, port + DLH);
+ outb(c & ~DLAB, port + LCR);
+
+ early_serial_base = port;
+}
+
+static void parse_earlyprintk(void)
+{
+ int baud = DEFAULT_BAUD;
+ char arg[32];
+ int pos = 0;
+ int port = 0;
+
+ if (cmdline_find_option("earlyprintk", arg, sizeof arg) > 0) {
+ char *e;
+
+ if (!strncmp(arg, "serial", 6)) {
+ port = DEFAULT_SERIAL_PORT;
+ pos += 6;
+ }
+
+ if (arg[pos] == ',')
+ pos++;
+
+ /*
+ * make sure we have
+ * "serial,0x3f8,115200"
+ * "serial,ttyS0,115200"
+ * "ttyS0,115200"
+ */
+ if (pos == 7 && !strncmp(arg + pos, "0x", 2)) {
+ port = simple_strtoull(arg + pos, &e, 16);
+ if (port == 0 || arg + pos == e)
+ port = DEFAULT_SERIAL_PORT;
+ else
+ pos = e - arg;
+ } else if (!strncmp(arg + pos, "ttyS", 4)) {
+ static const int bases[] = { 0x3f8, 0x2f8 };
+ int idx = 0;
+
+ /* += strlen("ttyS"); */
+ pos += 4;
+
+ if (arg[pos++] == '1')
+ idx = 1;
+
+ port = bases[idx];
+ }
+
+ if (arg[pos] == ',')
+ pos++;
+
+ baud = simple_strtoull(arg + pos, &e, 0);
+ if (baud == 0 || arg + pos == e)
+ baud = DEFAULT_BAUD;
+ }
+
+ if (port)
+ early_serial_init(port, baud);
+}
+
+#define BASE_BAUD (1843200/16)
+static unsigned int probe_baud(int port)
+{
+ unsigned char lcr, dll, dlh;
+ unsigned int quot;
+
+ lcr = inb(port + LCR);
+ outb(lcr | DLAB, port + LCR);
+ dll = inb(port + DLL);
+ dlh = inb(port + DLH);
+ outb(lcr, port + LCR);
+ quot = (dlh << 8) | dll;
+
+ return BASE_BAUD / quot;
+}
+
+static void parse_console_uart8250(void)
+{
+ char optstr[64], *options;
+ int baud = DEFAULT_BAUD;
+ int port = 0;
+
+ /*
+ * console=uart8250,io,0x3f8,115200n8
+ * need to make sure it is last one console !
+ */
+ if (cmdline_find_option("console", optstr, sizeof optstr) <= 0)
+ return;
+
+ options = optstr;
+
+ if (!strncmp(options, "uart8250,io,", 12))
+ port = simple_strtoull(options + 12, &options, 0);
+ else if (!strncmp(options, "uart,io,", 8))
+ port = simple_strtoull(options + 8, &options, 0);
+ else
+ return;
+
+ if (options && (options[0] == ','))
+ baud = simple_strtoull(options + 1, &options, 0);
+ else
+ baud = probe_baud(port);
+
+ if (port)
+ early_serial_init(port, baud);
+}
+
+void console_init(void)
+{
+ parse_earlyprintk();
+
+ if (!early_serial_base)
+ parse_console_uart8250();
+}
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
new file mode 100644
index 000000000..223e42527
--- /dev/null
+++ b/arch/x86/boot/edd.c
@@ -0,0 +1,182 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Get EDD BIOS disk information
+ */
+
+#include "boot.h"
+#include <linux/edd.h>
+#include "string.h"
+
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+
+/*
+ * Read the MBR (first sector) from a specific device.
+ */
+static int read_mbr(u8 devno, void *buf)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ax = 0x0201; /* Legacy Read, one sector */
+ ireg.cx = 0x0001; /* Sector 0-0-1 */
+ ireg.dl = devno;
+ ireg.bx = (size_t)buf;
+
+ intcall(0x13, &ireg, &oreg);
+
+ return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
+}
+
+static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
+{
+ int sector_size;
+ char *mbrbuf_ptr, *mbrbuf_end;
+ u32 buf_base, mbr_base;
+ extern char _end[];
+ u16 mbr_magic;
+
+ sector_size = ei->params.bytes_per_sector;
+ if (!sector_size)
+ sector_size = 512; /* Best available guess */
+
+ /* Produce a naturally aligned buffer on the heap */
+ buf_base = (ds() << 4) + (u32)&_end;
+ mbr_base = (buf_base+sector_size-1) & ~(sector_size-1);
+ mbrbuf_ptr = _end + (mbr_base-buf_base);
+ mbrbuf_end = mbrbuf_ptr + sector_size;
+
+ /* Make sure we actually have space on the heap... */
+ if (!(boot_params.hdr.loadflags & CAN_USE_HEAP))
+ return -1;
+ if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr)
+ return -1;
+
+ memset(mbrbuf_ptr, 0, sector_size);
+ if (read_mbr(devno, mbrbuf_ptr))
+ return -1;
+
+ *mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET];
+ mbr_magic = *(u16 *)&mbrbuf_ptr[510];
+
+ /* check for valid MBR magic */
+ return mbr_magic == 0xAA55 ? 0 : -1;
+}
+
+static int get_edd_info(u8 devno, struct edd_info *ei)
+{
+ struct biosregs ireg, oreg;
+
+ memset(ei, 0, sizeof *ei);
+
+ /* Check Extensions Present */
+
+ initregs(&ireg);
+ ireg.ah = 0x41;
+ ireg.bx = EDDMAGIC1;
+ ireg.dl = devno;
+ intcall(0x13, &ireg, &oreg);
+
+ if (oreg.eflags & X86_EFLAGS_CF)
+ return -1; /* No extended information */
+
+ if (oreg.bx != EDDMAGIC2)
+ return -1;
+
+ ei->device = devno;
+ ei->version = oreg.ah; /* EDD version number */
+ ei->interface_support = oreg.cx; /* EDD functionality subsets */
+
+ /* Extended Get Device Parameters */
+
+ ei->params.length = sizeof(ei->params);
+ ireg.ah = 0x48;
+ ireg.si = (size_t)&ei->params;
+ intcall(0x13, &ireg, &oreg);
+
+ /* Get legacy CHS parameters */
+
+ /* Ralf Brown recommends setting ES:DI to 0:0 */
+ ireg.ah = 0x08;
+ ireg.es = 0;
+ intcall(0x13, &ireg, &oreg);
+
+ if (!(oreg.eflags & X86_EFLAGS_CF)) {
+ ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2);
+ ei->legacy_max_head = oreg.dh;
+ ei->legacy_sectors_per_track = oreg.cl & 0x3f;
+ }
+
+ return 0;
+}
+
+void query_edd(void)
+{
+ char eddarg[8];
+ int do_mbr = 1;
+#ifdef CONFIG_EDD_OFF
+ int do_edd = 0;
+#else
+ int do_edd = 1;
+#endif
+ int be_quiet;
+ int devno;
+ struct edd_info ei, *edp;
+ u32 *mbrptr;
+
+ if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) {
+ if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) {
+ do_edd = 1;
+ do_mbr = 0;
+ }
+ else if (!strcmp(eddarg, "off"))
+ do_edd = 0;
+ else if (!strcmp(eddarg, "on"))
+ do_edd = 1;
+ }
+
+ be_quiet = cmdline_find_option_bool("quiet");
+
+ edp = boot_params.eddbuf;
+ mbrptr = boot_params.edd_mbr_sig_buffer;
+
+ if (!do_edd)
+ return;
+
+ /* Bugs in OnBoard or AddOnCards Bios may hang the EDD probe,
+ * so give a hint if this happens.
+ */
+
+ if (!be_quiet)
+ printf("Probing EDD (edd=off to disable)... ");
+
+ for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) {
+ /*
+ * Scan the BIOS-supported hard disks and query EDD
+ * information...
+ */
+ if (!get_edd_info(devno, &ei)
+ && boot_params.eddbuf_entries < EDDMAXNR) {
+ memcpy(edp, &ei, sizeof ei);
+ edp++;
+ boot_params.eddbuf_entries++;
+ }
+
+ if (do_mbr && !read_mbr_sig(devno, &ei, mbrptr++))
+ boot_params.edd_mbr_sig_buf_entries = devno-0x80+1;
+ }
+
+ if (!be_quiet)
+ printf("ok\n");
+}
+
+#endif
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
new file mode 100644
index 000000000..6a10d52a4
--- /dev/null
+++ b/arch/x86/boot/genimage.sh
@@ -0,0 +1,130 @@
+#!/bin/sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2017 by Changbin Du <changbin.du@intel.com>
+#
+# Adapted from code in arch/x86/boot/Makefile by H. Peter Anvin and others
+#
+# "make fdimage/fdimage144/fdimage288/isoimage" script for x86 architecture
+#
+# Arguments:
+# $1 - fdimage format
+# $2 - target image file
+# $3 - kernel bzImage file
+# $4 - mtool configuration file
+# $5 - kernel cmdline
+# $6 - inird image file
+#
+
+# Use "make V=1" to debug this script
+case "${KBUILD_VERBOSE}" in
+*1*)
+ set -x
+ ;;
+esac
+
+verify () {
+ if [ ! -f "$1" ]; then
+ echo "" 1>&2
+ echo " *** Missing file: $1" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+}
+
+
+export MTOOLSRC=$4
+FIMAGE=$2
+FBZIMAGE=$3
+KCMDLINE=$5
+FDINITRD=$6
+
+# Make sure the files actually exist
+verify "$FBZIMAGE"
+
+genbzdisk() {
+ verify "$MTOOLSRC"
+ mformat a:
+ syslinux $FIMAGE
+ echo "$KCMDLINE" | mcopy - a:syslinux.cfg
+ if [ -f "$FDINITRD" ] ; then
+ mcopy "$FDINITRD" a:initrd.img
+ fi
+ mcopy $FBZIMAGE a:linux
+}
+
+genfdimage144() {
+ verify "$MTOOLSRC"
+ dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
+ mformat v:
+ syslinux $FIMAGE
+ echo "$KCMDLINE" | mcopy - v:syslinux.cfg
+ if [ -f "$FDINITRD" ] ; then
+ mcopy "$FDINITRD" v:initrd.img
+ fi
+ mcopy $FBZIMAGE v:linux
+}
+
+genfdimage288() {
+ verify "$MTOOLSRC"
+ dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
+ mformat w:
+ syslinux $FIMAGE
+ echo "$KCMDLINE" | mcopy - W:syslinux.cfg
+ if [ -f "$FDINITRD" ] ; then
+ mcopy "$FDINITRD" w:initrd.img
+ fi
+ mcopy $FBZIMAGE w:linux
+}
+
+geniso() {
+ tmp_dir=`dirname $FIMAGE`/isoimage
+ rm -rf $tmp_dir
+ mkdir $tmp_dir
+ for i in lib lib64 share ; do
+ for j in syslinux ISOLINUX ; do
+ if [ -f /usr/$i/$j/isolinux.bin ] ; then
+ isolinux=/usr/$i/$j/isolinux.bin
+ fi
+ done
+ for j in syslinux syslinux/modules/bios ; do
+ if [ -f /usr/$i/$j/ldlinux.c32 ]; then
+ ldlinux=/usr/$i/$j/ldlinux.c32
+ fi
+ done
+ if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
+ break
+ fi
+ done
+ if [ -z "$isolinux" ] ; then
+ echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
+ exit 1
+ fi
+ if [ -z "$ldlinux" ] ; then
+ echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
+ exit 1
+ fi
+ cp $isolinux $tmp_dir
+ cp $ldlinux $tmp_dir
+ cp $FBZIMAGE $tmp_dir/linux
+ echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
+ if [ -f "$FDINITRD" ] ; then
+ cp "$FDINITRD" $tmp_dir/initrd.img
+ fi
+ genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
+ -b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
+ -boot-info-table $tmp_dir
+ isohybrid $FIMAGE 2>/dev/null || true
+ rm -rf $tmp_dir
+}
+
+case $1 in
+ bzdisk) genbzdisk;;
+ fdimage144) genfdimage144;;
+ fdimage288) genfdimage288;;
+ isoimage) geniso;;
+ *) echo 'Unknown image format'; exit 1;
+esac
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
new file mode 100644
index 000000000..850b8762e
--- /dev/null
+++ b/arch/x86/boot/header.S
@@ -0,0 +1,636 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * header.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Based on bootsect.S and setup.S
+ * modified by more people than can be counted
+ *
+ * Rewritten as a common file by H. Peter Anvin (Apr 2007)
+ *
+ * BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment
+ * addresses must be multiplied by 16 to obtain their respective linear
+ * addresses. To avoid confusion, linear addresses are written using leading
+ * hex while segment addresses are written as segment:offset.
+ *
+ */
+
+#include <asm/segment.h>
+#include <asm/boot.h>
+#include <asm/page_types.h>
+#include <asm/setup.h>
+#include <asm/bootparam.h>
+#include "boot.h"
+#include "voffset.h"
+#include "zoffset.h"
+
+BOOTSEG = 0x07C0 /* original address of boot-sector */
+SYSSEG = 0x1000 /* historical load address >> 4 */
+
+#ifndef SVGA_MODE
+#define SVGA_MODE ASK_VGA
+#endif
+
+#ifndef ROOT_RDONLY
+#define ROOT_RDONLY 1
+#endif
+
+ .code16
+ .section ".bstext", "ax"
+
+ .global bootsect_start
+bootsect_start:
+#ifdef CONFIG_EFI_STUB
+ # "MZ", MS-DOS header
+ .byte 0x4d
+ .byte 0x5a
+#endif
+
+ # Normalize the start address
+ ljmp $BOOTSEG, $start2
+
+start2:
+ movw %cs, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+ xorw %sp, %sp
+ sti
+ cld
+
+ movw $bugger_off_msg, %si
+
+msg_loop:
+ lodsb
+ andb %al, %al
+ jz bs_die
+ movb $0xe, %ah
+ movw $7, %bx
+ int $0x10
+ jmp msg_loop
+
+bs_die:
+ # Allow the user to press a key, then reboot
+ xorw %ax, %ax
+ int $0x16
+ int $0x19
+
+ # int 0x19 should never return. In case it does anyway,
+ # invoke the BIOS reset code...
+ ljmp $0xf000,$0xfff0
+
+#ifdef CONFIG_EFI_STUB
+ .org 0x3c
+ #
+ # Offset to the PE header.
+ #
+ .long pe_header
+#endif /* CONFIG_EFI_STUB */
+
+ .section ".bsdata", "a"
+bugger_off_msg:
+ .ascii "Use a boot loader.\r\n"
+ .ascii "\n"
+ .ascii "Remove disk and press any key to reboot...\r\n"
+ .byte 0
+
+#ifdef CONFIG_EFI_STUB
+pe_header:
+ .ascii "PE"
+ .word 0
+
+coff_header:
+#ifdef CONFIG_X86_32
+ .word 0x14c # i386
+#else
+ .word 0x8664 # x86-64
+#endif
+ .word 4 # nr_sections
+ .long 0 # TimeDateStamp
+ .long 0 # PointerToSymbolTable
+ .long 1 # NumberOfSymbols
+ .word section_table - optional_header # SizeOfOptionalHeader
+#ifdef CONFIG_X86_32
+ .word 0x306 # Characteristics.
+ # IMAGE_FILE_32BIT_MACHINE |
+ # IMAGE_FILE_DEBUG_STRIPPED |
+ # IMAGE_FILE_EXECUTABLE_IMAGE |
+ # IMAGE_FILE_LINE_NUMS_STRIPPED
+#else
+ .word 0x206 # Characteristics
+ # IMAGE_FILE_DEBUG_STRIPPED |
+ # IMAGE_FILE_EXECUTABLE_IMAGE |
+ # IMAGE_FILE_LINE_NUMS_STRIPPED
+#endif
+
+optional_header:
+#ifdef CONFIG_X86_32
+ .word 0x10b # PE32 format
+#else
+ .word 0x20b # PE32+ format
+#endif
+ .byte 0x02 # MajorLinkerVersion
+ .byte 0x14 # MinorLinkerVersion
+
+ # Filled in by build.c
+ .long 0 # SizeOfCode
+
+ .long 0 # SizeOfInitializedData
+ .long 0 # SizeOfUninitializedData
+
+ # Filled in by build.c
+ .long 0x0000 # AddressOfEntryPoint
+
+ .long 0x0200 # BaseOfCode
+#ifdef CONFIG_X86_32
+ .long 0 # data
+#endif
+
+extra_header_fields:
+#ifdef CONFIG_X86_32
+ .long 0 # ImageBase
+#else
+ .quad 0 # ImageBase
+#endif
+ .long 0x20 # SectionAlignment
+ .long 0x20 # FileAlignment
+ .word 0 # MajorOperatingSystemVersion
+ .word 0 # MinorOperatingSystemVersion
+ .word 0 # MajorImageVersion
+ .word 0 # MinorImageVersion
+ .word 0 # MajorSubsystemVersion
+ .word 0 # MinorSubsystemVersion
+ .long 0 # Win32VersionValue
+
+ #
+ # The size of the bzImage is written in tools/build.c
+ #
+ .long 0 # SizeOfImage
+
+ .long 0x200 # SizeOfHeaders
+ .long 0 # CheckSum
+ .word 0xa # Subsystem (EFI application)
+ .word 0 # DllCharacteristics
+#ifdef CONFIG_X86_32
+ .long 0 # SizeOfStackReserve
+ .long 0 # SizeOfStackCommit
+ .long 0 # SizeOfHeapReserve
+ .long 0 # SizeOfHeapCommit
+#else
+ .quad 0 # SizeOfStackReserve
+ .quad 0 # SizeOfStackCommit
+ .quad 0 # SizeOfHeapReserve
+ .quad 0 # SizeOfHeapCommit
+#endif
+ .long 0 # LoaderFlags
+ .long 0x6 # NumberOfRvaAndSizes
+
+ .quad 0 # ExportTable
+ .quad 0 # ImportTable
+ .quad 0 # ResourceTable
+ .quad 0 # ExceptionTable
+ .quad 0 # CertificationTable
+ .quad 0 # BaseRelocationTable
+
+ # Section table
+section_table:
+ #
+ # The offset & size fields are filled in by build.c.
+ #
+ .ascii ".setup"
+ .byte 0
+ .byte 0
+ .long 0
+ .long 0x0 # startup_{32,64}
+ .long 0 # Size of initialized data
+ # on disk
+ .long 0x0 # startup_{32,64}
+ .long 0 # PointerToRelocations
+ .long 0 # PointerToLineNumbers
+ .word 0 # NumberOfRelocations
+ .word 0 # NumberOfLineNumbers
+ .long 0x60500020 # Characteristics (section flags)
+
+ #
+ # The EFI application loader requires a relocation section
+ # because EFI applications must be relocatable. The .reloc
+ # offset & size fields are filled in by build.c.
+ #
+ .ascii ".reloc"
+ .byte 0
+ .byte 0
+ .long 0
+ .long 0
+ .long 0 # SizeOfRawData
+ .long 0 # PointerToRawData
+ .long 0 # PointerToRelocations
+ .long 0 # PointerToLineNumbers
+ .word 0 # NumberOfRelocations
+ .word 0 # NumberOfLineNumbers
+ .long 0x42100040 # Characteristics (section flags)
+
+ #
+ # The offset & size fields are filled in by build.c.
+ #
+ .ascii ".text"
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 0
+ .long 0x0 # startup_{32,64}
+ .long 0 # Size of initialized data
+ # on disk
+ .long 0x0 # startup_{32,64}
+ .long 0 # PointerToRelocations
+ .long 0 # PointerToLineNumbers
+ .word 0 # NumberOfRelocations
+ .word 0 # NumberOfLineNumbers
+ .long 0x60500020 # Characteristics (section flags)
+
+ #
+ # The offset & size fields are filled in by build.c.
+ #
+ .ascii ".bss"
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 0
+ .long 0x0
+ .long 0 # Size of initialized data
+ # on disk
+ .long 0x0
+ .long 0 # PointerToRelocations
+ .long 0 # PointerToLineNumbers
+ .word 0 # NumberOfRelocations
+ .word 0 # NumberOfLineNumbers
+ .long 0xc8000080 # Characteristics (section flags)
+
+#endif /* CONFIG_EFI_STUB */
+
+ # Kernel attributes; used by setup. This is part 1 of the
+ # header, from the old boot sector.
+
+ .section ".header", "a"
+ .globl sentinel
+sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */
+
+ .globl hdr
+hdr:
+setup_sects: .byte 0 /* Filled in by build.c */
+root_flags: .word ROOT_RDONLY
+syssize: .long 0 /* Filled in by build.c */
+ram_size: .word 0 /* Obsolete */
+vid_mode: .word SVGA_MODE
+root_dev: .word 0 /* Filled in by build.c */
+boot_flag: .word 0xAA55
+
+ # offset 512, entry point
+
+ .globl _start
+_start:
+ # Explicitly enter this as bytes, or the assembler
+ # tries to generate a 3-byte jump here, which causes
+ # everything else to push off to the wrong offset.
+ .byte 0xeb # short (2-byte) jump
+ .byte start_of_setup-1f
+1:
+
+ # Part 2 of the header, from the old setup.S
+
+ .ascii "HdrS" # header signature
+ .word 0x020d # header version number (>= 0x0105)
+ # or else old loadlin-1.5 will fail)
+ .globl realmode_swtch
+realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
+start_sys_seg: .word SYSSEG # obsolete and meaningless, but just
+ # in case something decided to "use" it
+ .word kernel_version-512 # pointing to kernel version string
+ # above section of header is compatible
+ # with loadlin-1.5 (header v1.5). Don't
+ # change it.
+
+type_of_loader: .byte 0 # 0 means ancient bootloader, newer
+ # bootloaders know to change this.
+ # See Documentation/x86/boot.txt for
+ # assigned ids
+
+# flags, unused bits must be zero (RFU) bit within loadflags
+loadflags:
+ .byte LOADED_HIGH # The kernel is to be loaded high
+
+setup_move_size: .word 0x8000 # size to move, when setup is not
+ # loaded at 0x90000. We will move setup
+ # to 0x90000 then just before jumping
+ # into the kernel. However, only the
+ # loader knows how much data behind
+ # us also needs to be loaded.
+
+code32_start: # here loaders can put a different
+ # start address for 32-bit code.
+ .long 0x100000 # 0x100000 = default for big kernel
+
+ramdisk_image: .long 0 # address of loaded ramdisk image
+ # Here the loader puts the 32-bit
+ # address where it loaded the image.
+ # This only will be read by the kernel.
+
+ramdisk_size: .long 0 # its size in bytes
+
+bootsect_kludge:
+ .long 0 # obsolete
+
+heap_end_ptr: .word _end+STACK_SIZE-512
+ # (Header version 0x0201 or later)
+ # space from here (exclusive) down to
+ # end of setup code can be used by setup
+ # for local heap purposes.
+
+ext_loader_ver:
+ .byte 0 # Extended boot loader version
+ext_loader_type:
+ .byte 0 # Extended boot loader type
+
+cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
+ # If nonzero, a 32-bit pointer
+ # to the kernel command line.
+ # The command line should be
+ # located between the start of
+ # setup and the end of low
+ # memory (0xa0000), or it may
+ # get overwritten before it
+ # gets read. If this field is
+ # used, there is no longer
+ # anything magical about the
+ # 0x90000 segment; the setup
+ # can be located anywhere in
+ # low memory 0x10000 or higher.
+
+initrd_addr_max: .long 0x7fffffff
+ # (Header version 0x0203 or later)
+ # The highest safe address for
+ # the contents of an initrd
+ # The current kernel allows up to 4 GB,
+ # but leave it at 2 GB to avoid
+ # possible bootloader bugs.
+
+kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment
+ #required for protected mode
+ #kernel
+#ifdef CONFIG_RELOCATABLE
+relocatable_kernel: .byte 1
+#else
+relocatable_kernel: .byte 0
+#endif
+min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment
+
+xloadflags:
+#ifdef CONFIG_X86_64
+# define XLF0 XLF_KERNEL_64 /* 64-bit kernel */
+#else
+# define XLF0 0
+#endif
+
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
+ /* kernel/boot_param/ramdisk could be loaded above 4g */
+# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
+#else
+# define XLF1 0
+#endif
+
+#ifdef CONFIG_EFI_STUB
+# ifdef CONFIG_EFI_MIXED
+# define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64)
+# else
+# ifdef CONFIG_X86_64
+# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */
+# else
+# define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */
+# endif
+# endif
+#else
+# define XLF23 0
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC_CORE)
+# define XLF4 XLF_EFI_KEXEC
+#else
+# define XLF4 0
+#endif
+
+ .word XLF0 | XLF1 | XLF23 | XLF4
+
+cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
+ #added with boot protocol
+ #version 2.06
+
+hardware_subarch: .long 0 # subarchitecture, added with 2.07
+ # default to 0 for normal x86 PC
+
+hardware_subarch_data: .quad 0
+
+payload_offset: .long ZO_input_data
+payload_length: .long ZO_z_input_len
+
+setup_data: .quad 0 # 64-bit physical pointer to
+ # single linked list of
+ # struct setup_data
+
+pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
+
+#
+# Getting to provably safe in-place decompression is hard. Worst case
+# behaviours need to be analyzed. Here let's take the decompression of
+# a gzip-compressed kernel as example, to illustrate it:
+#
+# The file layout of gzip compressed kernel is:
+#
+# magic[2]
+# method[1]
+# flags[1]
+# timestamp[4]
+# extraflags[1]
+# os[1]
+# compressed data blocks[N]
+# crc[4] orig_len[4]
+#
+# ... resulting in +18 bytes overhead of uncompressed data.
+#
+# (For more information, please refer to RFC 1951 and RFC 1952.)
+#
+# Files divided into blocks
+# 1 bit (last block flag)
+# 2 bits (block type)
+#
+# 1 block occurs every 32K -1 bytes or when there 50% compression
+# has been achieved. The smallest block type encoding is always used.
+#
+# stored:
+# 32 bits length in bytes.
+#
+# fixed:
+# magic fixed tree.
+# symbols.
+#
+# dynamic:
+# dynamic tree encoding.
+# symbols.
+#
+#
+# The buffer for decompression in place is the length of the uncompressed
+# data, plus a small amount extra to keep the algorithm safe. The
+# compressed data is placed at the end of the buffer. The output pointer
+# is placed at the start of the buffer and the input pointer is placed
+# where the compressed data starts. Problems will occur when the output
+# pointer overruns the input pointer.
+#
+# The output pointer can only overrun the input pointer if the input
+# pointer is moving faster than the output pointer. A condition only
+# triggered by data whose compressed form is larger than the uncompressed
+# form.
+#
+# The worst case at the block level is a growth of the compressed data
+# of 5 bytes per 32767 bytes.
+#
+# The worst case internal to a compressed block is very hard to figure.
+# The worst case can at least be bounded by having one bit that represents
+# 32764 bytes and then all of the rest of the bytes representing the very
+# very last byte.
+#
+# All of which is enough to compute an amount of extra data that is required
+# to be safe. To avoid problems at the block level allocating 5 extra bytes
+# per 32767 bytes of data is sufficient. To avoid problems internal to a
+# block adding an extra 32767 bytes (the worst case uncompressed block size)
+# is sufficient, to ensure that in the worst case the decompressed data for
+# block will stop the byte before the compressed data for a block begins.
+# To avoid problems with the compressed data's meta information an extra 18
+# bytes are needed. Leading to the formula:
+#
+# extra_bytes = (uncompressed_size >> 12) + 32768 + 18
+#
+# Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+# Adding 32768 instead of 32767 just makes for round numbers.
+#
+# Above analysis is for decompressing gzip compressed kernel only. Up to
+# now 6 different decompressor are supported all together. And among them
+# xz stores data in chunks and has maximum chunk of 64K. Hence safety
+# margin should be updated to cover all decompressors so that we don't
+# need to deal with each of them separately. Please check
+# the description in lib/decompressor_xxx.c for specific information.
+#
+# extra_bytes = (uncompressed_size >> 12) + 65536 + 128
+#
+# LZ4 is even worse: data that cannot be further compressed grows by 0.4%,
+# or one byte per 256 bytes. OTOH, we can safely get rid of the +128 as
+# the size-dependent part now grows so fast.
+#
+# extra_bytes = (uncompressed_size >> 8) + 65536
+
+#define ZO_z_extra_bytes ((ZO_z_output_len >> 8) + 65536)
+#if ZO_z_output_len > ZO_z_input_len
+# define ZO_z_extract_offset (ZO_z_output_len + ZO_z_extra_bytes - \
+ ZO_z_input_len)
+#else
+# define ZO_z_extract_offset ZO_z_extra_bytes
+#endif
+
+/*
+ * The extract_offset has to be bigger than ZO head section. Otherwise when
+ * the head code is running to move ZO to the end of the buffer, it will
+ * overwrite the head code itself.
+ */
+#if (ZO__ehead - ZO_startup_32) > ZO_z_extract_offset
+# define ZO_z_min_extract_offset ((ZO__ehead - ZO_startup_32 + 4095) & ~4095)
+#else
+# define ZO_z_min_extract_offset ((ZO_z_extract_offset + 4095) & ~4095)
+#endif
+
+#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_min_extract_offset)
+
+#define VO_INIT_SIZE (VO__end - VO__text)
+#if ZO_INIT_SIZE > VO_INIT_SIZE
+# define INIT_SIZE ZO_INIT_SIZE
+#else
+# define INIT_SIZE VO_INIT_SIZE
+#endif
+
+init_size: .long INIT_SIZE # kernel initialization size
+handover_offset: .long 0 # Filled in by build.c
+
+# End of setup header #####################################################
+
+ .section ".entrytext", "ax"
+start_of_setup:
+# Force %es = %ds
+ movw %ds, %ax
+ movw %ax, %es
+ cld
+
+# Apparently some ancient versions of LILO invoked the kernel with %ss != %ds,
+# which happened to work by accident for the old code. Recalculate the stack
+# pointer if %ss is invalid. Otherwise leave it alone, LOADLIN sets up the
+# stack behind its own code, so we can't blindly put it directly past the heap.
+
+ movw %ss, %dx
+ cmpw %ax, %dx # %ds == %ss?
+ movw %sp, %dx
+ je 2f # -> assume %sp is reasonably set
+
+ # Invalid %ss, make up a new stack
+ movw $_end, %dx
+ testb $CAN_USE_HEAP, loadflags
+ jz 1f
+ movw heap_end_ptr, %dx
+1: addw $STACK_SIZE, %dx
+ jnc 2f
+ xorw %dx, %dx # Prevent wraparound
+
+2: # Now %dx should point to the end of our stack space
+ andw $~3, %dx # dword align (might as well...)
+ jnz 3f
+ movw $0xfffc, %dx # Make sure we're not zero
+3: movw %ax, %ss
+ movzwl %dx, %esp # Clear upper half of %esp
+ sti # Now we should have a working stack
+
+# We will have entered with %cs = %ds+0x20, normalize %cs so
+# it is on par with the other segments.
+ pushw %ds
+ pushw $6f
+ lretw
+6:
+
+# Check signature at end of setup
+ cmpl $0x5a5aaa55, setup_sig
+ jne setup_bad
+
+# Zero the bss
+ movw $__bss_start, %di
+ movw $_end+3, %cx
+ xorl %eax, %eax
+ subw %di, %cx
+ shrw $2, %cx
+ rep; stosl
+
+# Jump to C code (should not return)
+ calll main
+
+# Setup corrupt somehow...
+setup_bad:
+ movl $setup_corrupt, %eax
+ calll puts
+ # Fall through...
+
+ .globl die
+ .type die, @function
+die:
+ hlt
+ jmp die
+
+ .size die, .-die
+
+ .section ".initdata", "a"
+setup_corrupt:
+ .byte 7
+ .string "No setup signature found...\n"
diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh
new file mode 100644
index 000000000..d13ec1c38
--- /dev/null
+++ b/arch/x86/boot/install.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for i386 architecture
+#
+# Arguments:
+# $1 - kernel version
+# $2 - kernel image file
+# $3 - kernel map file
+# $4 - default install path (blank if root directory)
+#
+
+verify () {
+ if [ ! -f "$1" ]; then
+ echo "" 1>&2
+ echo " *** Missing file: $1" 1>&2
+ echo ' *** You need to run "make" before "make install".' 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+}
+
+# Make sure the files actually exist
+verify "$2"
+verify "$3"
+
+# User may have a custom install script
+
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+
+# Default install - same as make zlilo
+
+if [ -f $4/vmlinuz ]; then
+ mv $4/vmlinuz $4/vmlinuz.old
+fi
+
+if [ -f $4/System.map ]; then
+ mv $4/System.map $4/System.old
+fi
+
+cat $2 > $4/vmlinuz
+cp $3 $4/System.map
+
+if [ -x /sbin/lilo ]; then
+ /sbin/lilo
+elif [ -x /etc/lilo/install ]; then
+ /etc/lilo/install
+else
+ sync
+ echo "Cannot find LILO."
+fi
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
new file mode 100644
index 000000000..9bcea386d
--- /dev/null
+++ b/arch/x86/boot/main.c
@@ -0,0 +1,182 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Main module for the real-mode kernel code
+ */
+
+#include "boot.h"
+#include "string.h"
+
+struct boot_params boot_params __attribute__((aligned(16)));
+
+char *HEAP = _end;
+char *heap_end = _end; /* Default end of heap = no heap */
+
+/*
+ * Copy the header into the boot parameter block. Since this
+ * screws up the old-style command line protocol, adjust by
+ * filling in the new-style command line pointer instead.
+ */
+
+static void copy_boot_params(void)
+{
+ struct old_cmdline {
+ u16 cl_magic;
+ u16 cl_offset;
+ };
+ const struct old_cmdline * const oldcmd =
+ (const struct old_cmdline *)OLD_CL_ADDRESS;
+
+ BUILD_BUG_ON(sizeof boot_params != 4096);
+ memcpy(&boot_params.hdr, &hdr, sizeof hdr);
+
+ if (!boot_params.hdr.cmd_line_ptr &&
+ oldcmd->cl_magic == OLD_CL_MAGIC) {
+ /* Old-style command line protocol. */
+ u16 cmdline_seg;
+
+ /* Figure out if the command line falls in the region
+ of memory that an old kernel would have copied up
+ to 0x90000... */
+ if (oldcmd->cl_offset < boot_params.hdr.setup_move_size)
+ cmdline_seg = ds();
+ else
+ cmdline_seg = 0x9000;
+
+ boot_params.hdr.cmd_line_ptr =
+ (cmdline_seg << 4) + oldcmd->cl_offset;
+ }
+}
+
+/*
+ * Query the keyboard lock status as given by the BIOS, and
+ * set the keyboard repeat rate to maximum. Unclear why the latter
+ * is done here; this might be possible to kill off as stale code.
+ */
+static void keyboard_init(void)
+{
+ struct biosregs ireg, oreg;
+ initregs(&ireg);
+
+ ireg.ah = 0x02; /* Get keyboard status */
+ intcall(0x16, &ireg, &oreg);
+ boot_params.kbd_status = oreg.al;
+
+ ireg.ax = 0x0305; /* Set keyboard repeat rate */
+ intcall(0x16, &ireg, NULL);
+}
+
+/*
+ * Get Intel SpeedStep (IST) information.
+ */
+static void query_ist(void)
+{
+ struct biosregs ireg, oreg;
+
+ /* Some older BIOSes apparently crash on this call, so filter
+ it from machines too old to have SpeedStep at all. */
+ if (cpu.level < 6)
+ return;
+
+ initregs(&ireg);
+ ireg.ax = 0xe980; /* IST Support */
+ ireg.edx = 0x47534943; /* Request value */
+ intcall(0x15, &ireg, &oreg);
+
+ boot_params.ist_info.signature = oreg.eax;
+ boot_params.ist_info.command = oreg.ebx;
+ boot_params.ist_info.event = oreg.ecx;
+ boot_params.ist_info.perf_level = oreg.edx;
+}
+
+/*
+ * Tell the BIOS what CPU mode we intend to run in.
+ */
+static void set_bios_mode(void)
+{
+#ifdef CONFIG_X86_64
+ struct biosregs ireg;
+
+ initregs(&ireg);
+ ireg.ax = 0xec00;
+ ireg.bx = 2;
+ intcall(0x15, &ireg, NULL);
+#endif
+}
+
+static void init_heap(void)
+{
+ char *stack_end;
+
+ if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
+ asm("leal %P1(%%esp),%0"
+ : "=r" (stack_end) : "i" (-STACK_SIZE));
+
+ heap_end = (char *)
+ ((size_t)boot_params.hdr.heap_end_ptr + 0x200);
+ if (heap_end > stack_end)
+ heap_end = stack_end;
+ } else {
+ /* Boot protocol 2.00 only, no heap available */
+ puts("WARNING: Ancient bootloader, some functionality "
+ "may be limited!\n");
+ }
+}
+
+void main(void)
+{
+ /* First, copy the boot header into the "zeropage" */
+ copy_boot_params();
+
+ /* Initialize the early-boot console */
+ console_init();
+ if (cmdline_find_option_bool("debug"))
+ puts("early console in setup code\n");
+
+ /* End of heap check */
+ init_heap();
+
+ /* Make sure we have all the proper CPU support */
+ if (validate_cpu()) {
+ puts("Unable to boot - please use a kernel appropriate "
+ "for your CPU.\n");
+ die();
+ }
+
+ /* Tell the BIOS what CPU mode we intend to run in. */
+ set_bios_mode();
+
+ /* Detect memory layout */
+ detect_memory();
+
+ /* Set keyboard repeat rate (why?) and query the lock flags */
+ keyboard_init();
+
+ /* Query Intel SpeedStep (IST) information */
+ query_ist();
+
+ /* Query APM information */
+#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
+ query_apm_bios();
+#endif
+
+ /* Query EDD information */
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+ query_edd();
+#endif
+
+ /* Set the video mode */
+ set_video();
+
+ /* Do the last things and invoke protected mode */
+ go_to_protected_mode();
+}
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
new file mode 100644
index 000000000..d9c28c87e
--- /dev/null
+++ b/arch/x86/boot/memory.c
@@ -0,0 +1,136 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Memory detection code
+ */
+
+#include "boot.h"
+
+#define SMAP 0x534d4150 /* ASCII "SMAP" */
+
+static int detect_memory_e820(void)
+{
+ int count = 0;
+ struct biosregs ireg, oreg;
+ struct boot_e820_entry *desc = boot_params.e820_table;
+ static struct boot_e820_entry buf; /* static so it is zeroed */
+
+ initregs(&ireg);
+ ireg.ax = 0xe820;
+ ireg.cx = sizeof buf;
+ ireg.edx = SMAP;
+ ireg.di = (size_t)&buf;
+
+ /*
+ * Note: at least one BIOS is known which assumes that the
+ * buffer pointed to by one e820 call is the same one as
+ * the previous call, and only changes modified fields. Therefore,
+ * we use a temporary buffer and copy the results entry by entry.
+ *
+ * This routine deliberately does not try to account for
+ * ACPI 3+ extended attributes. This is because there are
+ * BIOSes in the field which report zero for the valid bit for
+ * all ranges, and we don't currently make any use of the
+ * other attribute bits. Revisit this if we see the extended
+ * attribute bits deployed in a meaningful way in the future.
+ */
+
+ do {
+ intcall(0x15, &ireg, &oreg);
+ ireg.ebx = oreg.ebx; /* for next iteration... */
+
+ /* BIOSes which terminate the chain with CF = 1 as opposed
+ to %ebx = 0 don't always report the SMAP signature on
+ the final, failing, probe. */
+ if (oreg.eflags & X86_EFLAGS_CF)
+ break;
+
+ /* Some BIOSes stop returning SMAP in the middle of
+ the search loop. We don't know exactly how the BIOS
+ screwed up the map at that point, we might have a
+ partial map, the full map, or complete garbage, so
+ just return failure. */
+ if (oreg.eax != SMAP) {
+ count = 0;
+ break;
+ }
+
+ *desc++ = buf;
+ count++;
+ } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_table));
+
+ return boot_params.e820_entries = count;
+}
+
+static int detect_memory_e801(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ax = 0xe801;
+ intcall(0x15, &ireg, &oreg);
+
+ if (oreg.eflags & X86_EFLAGS_CF)
+ return -1;
+
+ /* Do we really need to do this? */
+ if (oreg.cx || oreg.dx) {
+ oreg.ax = oreg.cx;
+ oreg.bx = oreg.dx;
+ }
+
+ if (oreg.ax > 15*1024) {
+ return -1; /* Bogus! */
+ } else if (oreg.ax == 15*1024) {
+ boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
+ } else {
+ /*
+ * This ignores memory above 16MB if we have a memory
+ * hole there. If someone actually finds a machine
+ * with a memory hole at 16MB and no support for
+ * 0E820h they should probably generate a fake e820
+ * map.
+ */
+ boot_params.alt_mem_k = oreg.ax;
+ }
+
+ return 0;
+}
+
+static int detect_memory_88(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ah = 0x88;
+ intcall(0x15, &ireg, &oreg);
+
+ boot_params.screen_info.ext_mem_k = oreg.ax;
+
+ return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
+}
+
+int detect_memory(void)
+{
+ int err = -1;
+
+ if (detect_memory_e820() > 0)
+ err = 0;
+
+ if (!detect_memory_e801())
+ err = 0;
+
+ if (!detect_memory_88())
+ err = 0;
+
+ return err;
+}
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
new file mode 100644
index 000000000..f72498dc9
--- /dev/null
+++ b/arch/x86/boot/mkcpustr.c
@@ -0,0 +1,52 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2008 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2 or (at your
+ * option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * This is a host program to preprocess the CPU strings into a
+ * compact format suitable for the setup code.
+ */
+
+#include <stdio.h>
+
+#include "../include/asm/required-features.h"
+#include "../include/asm/disabled-features.h"
+#include "../include/asm/cpufeatures.h"
+#include "../kernel/cpu/capflags.c"
+
+int main(void)
+{
+ int i, j;
+ const char *str;
+
+ printf("static const char x86_cap_strs[] =\n");
+
+ for (i = 0; i < NCAPINTS; i++) {
+ for (j = 0; j < 32; j++) {
+ str = x86_cap_flags[i*32+j];
+
+ if (i == NCAPINTS-1 && j == 31) {
+ /* The last entry must be unconditional; this
+ also consumes the compiler-added null
+ character */
+ if (!str)
+ str = "";
+ printf("\t\"\\x%02x\\x%02x\"\"%s\"\n",
+ i, j, str);
+ } else if (str) {
+ printf("#if REQUIRED_MASK%d & (1 << %d)\n"
+ "\t\"\\x%02x\\x%02x\"\"%s\\0\"\n"
+ "#endif\n",
+ i, j, i, j, str);
+ }
+ }
+ }
+ printf("\t;\n");
+ return 0;
+}
diff --git a/arch/x86/boot/mtools.conf.in b/arch/x86/boot/mtools.conf.in
new file mode 100644
index 000000000..efd6d2490
--- /dev/null
+++ b/arch/x86/boot/mtools.conf.in
@@ -0,0 +1,17 @@
+#
+# mtools configuration file for "make (b)zdisk"
+#
+
+# Actual floppy drive
+drive a:
+ file="/dev/fd0"
+
+# 1.44 MB floppy disk image
+drive v:
+ file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter
+
+# 2.88 MB floppy disk image (mostly for virtual uses)
+drive w:
+ file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter
+
+
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
new file mode 100644
index 000000000..8062f8915
--- /dev/null
+++ b/arch/x86/boot/pm.c
@@ -0,0 +1,126 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Prepare the machine for transition to protected mode.
+ */
+
+#include "boot.h"
+#include <asm/segment.h>
+
+/*
+ * Invoke the realmode switch hook if present; otherwise
+ * disable all interrupts.
+ */
+static void realmode_switch_hook(void)
+{
+ if (boot_params.hdr.realmode_swtch) {
+ asm volatile("lcallw *%0"
+ : : "m" (boot_params.hdr.realmode_swtch)
+ : "eax", "ebx", "ecx", "edx");
+ } else {
+ asm volatile("cli");
+ outb(0x80, 0x70); /* Disable NMI */
+ io_delay();
+ }
+}
+
+/*
+ * Disable all interrupts at the legacy PIC.
+ */
+static void mask_all_interrupts(void)
+{
+ outb(0xff, 0xa1); /* Mask all interrupts on the secondary PIC */
+ io_delay();
+ outb(0xfb, 0x21); /* Mask all but cascade on the primary PIC */
+ io_delay();
+}
+
+/*
+ * Reset IGNNE# if asserted in the FPU.
+ */
+static void reset_coprocessor(void)
+{
+ outb(0, 0xf0);
+ io_delay();
+ outb(0, 0xf1);
+ io_delay();
+}
+
+/*
+ * Set up the GDT
+ */
+
+struct gdt_ptr {
+ u16 len;
+ u32 ptr;
+} __attribute__((packed));
+
+static void setup_gdt(void)
+{
+ /* There are machines which are known to not boot with the GDT
+ being 8-byte unaligned. Intel recommends 16 byte alignment. */
+ static const u64 boot_gdt[] __attribute__((aligned(16))) = {
+ /* CS: code, read/execute, 4 GB, base 0 */
+ [GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
+ /* DS: data, read/write, 4 GB, base 0 */
+ [GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
+ /* TSS: 32-bit tss, 104 bytes, base 4096 */
+ /* We only have a TSS here to keep Intel VT happy;
+ we don't actually use it for anything. */
+ [GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103),
+ };
+ /* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead
+ of the gdt_ptr contents. Thus, make it static so it will
+ stay in memory, at least long enough that we switch to the
+ proper kernel GDT. */
+ static struct gdt_ptr gdt;
+
+ gdt.len = sizeof(boot_gdt)-1;
+ gdt.ptr = (u32)&boot_gdt + (ds() << 4);
+
+ asm volatile("lgdtl %0" : : "m" (gdt));
+}
+
+/*
+ * Set up the IDT
+ */
+static void setup_idt(void)
+{
+ static const struct gdt_ptr null_idt = {0, 0};
+ asm volatile("lidtl %0" : : "m" (null_idt));
+}
+
+/*
+ * Actual invocation sequence
+ */
+void go_to_protected_mode(void)
+{
+ /* Hook before leaving real mode, also disables interrupts */
+ realmode_switch_hook();
+
+ /* Enable the A20 gate */
+ if (enable_a20()) {
+ puts("A20 gate not responding, unable to boot...\n");
+ die();
+ }
+
+ /* Reset coprocessor (IGNNE#) */
+ reset_coprocessor();
+
+ /* Mask all interrupts in the PIC */
+ mask_all_interrupts();
+
+ /* Actual transition to protected mode... */
+ setup_idt();
+ setup_gdt();
+ protected_mode_jump(boot_params.hdr.code32_start,
+ (u32)&boot_params + (ds() << 4));
+}
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
new file mode 100644
index 000000000..3e0edc6d2
--- /dev/null
+++ b/arch/x86/boot/pmjump.S
@@ -0,0 +1,77 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * The actual transition into protected mode
+ */
+
+#include <asm/boot.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+#include <linux/linkage.h>
+
+ .text
+ .code16
+
+/*
+ * void protected_mode_jump(u32 entrypoint, u32 bootparams);
+ */
+GLOBAL(protected_mode_jump)
+ movl %edx, %esi # Pointer to boot_params table
+
+ xorl %ebx, %ebx
+ movw %cs, %bx
+ shll $4, %ebx
+ addl %ebx, 2f
+ jmp 1f # Short jump to serialize on 386/486
+1:
+
+ movw $__BOOT_DS, %cx
+ movw $__BOOT_TSS, %di
+
+ movl %cr0, %edx
+ orb $X86_CR0_PE, %dl # Protected mode
+ movl %edx, %cr0
+
+ # Transition to 32-bit mode
+ .byte 0x66, 0xea # ljmpl opcode
+2: .long in_pm32 # offset
+ .word __BOOT_CS # segment
+ENDPROC(protected_mode_jump)
+
+ .code32
+ .section ".text32","ax"
+GLOBAL(in_pm32)
+ # Set up data segments for flat 32-bit mode
+ movl %ecx, %ds
+ movl %ecx, %es
+ movl %ecx, %fs
+ movl %ecx, %gs
+ movl %ecx, %ss
+ # The 32-bit code sets up its own stack, but this way we do have
+ # a valid stack if some debugging hack wants to use it.
+ addl %ebx, %esp
+
+ # Set up TR to make Intel VT happy
+ ltr %di
+
+ # Clear registers to allow for future extensions to the
+ # 32-bit boot protocol
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %ebx, %ebx
+ xorl %ebp, %ebp
+ xorl %edi, %edi
+
+ # Set up LDTR to make Intel VT happy
+ lldt %cx
+
+ jmpl *%eax # Jump to the 32-bit entrypoint
+ENDPROC(in_pm32)
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
new file mode 100644
index 000000000..565083c16
--- /dev/null
+++ b/arch/x86/boot/printf.c
@@ -0,0 +1,309 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Oh, it's a waste of space, but oh-so-yummy for debugging. This
+ * version of printf() does not include 64-bit support. "Live with
+ * it."
+ *
+ */
+
+#include "boot.h"
+
+static int skip_atoi(const char **s)
+{
+ int i = 0;
+
+ while (isdigit(**s))
+ i = i * 10 + *((*s)++) - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SMALL 32 /* Must be 32 == 0x20 */
+#define SPECIAL 64 /* 0x */
+
+#define __do_div(n, base) ({ \
+int __res; \
+__res = ((unsigned long) n) % (unsigned) base; \
+n = ((unsigned long) n) / (unsigned) base; \
+__res; })
+
+static char *number(char *str, long num, int base, int size, int precision,
+ int type)
+{
+ /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
+ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+ char tmp[66];
+ char c, sign, locase;
+ int i;
+
+ /* locase = 0 or 0x20. ORing digits or letters with 'locase'
+ * produces same digits or (maybe lowercased) letters */
+ locase = (type & SMALL);
+ if (type & LEFT)
+ type &= ~ZEROPAD;
+ if (base < 2 || base > 16)
+ return NULL;
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN) {
+ if (num < 0) {
+ sign = '-';
+ num = -num;
+ size--;
+ } else if (type & PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++] = '0';
+ else
+ while (num != 0)
+ tmp[i++] = (digits[__do_div(num, base)] | locase);
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type & (ZEROPAD + LEFT)))
+ while (size-- > 0)
+ *str++ = ' ';
+ if (sign)
+ *str++ = sign;
+ if (type & SPECIAL) {
+ if (base == 8)
+ *str++ = '0';
+ else if (base == 16) {
+ *str++ = '0';
+ *str++ = ('X' | locase);
+ }
+ }
+ if (!(type & LEFT))
+ while (size-- > 0)
+ *str++ = c;
+ while (i < precision--)
+ *str++ = '0';
+ while (i-- > 0)
+ *str++ = tmp[i];
+ while (size-- > 0)
+ *str++ = ' ';
+ return str;
+}
+
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ int len;
+ unsigned long num;
+ int i, base;
+ char *str;
+ const char *s;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+ number of chars for from string */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+
+ for (str = buf; *fmt; ++fmt) {
+ if (*fmt != '%') {
+ *str++ = *fmt;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-':
+ flags |= LEFT;
+ goto repeat;
+ case '+':
+ flags |= PLUS;
+ goto repeat;
+ case ' ':
+ flags |= SPACE;
+ goto repeat;
+ case '#':
+ flags |= SPECIAL;
+ goto repeat;
+ case '0':
+ flags |= ZEROPAD;
+ goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt))
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
+ qualifier = *fmt;
+ ++fmt;
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & LEFT))
+ while (--field_width > 0)
+ *str++ = ' ';
+ *str++ = (unsigned char)va_arg(args, int);
+ while (--field_width > 0)
+ *str++ = ' ';
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT))
+ while (len < field_width--)
+ *str++ = ' ';
+ for (i = 0; i < len; ++i)
+ *str++ = *s++;
+ while (len < field_width--)
+ *str++ = ' ';
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2 * sizeof(void *);
+ flags |= ZEROPAD;
+ }
+ str = number(str,
+ (unsigned long)va_arg(args, void *), 16,
+ field_width, precision, flags);
+ continue;
+
+ case 'n':
+ if (qualifier == 'l') {
+ long *ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else {
+ int *ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ *str++ = '%';
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'x':
+ flags |= SMALL;
+ case 'X':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ *str++ = '%';
+ if (*fmt)
+ *str++ = *fmt;
+ else
+ --fmt;
+ continue;
+ }
+ if (qualifier == 'l')
+ num = va_arg(args, unsigned long);
+ else if (qualifier == 'h') {
+ num = (unsigned short)va_arg(args, int);
+ if (flags & SIGN)
+ num = (short)num;
+ } else if (flags & SIGN)
+ num = va_arg(args, int);
+ else
+ num = va_arg(args, unsigned int);
+ str = number(str, num, base, field_width, precision, flags);
+ }
+ *str = '\0';
+ return str - buf;
+}
+
+int sprintf(char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsprintf(buf, fmt, args);
+ va_end(args);
+ return i;
+}
+
+int printf(const char *fmt, ...)
+{
+ char printf_buf[1024];
+ va_list args;
+ int printed;
+
+ va_start(args, fmt);
+ printed = vsprintf(printf_buf, fmt, args);
+ va_end(args);
+
+ puts(printf_buf);
+
+ return printed;
+}
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c
new file mode 100644
index 000000000..c0fb356a3
--- /dev/null
+++ b/arch/x86/boot/regs.c
@@ -0,0 +1,30 @@
+/* -----------------------------------------------------------------------
+ *
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2 or (at your
+ * option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Simple helper function for initializing a register set.
+ *
+ * Note that this sets EFLAGS_CF in the input register set; this
+ * makes it easier to catch functions which do nothing but don't
+ * explicitly set CF.
+ */
+
+#include "boot.h"
+#include "string.h"
+
+void initregs(struct biosregs *reg)
+{
+ memset(reg, 0, sizeof *reg);
+ reg->eflags |= X86_EFLAGS_CF;
+ reg->ds = ds();
+ reg->es = ds();
+ reg->fs = fs();
+ reg->gs = gs();
+}
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
new file mode 100644
index 000000000..96a6c7563
--- /dev/null
+++ b/arch/x86/boot/setup.ld
@@ -0,0 +1,64 @@
+/*
+ * setup.ld
+ *
+ * Linker script for the i386 setup code
+ */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = 0;
+ .bstext : { *(.bstext) }
+ .bsdata : { *(.bsdata) }
+
+ . = 495;
+ .header : { *(.header) }
+ .entrytext : { *(.entrytext) }
+ .inittext : { *(.inittext) }
+ .initdata : { *(.initdata) }
+ __end_init = .;
+
+ .text : { *(.text) }
+ .text32 : { *(.text32) }
+
+ . = ALIGN(16);
+ .rodata : { *(.rodata*) }
+
+ .videocards : {
+ video_cards = .;
+ *(.videocards)
+ video_cards_end = .;
+ }
+
+ . = ALIGN(16);
+ .data : { *(.data*) }
+
+ .signature : {
+ setup_sig = .;
+ LONG(0x5a5aaa55)
+ }
+
+
+ . = ALIGN(16);
+ .bss :
+ {
+ __bss_start = .;
+ *(.bss)
+ __bss_end = .;
+ }
+ . = ALIGN(16);
+ _end = .;
+
+ /DISCARD/ : { *(.note*) }
+
+ /*
+ * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
+ */
+ . = ASSERT(_end <= 0x8000, "Setup too big!");
+ . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
+ /* Necessary for the very-old-loader check to work... */
+ . = ASSERT(__end_init <= 5*512, "init sections too big!");
+
+}
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
new file mode 100644
index 000000000..2622c0742
--- /dev/null
+++ b/arch/x86/boot/string.c
@@ -0,0 +1,197 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Very basic string functions
+ */
+
+#include <linux/types.h>
+#include <asm/asm.h>
+#include "ctype.h"
+#include "string.h"
+
+/*
+ * Undef these macros so that the functions that we provide
+ * here will have the correct names regardless of how string.h
+ * may have chosen to #define them.
+ */
+#undef memcpy
+#undef memset
+#undef memcmp
+
+int memcmp(const void *s1, const void *s2, size_t len)
+{
+ bool diff;
+ asm("repe; cmpsb" CC_SET(nz)
+ : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ return diff;
+}
+
+/*
+ * Clang may lower `memcmp == 0` to `bcmp == 0`.
+ */
+int bcmp(const void *s1, const void *s2, size_t len)
+{
+ return memcmp(s1, s2, len);
+}
+
+int strcmp(const char *str1, const char *str2)
+{
+ const unsigned char *s1 = (const unsigned char *)str1;
+ const unsigned char *s2 = (const unsigned char *)str2;
+ int delta = 0;
+
+ while (*s1 || *s2) {
+ delta = *s1 - *s2;
+ if (delta)
+ return delta;
+ s1++;
+ s2++;
+ }
+ return 0;
+}
+
+int strncmp(const char *cs, const char *ct, size_t count)
+{
+ unsigned char c1, c2;
+
+ while (count) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ count--;
+ }
+ return 0;
+}
+
+size_t strnlen(const char *s, size_t maxlen)
+{
+ const char *es = s;
+ while (*es && maxlen) {
+ es++;
+ maxlen--;
+ }
+
+ return (es - s);
+}
+
+unsigned int atou(const char *s)
+{
+ unsigned int i = 0;
+ while (isdigit(*s))
+ i = i * 10 + (*s++ - '0');
+ return i;
+}
+
+/* Works only for digits and letters, but small and fast */
+#define TOLOWER(x) ((x) | 0x20)
+
+static unsigned int simple_guess_base(const char *cp)
+{
+ if (cp[0] == '0') {
+ if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2]))
+ return 16;
+ else
+ return 8;
+ } else {
+ return 10;
+ }
+}
+
+/**
+ * simple_strtoull - convert a string to an unsigned long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+
+unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
+{
+ unsigned long long result = 0;
+
+ if (!base)
+ base = simple_guess_base(cp);
+
+ if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
+ cp += 2;
+
+ while (isxdigit(*cp)) {
+ unsigned int value;
+
+ value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10;
+ if (value >= base)
+ break;
+ result = result * base + value;
+ cp++;
+ }
+ if (endp)
+ *endp = (char *)cp;
+
+ return result;
+}
+
+long simple_strtol(const char *cp, char **endp, unsigned int base)
+{
+ if (*cp == '-')
+ return -simple_strtoull(cp + 1, endp, base);
+
+ return simple_strtoull(cp, endp, base);
+}
+
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ */
+size_t strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+/**
+ * strstr - Find the first substring in a %NUL terminated string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ */
+char *strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+
+/**
+ * strchr - Find the first occurrence of the character c in the string s.
+ * @s: the string to be searched
+ * @c: the character to search for
+ */
+char *strchr(const char *s, int c)
+{
+ while (*s != (char)c)
+ if (*s++ == '\0')
+ return NULL;
+ return (char *)s;
+}
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
new file mode 100644
index 000000000..3d78e2707
--- /dev/null
+++ b/arch/x86/boot/string.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_STRING_H
+#define BOOT_STRING_H
+
+/* Undef any of these macros coming from string_32.h. */
+#undef memcpy
+#undef memset
+#undef memcmp
+
+void *memcpy(void *dst, const void *src, size_t len);
+void *memset(void *dst, int c, size_t len);
+int memcmp(const void *s1, const void *s2, size_t len);
+
+/*
+ * Access builtin version by default. If one needs to use optimized version,
+ * do "undef memcpy" in .c file and link against right string.c
+ */
+#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
+#define memset(d,c,l) __builtin_memset(d,c,l)
+#define memcmp __builtin_memcmp
+
+extern int strcmp(const char *str1, const char *str2);
+extern int strncmp(const char *cs, const char *ct, size_t count);
+extern size_t strlen(const char *s);
+extern char *strstr(const char *s1, const char *s2);
+extern char *strchr(const char *s, int c);
+extern size_t strnlen(const char *s, size_t maxlen);
+extern unsigned int atou(const char *s);
+extern unsigned long long simple_strtoull(const char *cp, char **endp,
+ unsigned int base);
+
+#endif /* BOOT_STRING_H */
diff --git a/arch/x86/boot/tools/.gitignore b/arch/x86/boot/tools/.gitignore
new file mode 100644
index 000000000..378eac25d
--- /dev/null
+++ b/arch/x86/boot/tools/.gitignore
@@ -0,0 +1 @@
+build
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
new file mode 100644
index 000000000..bf0e82400
--- /dev/null
+++ b/arch/x86/boot/tools/build.c
@@ -0,0 +1,442 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1997 Martin Mares
+ * Copyright (C) 2007 H. Peter Anvin
+ */
+
+/*
+ * This file builds a disk-image from three different files:
+ *
+ * - setup: 8086 machine code, sets up system parm
+ * - system: 80386 code for actual system
+ * - zoffset.h: header with ZO_* defines
+ *
+ * It does some checking that all files are of the correct type, and writes
+ * the result to the specified destination, removing headers and padding to
+ * the right amount. It also writes some system data to stdout.
+ */
+
+/*
+ * Changes by tytso to allow root device specification
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ * Cross compiling fixes by Gertjan van Wingerde, July 1996
+ * Rewritten by Martin Mares, April 1997
+ * Substantially overhauled by H. Peter Anvin, April 2007
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <tools/le_byteshift.h>
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+
+#define DEFAULT_MAJOR_ROOT 0
+#define DEFAULT_MINOR_ROOT 0
+#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT)
+
+/* Minimal number of setup sectors */
+#define SETUP_SECT_MIN 5
+#define SETUP_SECT_MAX 64
+
+/* This must be large enough to hold the entire setup */
+u8 buf[SETUP_SECT_MAX*512];
+
+#define PECOFF_RELOC_RESERVE 0x20
+
+unsigned long efi32_stub_entry;
+unsigned long efi64_stub_entry;
+unsigned long efi_pe_entry;
+unsigned long startup_64;
+
+/*----------------------------------------------------------------------*/
+
+static const u32 crctab32[] = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
+ 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
+ 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
+ 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
+ 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
+ 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
+ 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
+ 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
+ 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
+ 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
+ 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
+ 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
+ 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
+ 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
+ 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
+ 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
+ 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
+ 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
+ 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
+ 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
+ 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
+ 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
+ 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
+ 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
+ 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
+ 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
+ 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
+ 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
+ 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
+ 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
+ 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
+ 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
+ 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
+ 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
+ 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
+ 0x2d02ef8d
+};
+
+static u32 partial_crc32_one(u8 c, u32 crc)
+{
+ return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8);
+}
+
+static u32 partial_crc32(const u8 *s, int len, u32 crc)
+{
+ while (len--)
+ crc = partial_crc32_one(*s++, crc);
+ return crc;
+}
+
+static void die(const char * str, ...)
+{
+ va_list args;
+ va_start(args, str);
+ vfprintf(stderr, str, args);
+ fputc('\n', stderr);
+ exit(1);
+}
+
+static void usage(void)
+{
+ die("Usage: build setup system zoffset.h image");
+}
+
+#ifdef CONFIG_EFI_STUB
+
+static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset)
+{
+ unsigned int pe_header;
+ unsigned short num_sections;
+ u8 *section;
+
+ pe_header = get_unaligned_le32(&buf[0x3c]);
+ num_sections = get_unaligned_le16(&buf[pe_header + 6]);
+
+#ifdef CONFIG_X86_32
+ section = &buf[pe_header + 0xa8];
+#else
+ section = &buf[pe_header + 0xb8];
+#endif
+
+ while (num_sections > 0) {
+ if (strncmp((char*)section, section_name, 8) == 0) {
+ /* section header size field */
+ put_unaligned_le32(size, section + 0x8);
+
+ /* section header vma field */
+ put_unaligned_le32(vma, section + 0xc);
+
+ /* section header 'size of initialised data' field */
+ put_unaligned_le32(datasz, section + 0x10);
+
+ /* section header 'file offset' field */
+ put_unaligned_le32(offset, section + 0x14);
+
+ break;
+ }
+ section += 0x28;
+ num_sections--;
+ }
+}
+
+static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
+{
+ update_pecoff_section_header_fields(section_name, offset, size, size, offset);
+}
+
+static void update_pecoff_setup_and_reloc(unsigned int size)
+{
+ u32 setup_offset = 0x200;
+ u32 reloc_offset = size - PECOFF_RELOC_RESERVE;
+ u32 setup_size = reloc_offset - setup_offset;
+
+ update_pecoff_section_header(".setup", setup_offset, setup_size);
+ update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE);
+
+ /*
+ * Modify .reloc section contents with a single entry. The
+ * relocation is applied to offset 10 of the relocation section.
+ */
+ put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]);
+ put_unaligned_le32(10, &buf[reloc_offset + 4]);
+}
+
+static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
+{
+ unsigned int pe_header;
+ unsigned int text_sz = file_sz - text_start;
+
+ pe_header = get_unaligned_le32(&buf[0x3c]);
+
+ /*
+ * Size of code: Subtract the size of the first sector (512 bytes)
+ * which includes the header.
+ */
+ put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]);
+
+ /*
+ * Address of entry point for PE/COFF executable
+ */
+ put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]);
+
+ update_pecoff_section_header(".text", text_start, text_sz);
+}
+
+static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz)
+{
+ unsigned int pe_header;
+ unsigned int bss_sz = init_sz - file_sz;
+
+ pe_header = get_unaligned_le32(&buf[0x3c]);
+
+ /* Size of uninitialized data */
+ put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]);
+
+ /* Size of image */
+ put_unaligned_le32(init_sz, &buf[pe_header + 0x50]);
+
+ update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0);
+}
+
+static int reserve_pecoff_reloc_section(int c)
+{
+ /* Reserve 0x20 bytes for .reloc section */
+ memset(buf+c, 0, PECOFF_RELOC_RESERVE);
+ return PECOFF_RELOC_RESERVE;
+}
+
+static void efi_stub_defaults(void)
+{
+ /* Defaults for old kernel */
+#ifdef CONFIG_X86_32
+ efi_pe_entry = 0x10;
+#else
+ efi_pe_entry = 0x210;
+ startup_64 = 0x200;
+#endif
+}
+
+static void efi_stub_entry_update(void)
+{
+ unsigned long addr = efi32_stub_entry;
+
+#ifdef CONFIG_X86_64
+ /* Yes, this is really how we defined it :( */
+ addr = efi64_stub_entry - 0x200;
+#endif
+
+#ifdef CONFIG_EFI_MIXED
+ if (efi32_stub_entry != addr)
+ die("32-bit and 64-bit EFI entry points do not match\n");
+#endif
+ put_unaligned_le32(addr, &buf[0x264]);
+}
+
+#else
+
+static inline void update_pecoff_setup_and_reloc(unsigned int size) {}
+static inline void update_pecoff_text(unsigned int text_start,
+ unsigned int file_sz) {}
+static inline void update_pecoff_bss(unsigned int file_sz,
+ unsigned int init_sz) {}
+static inline void efi_stub_defaults(void) {}
+static inline void efi_stub_entry_update(void) {}
+
+static inline int reserve_pecoff_reloc_section(int c)
+{
+ return 0;
+}
+#endif /* CONFIG_EFI_STUB */
+
+
+/*
+ * Parse zoffset.h and find the entry points. We could just #include zoffset.h
+ * but that would mean tools/build would have to be rebuilt every time. It's
+ * not as if parsing it is hard...
+ */
+#define PARSE_ZOFS(p, sym) do { \
+ if (!strncmp(p, "#define ZO_" #sym " ", 11+sizeof(#sym))) \
+ sym = strtoul(p + 11 + sizeof(#sym), NULL, 16); \
+} while (0)
+
+static void parse_zoffset(char *fname)
+{
+ FILE *file;
+ char *p;
+ int c;
+
+ file = fopen(fname, "r");
+ if (!file)
+ die("Unable to open `%s': %m", fname);
+ c = fread(buf, 1, sizeof(buf) - 1, file);
+ if (ferror(file))
+ die("read-error on `zoffset.h'");
+ fclose(file);
+ buf[c] = 0;
+
+ p = (char *)buf;
+
+ while (p && *p) {
+ PARSE_ZOFS(p, efi32_stub_entry);
+ PARSE_ZOFS(p, efi64_stub_entry);
+ PARSE_ZOFS(p, efi_pe_entry);
+ PARSE_ZOFS(p, startup_64);
+
+ p = strchr(p, '\n');
+ while (p && (*p == '\r' || *p == '\n'))
+ p++;
+ }
+}
+
+int main(int argc, char ** argv)
+{
+ unsigned int i, sz, setup_sectors, init_sz;
+ int c;
+ u32 sys_size;
+ struct stat sb;
+ FILE *file, *dest;
+ int fd;
+ void *kernel;
+ u32 crc = 0xffffffffUL;
+
+ efi_stub_defaults();
+
+ if (argc != 5)
+ usage();
+ parse_zoffset(argv[3]);
+
+ dest = fopen(argv[4], "w");
+ if (!dest)
+ die("Unable to write `%s': %m", argv[4]);
+
+ /* Copy the setup code */
+ file = fopen(argv[1], "r");
+ if (!file)
+ die("Unable to open `%s': %m", argv[1]);
+ c = fread(buf, 1, sizeof(buf), file);
+ if (ferror(file))
+ die("read-error on `setup'");
+ if (c < 1024)
+ die("The setup must be at least 1024 bytes");
+ if (get_unaligned_le16(&buf[510]) != 0xAA55)
+ die("Boot block hasn't got boot flag (0xAA55)");
+ fclose(file);
+
+ c += reserve_pecoff_reloc_section(c);
+
+ /* Pad unused space with zeros */
+ setup_sectors = (c + 511) / 512;
+ if (setup_sectors < SETUP_SECT_MIN)
+ setup_sectors = SETUP_SECT_MIN;
+ i = setup_sectors*512;
+ memset(buf+c, 0, i-c);
+
+ update_pecoff_setup_and_reloc(i);
+
+ /* Set the default root device */
+ put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
+
+ printf("Setup is %d bytes (padded to %d bytes).\n", c, i);
+
+ /* Open and stat the kernel file */
+ fd = open(argv[2], O_RDONLY);
+ if (fd < 0)
+ die("Unable to open `%s': %m", argv[2]);
+ if (fstat(fd, &sb))
+ die("Unable to stat `%s': %m", argv[2]);
+ sz = sb.st_size;
+ printf("System is %d kB\n", (sz+1023)/1024);
+ kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
+ if (kernel == MAP_FAILED)
+ die("Unable to mmap '%s': %m", argv[2]);
+ /* Number of 16-byte paragraphs, including space for a 4-byte CRC */
+ sys_size = (sz + 15 + 4) / 16;
+#ifdef CONFIG_EFI_STUB
+ /*
+ * COFF requires minimum 32-byte alignment of sections, and
+ * adding a signature is problematic without that alignment.
+ */
+ sys_size = (sys_size + 1) & ~1;
+#endif
+
+ /* Patch the setup code with the appropriate size parameters */
+ buf[0x1f1] = setup_sectors-1;
+ put_unaligned_le32(sys_size, &buf[0x1f4]);
+
+ update_pecoff_text(setup_sectors * 512, i + (sys_size * 16));
+ init_sz = get_unaligned_le32(&buf[0x260]);
+ update_pecoff_bss(i + (sys_size * 16), init_sz);
+
+ efi_stub_entry_update();
+
+ crc = partial_crc32(buf, i, crc);
+ if (fwrite(buf, 1, i, dest) != i)
+ die("Writing setup failed");
+
+ /* Copy the kernel code */
+ crc = partial_crc32(kernel, sz, crc);
+ if (fwrite(kernel, 1, sz, dest) != sz)
+ die("Writing kernel failed");
+
+ /* Add padding leaving 4 bytes for the checksum */
+ while (sz++ < (sys_size*16) - 4) {
+ crc = partial_crc32_one('\0', crc);
+ if (fwrite("\0", 1, 1, dest) != 1)
+ die("Writing padding failed");
+ }
+
+ /* Write the CRC */
+ printf("CRC %x\n", crc);
+ put_unaligned_le32(crc, buf);
+ if (fwrite(buf, 1, 4, dest) != 4)
+ die("Writing CRC failed");
+
+ /* Catch any delayed write failures */
+ if (fclose(dest))
+ die("Writing image failed");
+
+ close(fd);
+
+ /* Everything is OK */
+ return 0;
+}
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
new file mode 100644
index 000000000..def2451f4
--- /dev/null
+++ b/arch/x86/boot/tty.c
@@ -0,0 +1,139 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Very simple screen and serial I/O
+ */
+
+#include "boot.h"
+
+int early_serial_base;
+
+#define XMTRDY 0x20
+
+#define TXR 0 /* Transmit register (WRITE) */
+#define LSR 5 /* Line Status */
+
+/*
+ * These functions are in .inittext so they can be used to signal
+ * error during initialization.
+ */
+
+static void __attribute__((section(".inittext"))) serial_putchar(int ch)
+{
+ unsigned timeout = 0xffff;
+
+ while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
+ cpu_relax();
+
+ outb(ch, early_serial_base + TXR);
+}
+
+static void __attribute__((section(".inittext"))) bios_putchar(int ch)
+{
+ struct biosregs ireg;
+
+ initregs(&ireg);
+ ireg.bx = 0x0007;
+ ireg.cx = 0x0001;
+ ireg.ah = 0x0e;
+ ireg.al = ch;
+ intcall(0x10, &ireg, NULL);
+}
+
+void __attribute__((section(".inittext"))) putchar(int ch)
+{
+ if (ch == '\n')
+ putchar('\r'); /* \n -> \r\n */
+
+ bios_putchar(ch);
+
+ if (early_serial_base != 0)
+ serial_putchar(ch);
+}
+
+void __attribute__((section(".inittext"))) puts(const char *str)
+{
+ while (*str)
+ putchar(*str++);
+}
+
+/*
+ * Read the CMOS clock through the BIOS, and return the
+ * seconds in BCD.
+ */
+
+static u8 gettime(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ah = 0x02;
+ intcall(0x1a, &ireg, &oreg);
+
+ return oreg.dh;
+}
+
+/*
+ * Read from the keyboard
+ */
+int getchar(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ /* ireg.ah = 0x00; */
+ intcall(0x16, &ireg, &oreg);
+
+ return oreg.al;
+}
+
+static int kbd_pending(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ah = 0x01;
+ intcall(0x16, &ireg, &oreg);
+
+ return !(oreg.eflags & X86_EFLAGS_ZF);
+}
+
+void kbd_flush(void)
+{
+ for (;;) {
+ if (!kbd_pending())
+ break;
+ getchar();
+ }
+}
+
+int getchar_timeout(void)
+{
+ int cnt = 30;
+ int t0, t1;
+
+ t0 = gettime();
+
+ while (cnt) {
+ if (kbd_pending())
+ return getchar();
+
+ t1 = gettime();
+ if (t0 != t1) {
+ cnt--;
+ t0 = t1;
+ }
+ }
+
+ return 0; /* Timeout! */
+}
+
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c
new file mode 100644
index 000000000..2b15aa488
--- /dev/null
+++ b/arch/x86/boot/version.c
@@ -0,0 +1,21 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Kernel version string
+ */
+
+#include "boot.h"
+#include <generated/utsrelease.h>
+#include <generated/compile.h>
+
+const char kernel_version[] =
+ UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") "
+ UTS_VERSION;
diff --git a/arch/x86/boot/vesa.h b/arch/x86/boot/vesa.h
new file mode 100644
index 000000000..468e44462
--- /dev/null
+++ b/arch/x86/boot/vesa.h
@@ -0,0 +1,72 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1999-2007 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+#ifndef BOOT_VESA_H
+#define BOOT_VESA_H
+
+typedef struct {
+ u16 off, seg;
+} far_ptr;
+
+/* VESA General Information table */
+struct vesa_general_info {
+ u32 signature; /* 0 Magic number = "VESA" */
+ u16 version; /* 4 */
+ far_ptr vendor_string; /* 6 */
+ u32 capabilities; /* 10 */
+ far_ptr video_mode_ptr; /* 14 */
+ u16 total_memory; /* 18 */
+
+ u8 reserved[236]; /* 20 */
+} __attribute__ ((packed));
+
+#define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24))
+
+struct vesa_mode_info {
+ u16 mode_attr; /* 0 */
+ u8 win_attr[2]; /* 2 */
+ u16 win_grain; /* 4 */
+ u16 win_size; /* 6 */
+ u16 win_seg[2]; /* 8 */
+ far_ptr win_scheme; /* 12 */
+ u16 logical_scan; /* 16 */
+
+ u16 h_res; /* 18 */
+ u16 v_res; /* 20 */
+ u8 char_width; /* 22 */
+ u8 char_height; /* 23 */
+ u8 memory_planes; /* 24 */
+ u8 bpp; /* 25 */
+ u8 banks; /* 26 */
+ u8 memory_layout; /* 27 */
+ u8 bank_size; /* 28 */
+ u8 image_planes; /* 29 */
+ u8 page_function; /* 30 */
+
+ u8 rmask; /* 31 */
+ u8 rpos; /* 32 */
+ u8 gmask; /* 33 */
+ u8 gpos; /* 34 */
+ u8 bmask; /* 35 */
+ u8 bpos; /* 36 */
+ u8 resv_mask; /* 37 */
+ u8 resv_pos; /* 38 */
+ u8 dcm_info; /* 39 */
+
+ u32 lfb_ptr; /* 40 Linear frame buffer address */
+ u32 offscreen_ptr; /* 44 Offscreen memory address */
+ u16 offscreen_size; /* 48 */
+
+ u8 reserved[206]; /* 50 */
+} __attribute__ ((packed));
+
+#endif /* LIB_SYS_VESA_H */
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
new file mode 100644
index 000000000..49e0c1883
--- /dev/null
+++ b/arch/x86/boot/video-bios.c
@@ -0,0 +1,128 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Standard video BIOS modes
+ *
+ * We have two options for this; silent and scanned.
+ */
+
+#include "boot.h"
+#include "video.h"
+
+static __videocard video_bios;
+
+/* Set a conventional BIOS mode */
+static int set_bios_mode(u8 mode);
+
+static int bios_set_mode(struct mode_info *mi)
+{
+ return set_bios_mode(mi->mode - VIDEO_FIRST_BIOS);
+}
+
+static int set_bios_mode(u8 mode)
+{
+ struct biosregs ireg, oreg;
+ u8 new_mode;
+
+ initregs(&ireg);
+ ireg.al = mode; /* AH=0x00 Set Video Mode */
+ intcall(0x10, &ireg, NULL);
+
+ ireg.ah = 0x0f; /* Get Current Video Mode */
+ intcall(0x10, &ireg, &oreg);
+
+ do_restore = 1; /* Assume video contents were lost */
+
+ /* Not all BIOSes are clean with the top bit */
+ new_mode = oreg.al & 0x7f;
+
+ if (new_mode == mode)
+ return 0; /* Mode change OK */
+
+#ifndef _WAKEUP
+ if (new_mode != boot_params.screen_info.orig_video_mode) {
+ /* Mode setting failed, but we didn't end up where we
+ started. That's bad. Try to revert to the original
+ video mode. */
+ ireg.ax = boot_params.screen_info.orig_video_mode;
+ intcall(0x10, &ireg, NULL);
+ }
+#endif
+ return -1;
+}
+
+static int bios_probe(void)
+{
+ u8 mode;
+#ifdef _WAKEUP
+ u8 saved_mode = 0x03;
+#else
+ u8 saved_mode = boot_params.screen_info.orig_video_mode;
+#endif
+ u16 crtc;
+ struct mode_info *mi;
+ int nmodes = 0;
+
+ if (adapter != ADAPTER_EGA && adapter != ADAPTER_VGA)
+ return 0;
+
+ set_fs(0);
+ crtc = vga_crtc();
+
+ video_bios.modes = GET_HEAP(struct mode_info, 0);
+
+ for (mode = 0x14; mode <= 0x7f; mode++) {
+ if (!heap_free(sizeof(struct mode_info)))
+ break;
+
+ if (mode_defined(VIDEO_FIRST_BIOS+mode))
+ continue;
+
+ if (set_bios_mode(mode))
+ continue;
+
+ /* Try to verify that it's a text mode. */
+
+ /* Attribute Controller: make graphics controller disabled */
+ if (in_idx(0x3c0, 0x10) & 0x01)
+ continue;
+
+ /* Graphics Controller: verify Alpha addressing enabled */
+ if (in_idx(0x3ce, 0x06) & 0x01)
+ continue;
+
+ /* CRTC cursor location low should be zero(?) */
+ if (in_idx(crtc, 0x0f))
+ continue;
+
+ mi = GET_HEAP(struct mode_info, 1);
+ mi->mode = VIDEO_FIRST_BIOS+mode;
+ mi->depth = 0; /* text */
+ mi->x = rdfs16(0x44a);
+ mi->y = rdfs8(0x484)+1;
+ nmodes++;
+ }
+
+ set_bios_mode(saved_mode);
+
+ return nmodes;
+}
+
+static __videocard video_bios =
+{
+ .card_name = "BIOS",
+ .probe = bios_probe,
+ .set_mode = bios_set_mode,
+ .unsafe = 1,
+ .xmode_first = VIDEO_FIRST_BIOS,
+ .xmode_n = 0x80,
+};
diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c
new file mode 100644
index 000000000..95c7a818c
--- /dev/null
+++ b/arch/x86/boot/video-mode.c
@@ -0,0 +1,173 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * arch/i386/boot/video-mode.c
+ *
+ * Set the video mode. This is separated out into a different
+ * file in order to be shared with the ACPI wakeup code.
+ */
+
+#include "boot.h"
+#include "video.h"
+#include "vesa.h"
+
+#include <uapi/asm/boot.h>
+
+/*
+ * Common variables
+ */
+int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
+int force_x, force_y; /* Don't query the BIOS for cols/rows */
+int do_restore; /* Screen contents changed during mode flip */
+int graphic_mode; /* Graphic mode with linear frame buffer */
+
+/* Probe the video drivers and have them generate their mode lists. */
+void probe_cards(int unsafe)
+{
+ struct card_info *card;
+ static u8 probed[2];
+
+ if (probed[unsafe])
+ return;
+
+ probed[unsafe] = 1;
+
+ for (card = video_cards; card < video_cards_end; card++) {
+ if (card->unsafe == unsafe) {
+ if (card->probe)
+ card->nmodes = card->probe();
+ else
+ card->nmodes = 0;
+ }
+ }
+}
+
+/* Test if a mode is defined */
+int mode_defined(u16 mode)
+{
+ struct card_info *card;
+ struct mode_info *mi;
+ int i;
+
+ for (card = video_cards; card < video_cards_end; card++) {
+ mi = card->modes;
+ for (i = 0; i < card->nmodes; i++, mi++) {
+ if (mi->mode == mode)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* Set mode (without recalc) */
+static int raw_set_mode(u16 mode, u16 *real_mode)
+{
+ int nmode, i;
+ struct card_info *card;
+ struct mode_info *mi;
+
+ /* Drop the recalc bit if set */
+ mode &= ~VIDEO_RECALC;
+
+ /* Scan for mode based on fixed ID, position, or resolution */
+ nmode = 0;
+ for (card = video_cards; card < video_cards_end; card++) {
+ mi = card->modes;
+ for (i = 0; i < card->nmodes; i++, mi++) {
+ int visible = mi->x || mi->y;
+
+ if ((mode == nmode && visible) ||
+ mode == mi->mode ||
+ mode == (mi->y << 8)+mi->x) {
+ *real_mode = mi->mode;
+ return card->set_mode(mi);
+ }
+
+ if (visible)
+ nmode++;
+ }
+ }
+
+ /* Nothing found? Is it an "exceptional" (unprobed) mode? */
+ for (card = video_cards; card < video_cards_end; card++) {
+ if (mode >= card->xmode_first &&
+ mode < card->xmode_first+card->xmode_n) {
+ struct mode_info mix;
+ *real_mode = mix.mode = mode;
+ mix.x = mix.y = 0;
+ return card->set_mode(&mix);
+ }
+ }
+
+ /* Otherwise, failure... */
+ return -1;
+}
+
+/*
+ * Recalculate the vertical video cutoff (hack!)
+ */
+static void vga_recalc_vertical(void)
+{
+ unsigned int font_size, rows;
+ u16 crtc;
+ u8 pt, ov;
+
+ set_fs(0);
+ font_size = rdfs8(0x485); /* BIOS: font size (pixels) */
+ rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */
+
+ rows *= font_size; /* Visible scan lines */
+ rows--; /* ... minus one */
+
+ crtc = vga_crtc();
+
+ pt = in_idx(crtc, 0x11);
+ pt &= ~0x80; /* Unlock CR0-7 */
+ out_idx(pt, crtc, 0x11);
+
+ out_idx((u8)rows, crtc, 0x12); /* Lower height register */
+
+ ov = in_idx(crtc, 0x07); /* Overflow register */
+ ov &= 0xbd;
+ ov |= (rows >> (8-1)) & 0x02;
+ ov |= (rows >> (9-6)) & 0x40;
+ out_idx(ov, crtc, 0x07);
+}
+
+/* Set mode (with recalc if specified) */
+int set_mode(u16 mode)
+{
+ int rv;
+ u16 real_mode;
+
+ /* Very special mode numbers... */
+ if (mode == VIDEO_CURRENT_MODE)
+ return 0; /* Nothing to do... */
+ else if (mode == NORMAL_VGA)
+ mode = VIDEO_80x25;
+ else if (mode == EXTENDED_VGA)
+ mode = VIDEO_8POINT;
+
+ rv = raw_set_mode(mode, &real_mode);
+ if (rv)
+ return rv;
+
+ if (mode & VIDEO_RECALC)
+ vga_recalc_vertical();
+
+ /* Save the canonical mode number for the kernel, not
+ an alias, size specification or menu position */
+#ifndef _WAKEUP
+ boot_params.hdr.vid_mode = real_mode;
+#endif
+ return 0;
+}
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
new file mode 100644
index 000000000..ba3e10065
--- /dev/null
+++ b/arch/x86/boot/video-vesa.c
@@ -0,0 +1,281 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * VESA text modes
+ */
+
+#include "boot.h"
+#include "video.h"
+#include "vesa.h"
+#include "string.h"
+
+/* VESA information */
+static struct vesa_general_info vginfo;
+static struct vesa_mode_info vminfo;
+
+static __videocard video_vesa;
+
+#ifndef _WAKEUP
+static void vesa_store_mode_params_graphics(void);
+#else /* _WAKEUP */
+static inline void vesa_store_mode_params_graphics(void) {}
+#endif /* _WAKEUP */
+
+static int vesa_probe(void)
+{
+ struct biosregs ireg, oreg;
+ u16 mode;
+ addr_t mode_ptr;
+ struct mode_info *mi;
+ int nmodes = 0;
+
+ video_vesa.modes = GET_HEAP(struct mode_info, 0);
+
+ initregs(&ireg);
+ ireg.ax = 0x4f00;
+ ireg.di = (size_t)&vginfo;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f ||
+ vginfo.signature != VESA_MAGIC ||
+ vginfo.version < 0x0102)
+ return 0; /* Not present */
+
+ set_fs(vginfo.video_mode_ptr.seg);
+ mode_ptr = vginfo.video_mode_ptr.off;
+
+ while ((mode = rdfs16(mode_ptr)) != 0xffff) {
+ mode_ptr += 2;
+
+ if (!heap_free(sizeof(struct mode_info)))
+ break; /* Heap full, can't save mode info */
+
+ if (mode & ~0x1ff)
+ continue;
+
+ memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
+
+ ireg.ax = 0x4f01;
+ ireg.cx = mode;
+ ireg.di = (size_t)&vminfo;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f)
+ continue;
+
+ if ((vminfo.mode_attr & 0x15) == 0x05) {
+ /* Text Mode, TTY BIOS supported,
+ supported by hardware */
+ mi = GET_HEAP(struct mode_info, 1);
+ mi->mode = mode + VIDEO_FIRST_VESA;
+ mi->depth = 0; /* text */
+ mi->x = vminfo.h_res;
+ mi->y = vminfo.v_res;
+ nmodes++;
+ } else if ((vminfo.mode_attr & 0x99) == 0x99 &&
+ (vminfo.memory_layout == 4 ||
+ vminfo.memory_layout == 6) &&
+ vminfo.memory_planes == 1) {
+#ifdef CONFIG_FB_BOOT_VESA_SUPPORT
+ /* Graphics mode, color, linear frame buffer
+ supported. Only register the mode if
+ if framebuffer is configured, however,
+ otherwise the user will be left without a screen. */
+ mi = GET_HEAP(struct mode_info, 1);
+ mi->mode = mode + VIDEO_FIRST_VESA;
+ mi->depth = vminfo.bpp;
+ mi->x = vminfo.h_res;
+ mi->y = vminfo.v_res;
+ nmodes++;
+#endif
+ }
+ }
+
+ return nmodes;
+}
+
+static int vesa_set_mode(struct mode_info *mode)
+{
+ struct biosregs ireg, oreg;
+ int is_graphic;
+ u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
+
+ memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
+
+ initregs(&ireg);
+ ireg.ax = 0x4f01;
+ ireg.cx = vesa_mode;
+ ireg.di = (size_t)&vminfo;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f)
+ return -1;
+
+ if ((vminfo.mode_attr & 0x15) == 0x05) {
+ /* It's a supported text mode */
+ is_graphic = 0;
+#ifdef CONFIG_FB_BOOT_VESA_SUPPORT
+ } else if ((vminfo.mode_attr & 0x99) == 0x99) {
+ /* It's a graphics mode with linear frame buffer */
+ is_graphic = 1;
+ vesa_mode |= 0x4000; /* Request linear frame buffer */
+#endif
+ } else {
+ return -1; /* Invalid mode */
+ }
+
+
+ initregs(&ireg);
+ ireg.ax = 0x4f02;
+ ireg.bx = vesa_mode;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f)
+ return -1;
+
+ graphic_mode = is_graphic;
+ if (!is_graphic) {
+ /* Text mode */
+ force_x = mode->x;
+ force_y = mode->y;
+ do_restore = 1;
+ } else {
+ /* Graphics mode */
+ vesa_store_mode_params_graphics();
+ }
+
+ return 0;
+}
+
+
+#ifndef _WAKEUP
+
+/* Switch DAC to 8-bit mode */
+static void vesa_dac_set_8bits(void)
+{
+ struct biosregs ireg, oreg;
+ u8 dac_size = 6;
+
+ /* If possible, switch the DAC to 8-bit mode */
+ if (vginfo.capabilities & 1) {
+ initregs(&ireg);
+ ireg.ax = 0x4f08;
+ ireg.bh = 0x08;
+ intcall(0x10, &ireg, &oreg);
+ if (oreg.ax == 0x004f)
+ dac_size = oreg.bh;
+ }
+
+ /* Set the color sizes to the DAC size, and offsets to 0 */
+ boot_params.screen_info.red_size = dac_size;
+ boot_params.screen_info.green_size = dac_size;
+ boot_params.screen_info.blue_size = dac_size;
+ boot_params.screen_info.rsvd_size = dac_size;
+
+ boot_params.screen_info.red_pos = 0;
+ boot_params.screen_info.green_pos = 0;
+ boot_params.screen_info.blue_pos = 0;
+ boot_params.screen_info.rsvd_pos = 0;
+}
+
+/* Save the VESA protected mode info */
+static void vesa_store_pm_info(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ax = 0x4f0a;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f)
+ return;
+
+ boot_params.screen_info.vesapm_seg = oreg.es;
+ boot_params.screen_info.vesapm_off = oreg.di;
+}
+
+/*
+ * Save video mode parameters for graphics mode
+ */
+static void vesa_store_mode_params_graphics(void)
+{
+ /* Tell the kernel we're in VESA graphics mode */
+ boot_params.screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
+
+ /* Mode parameters */
+ boot_params.screen_info.vesa_attributes = vminfo.mode_attr;
+ boot_params.screen_info.lfb_linelength = vminfo.logical_scan;
+ boot_params.screen_info.lfb_width = vminfo.h_res;
+ boot_params.screen_info.lfb_height = vminfo.v_res;
+ boot_params.screen_info.lfb_depth = vminfo.bpp;
+ boot_params.screen_info.pages = vminfo.image_planes;
+ boot_params.screen_info.lfb_base = vminfo.lfb_ptr;
+ memcpy(&boot_params.screen_info.red_size,
+ &vminfo.rmask, 8);
+
+ /* General parameters */
+ boot_params.screen_info.lfb_size = vginfo.total_memory;
+
+ if (vminfo.bpp <= 8)
+ vesa_dac_set_8bits();
+
+ vesa_store_pm_info();
+}
+
+/*
+ * Save EDID information for the kernel; this is invoked, separately,
+ * after mode-setting.
+ */
+void vesa_store_edid(void)
+{
+#ifdef CONFIG_FIRMWARE_EDID
+ struct biosregs ireg, oreg;
+
+ /* Apparently used as a nonsense token... */
+ memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);
+
+ if (vginfo.version < 0x0200)
+ return; /* EDID requires VBE 2.0+ */
+
+ initregs(&ireg);
+ ireg.ax = 0x4f15; /* VBE DDC */
+ /* ireg.bx = 0x0000; */ /* Report DDC capabilities */
+ /* ireg.cx = 0; */ /* Controller 0 */
+ ireg.es = 0; /* ES:DI must be 0 by spec */
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.ax != 0x004f)
+ return; /* No EDID */
+
+ /* BH = time in seconds to transfer EDD information */
+ /* BL = DDC level supported */
+
+ ireg.ax = 0x4f15; /* VBE DDC */
+ ireg.bx = 0x0001; /* Read EDID */
+ /* ireg.cx = 0; */ /* Controller 0 */
+ /* ireg.dx = 0; */ /* EDID block number */
+ ireg.es = ds();
+ ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */
+ intcall(0x10, &ireg, &oreg);
+#endif /* CONFIG_FIRMWARE_EDID */
+}
+
+#endif /* not _WAKEUP */
+
+static __videocard video_vesa =
+{
+ .card_name = "VESA",
+ .probe = vesa_probe,
+ .set_mode = vesa_set_mode,
+ .xmode_first = VIDEO_FIRST_VESA,
+ .xmode_n = 0x200,
+};
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
new file mode 100644
index 000000000..a14c5178d
--- /dev/null
+++ b/arch/x86/boot/video-vga.c
@@ -0,0 +1,288 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Common all-VGA modes
+ */
+
+#include "boot.h"
+#include "video.h"
+
+static struct mode_info vga_modes[] = {
+ { VIDEO_80x25, 80, 25, 0 },
+ { VIDEO_8POINT, 80, 50, 0 },
+ { VIDEO_80x43, 80, 43, 0 },
+ { VIDEO_80x28, 80, 28, 0 },
+ { VIDEO_80x30, 80, 30, 0 },
+ { VIDEO_80x34, 80, 34, 0 },
+ { VIDEO_80x60, 80, 60, 0 },
+};
+
+static struct mode_info ega_modes[] = {
+ { VIDEO_80x25, 80, 25, 0 },
+ { VIDEO_8POINT, 80, 43, 0 },
+};
+
+static struct mode_info cga_modes[] = {
+ { VIDEO_80x25, 80, 25, 0 },
+};
+
+static __videocard video_vga;
+
+/* Set basic 80x25 mode */
+static u8 vga_set_basic_mode(void)
+{
+ struct biosregs ireg, oreg;
+ u8 mode;
+
+ initregs(&ireg);
+
+ /* Query current mode */
+ ireg.ax = 0x0f00;
+ intcall(0x10, &ireg, &oreg);
+ mode = oreg.al;
+
+ if (mode != 3 && mode != 7)
+ mode = 3;
+
+ /* Set the mode */
+ ireg.ax = mode; /* AH=0: set mode */
+ intcall(0x10, &ireg, NULL);
+ do_restore = 1;
+ return mode;
+}
+
+static void vga_set_8font(void)
+{
+ /* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */
+ struct biosregs ireg;
+
+ initregs(&ireg);
+
+ /* Set 8x8 font */
+ ireg.ax = 0x1112;
+ /* ireg.bl = 0; */
+ intcall(0x10, &ireg, NULL);
+
+ /* Use alternate print screen */
+ ireg.ax = 0x1200;
+ ireg.bl = 0x20;
+ intcall(0x10, &ireg, NULL);
+
+ /* Turn off cursor emulation */
+ ireg.ax = 0x1201;
+ ireg.bl = 0x34;
+ intcall(0x10, &ireg, NULL);
+
+ /* Cursor is scan lines 6-7 */
+ ireg.ax = 0x0100;
+ ireg.cx = 0x0607;
+ intcall(0x10, &ireg, NULL);
+}
+
+static void vga_set_14font(void)
+{
+ /* Set 9x14 font - 80x28 on VGA */
+ struct biosregs ireg;
+
+ initregs(&ireg);
+
+ /* Set 9x14 font */
+ ireg.ax = 0x1111;
+ /* ireg.bl = 0; */
+ intcall(0x10, &ireg, NULL);
+
+ /* Turn off cursor emulation */
+ ireg.ax = 0x1201;
+ ireg.bl = 0x34;
+ intcall(0x10, &ireg, NULL);
+
+ /* Cursor is scan lines 11-12 */
+ ireg.ax = 0x0100;
+ ireg.cx = 0x0b0c;
+ intcall(0x10, &ireg, NULL);
+}
+
+static void vga_set_80x43(void)
+{
+ /* Set 80x43 mode on VGA (not EGA) */
+ struct biosregs ireg;
+
+ initregs(&ireg);
+
+ /* Set 350 scans */
+ ireg.ax = 0x1201;
+ ireg.bl = 0x30;
+ intcall(0x10, &ireg, NULL);
+
+ /* Reset video mode */
+ ireg.ax = 0x0003;
+ intcall(0x10, &ireg, NULL);
+
+ vga_set_8font();
+}
+
+/* I/O address of the VGA CRTC */
+u16 vga_crtc(void)
+{
+ return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4;
+}
+
+static void vga_set_480_scanlines(void)
+{
+ u16 crtc; /* CRTC base address */
+ u8 csel; /* CRTC miscellaneous output register */
+
+ crtc = vga_crtc();
+
+ out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */
+ out_idx(0x0b, crtc, 0x06); /* Vertical total */
+ out_idx(0x3e, crtc, 0x07); /* Vertical overflow */
+ out_idx(0xea, crtc, 0x10); /* Vertical sync start */
+ out_idx(0xdf, crtc, 0x12); /* Vertical display end */
+ out_idx(0xe7, crtc, 0x15); /* Vertical blank start */
+ out_idx(0x04, crtc, 0x16); /* Vertical blank end */
+ csel = inb(0x3cc);
+ csel &= 0x0d;
+ csel |= 0xe2;
+ outb(csel, 0x3c2);
+}
+
+static void vga_set_vertical_end(int lines)
+{
+ u16 crtc; /* CRTC base address */
+ u8 ovfw; /* CRTC overflow register */
+ int end = lines-1;
+
+ crtc = vga_crtc();
+
+ ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40);
+
+ out_idx(ovfw, crtc, 0x07); /* Vertical overflow */
+ out_idx(end, crtc, 0x12); /* Vertical display end */
+}
+
+static void vga_set_80x30(void)
+{
+ vga_set_480_scanlines();
+ vga_set_vertical_end(30*16);
+}
+
+static void vga_set_80x34(void)
+{
+ vga_set_480_scanlines();
+ vga_set_14font();
+ vga_set_vertical_end(34*14);
+}
+
+static void vga_set_80x60(void)
+{
+ vga_set_480_scanlines();
+ vga_set_8font();
+ vga_set_vertical_end(60*8);
+}
+
+static int vga_set_mode(struct mode_info *mode)
+{
+ /* Set the basic mode */
+ vga_set_basic_mode();
+
+ /* Override a possibly broken BIOS */
+ force_x = mode->x;
+ force_y = mode->y;
+
+ switch (mode->mode) {
+ case VIDEO_80x25:
+ break;
+ case VIDEO_8POINT:
+ vga_set_8font();
+ break;
+ case VIDEO_80x43:
+ vga_set_80x43();
+ break;
+ case VIDEO_80x28:
+ vga_set_14font();
+ break;
+ case VIDEO_80x30:
+ vga_set_80x30();
+ break;
+ case VIDEO_80x34:
+ vga_set_80x34();
+ break;
+ case VIDEO_80x60:
+ vga_set_80x60();
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Note: this probe includes basic information required by all
+ * systems. It should be executed first, by making sure
+ * video-vga.c is listed first in the Makefile.
+ */
+static int vga_probe(void)
+{
+ static const char *card_name[] = {
+ "CGA/MDA/HGC", "EGA", "VGA"
+ };
+ static struct mode_info *mode_lists[] = {
+ cga_modes,
+ ega_modes,
+ vga_modes,
+ };
+ static int mode_count[] = {
+ ARRAY_SIZE(cga_modes),
+ ARRAY_SIZE(ega_modes),
+ ARRAY_SIZE(vga_modes),
+ };
+
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+
+ ireg.ax = 0x1200;
+ ireg.bl = 0x10; /* Check EGA/VGA */
+ intcall(0x10, &ireg, &oreg);
+
+#ifndef _WAKEUP
+ boot_params.screen_info.orig_video_ega_bx = oreg.bx;
+#endif
+
+ /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
+ if (oreg.bl != 0x10) {
+ /* EGA/VGA */
+ ireg.ax = 0x1a00;
+ intcall(0x10, &ireg, &oreg);
+
+ if (oreg.al == 0x1a) {
+ adapter = ADAPTER_VGA;
+#ifndef _WAKEUP
+ boot_params.screen_info.orig_video_isVGA = 1;
+#endif
+ } else {
+ adapter = ADAPTER_EGA;
+ }
+ } else {
+ adapter = ADAPTER_CGA;
+ }
+
+ video_vga.modes = mode_lists[adapter];
+ video_vga.card_name = card_name[adapter];
+ return mode_count[adapter];
+}
+
+static __videocard video_vga = {
+ .card_name = "VGA",
+ .probe = vga_probe,
+ .set_mode = vga_set_mode,
+};
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
new file mode 100644
index 000000000..77780e386
--- /dev/null
+++ b/arch/x86/boot/video.c
@@ -0,0 +1,345 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Select video mode
+ */
+
+#include <uapi/asm/boot.h>
+
+#include "boot.h"
+#include "video.h"
+#include "vesa.h"
+
+static u16 video_segment;
+
+static void store_cursor_position(void)
+{
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ah = 0x03;
+ intcall(0x10, &ireg, &oreg);
+
+ boot_params.screen_info.orig_x = oreg.dl;
+ boot_params.screen_info.orig_y = oreg.dh;
+
+ if (oreg.ch & 0x20)
+ boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
+
+ if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f))
+ boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
+}
+
+static void store_video_mode(void)
+{
+ struct biosregs ireg, oreg;
+
+ /* N.B.: the saving of the video page here is a bit silly,
+ since we pretty much assume page 0 everywhere. */
+ initregs(&ireg);
+ ireg.ah = 0x0f;
+ intcall(0x10, &ireg, &oreg);
+
+ /* Not all BIOSes are clean with respect to the top bit */
+ boot_params.screen_info.orig_video_mode = oreg.al & 0x7f;
+ boot_params.screen_info.orig_video_page = oreg.bh;
+}
+
+/*
+ * Store the video mode parameters for later usage by the kernel.
+ * This is done by asking the BIOS except for the rows/columns
+ * parameters in the default 80x25 mode -- these are set directly,
+ * because some very obscure BIOSes supply insane values.
+ */
+static void store_mode_params(void)
+{
+ u16 font_size;
+ int x, y;
+
+ /* For graphics mode, it is up to the mode-setting driver
+ (currently only video-vesa.c) to store the parameters */
+ if (graphic_mode)
+ return;
+
+ store_cursor_position();
+ store_video_mode();
+
+ if (boot_params.screen_info.orig_video_mode == 0x07) {
+ /* MDA, HGC, or VGA in monochrome mode */
+ video_segment = 0xb000;
+ } else {
+ /* CGA, EGA, VGA and so forth */
+ video_segment = 0xb800;
+ }
+
+ set_fs(0);
+ font_size = rdfs16(0x485); /* Font size, BIOS area */
+ boot_params.screen_info.orig_video_points = font_size;
+
+ x = rdfs16(0x44a);
+ y = (adapter == ADAPTER_CGA) ? 25 : rdfs8(0x484)+1;
+
+ if (force_x)
+ x = force_x;
+ if (force_y)
+ y = force_y;
+
+ boot_params.screen_info.orig_video_cols = x;
+ boot_params.screen_info.orig_video_lines = y;
+}
+
+static unsigned int get_entry(void)
+{
+ char entry_buf[4];
+ int i, len = 0;
+ int key;
+ unsigned int v;
+
+ do {
+ key = getchar();
+
+ if (key == '\b') {
+ if (len > 0) {
+ puts("\b \b");
+ len--;
+ }
+ } else if ((key >= '0' && key <= '9') ||
+ (key >= 'A' && key <= 'Z') ||
+ (key >= 'a' && key <= 'z')) {
+ if (len < sizeof entry_buf) {
+ entry_buf[len++] = key;
+ putchar(key);
+ }
+ }
+ } while (key != '\r');
+ putchar('\n');
+
+ if (len == 0)
+ return VIDEO_CURRENT_MODE; /* Default */
+
+ v = 0;
+ for (i = 0; i < len; i++) {
+ v <<= 4;
+ key = entry_buf[i] | 0x20;
+ v += (key > '9') ? key-'a'+10 : key-'0';
+ }
+
+ return v;
+}
+
+static void display_menu(void)
+{
+ struct card_info *card;
+ struct mode_info *mi;
+ char ch;
+ int i;
+ int nmodes;
+ int modes_per_line;
+ int col;
+
+ nmodes = 0;
+ for (card = video_cards; card < video_cards_end; card++)
+ nmodes += card->nmodes;
+
+ modes_per_line = 1;
+ if (nmodes >= 20)
+ modes_per_line = 3;
+
+ for (col = 0; col < modes_per_line; col++)
+ puts("Mode: Resolution: Type: ");
+ putchar('\n');
+
+ col = 0;
+ ch = '0';
+ for (card = video_cards; card < video_cards_end; card++) {
+ mi = card->modes;
+ for (i = 0; i < card->nmodes; i++, mi++) {
+ char resbuf[32];
+ int visible = mi->x && mi->y;
+ u16 mode_id = mi->mode ? mi->mode :
+ (mi->y << 8)+mi->x;
+
+ if (!visible)
+ continue; /* Hidden mode */
+
+ if (mi->depth)
+ sprintf(resbuf, "%dx%d", mi->y, mi->depth);
+ else
+ sprintf(resbuf, "%d", mi->y);
+
+ printf("%c %03X %4dx%-7s %-6s",
+ ch, mode_id, mi->x, resbuf, card->card_name);
+ col++;
+ if (col >= modes_per_line) {
+ putchar('\n');
+ col = 0;
+ }
+
+ if (ch == '9')
+ ch = 'a';
+ else if (ch == 'z' || ch == ' ')
+ ch = ' '; /* Out of keys... */
+ else
+ ch++;
+ }
+ }
+ if (col)
+ putchar('\n');
+}
+
+#define H(x) ((x)-'a'+10)
+#define SCAN ((H('s')<<12)+(H('c')<<8)+(H('a')<<4)+H('n'))
+
+static unsigned int mode_menu(void)
+{
+ int key;
+ unsigned int sel;
+
+ puts("Press <ENTER> to see video modes available, "
+ "<SPACE> to continue, or wait 30 sec\n");
+
+ kbd_flush();
+ while (1) {
+ key = getchar_timeout();
+ if (key == ' ' || key == 0)
+ return VIDEO_CURRENT_MODE; /* Default */
+ if (key == '\r')
+ break;
+ putchar('\a'); /* Beep! */
+ }
+
+
+ for (;;) {
+ display_menu();
+
+ puts("Enter a video mode or \"scan\" to scan for "
+ "additional modes: ");
+ sel = get_entry();
+ if (sel != SCAN)
+ return sel;
+
+ probe_cards(1);
+ }
+}
+
+/* Save screen content to the heap */
+static struct saved_screen {
+ int x, y;
+ int curx, cury;
+ u16 *data;
+} saved;
+
+static void save_screen(void)
+{
+ /* Should be called after store_mode_params() */
+ saved.x = boot_params.screen_info.orig_video_cols;
+ saved.y = boot_params.screen_info.orig_video_lines;
+ saved.curx = boot_params.screen_info.orig_x;
+ saved.cury = boot_params.screen_info.orig_y;
+
+ if (!heap_free(saved.x*saved.y*sizeof(u16)+512))
+ return; /* Not enough heap to save the screen */
+
+ saved.data = GET_HEAP(u16, saved.x*saved.y);
+
+ set_fs(video_segment);
+ copy_from_fs(saved.data, 0, saved.x*saved.y*sizeof(u16));
+}
+
+static void restore_screen(void)
+{
+ /* Should be called after store_mode_params() */
+ int xs = boot_params.screen_info.orig_video_cols;
+ int ys = boot_params.screen_info.orig_video_lines;
+ int y;
+ addr_t dst = 0;
+ u16 *src = saved.data;
+ struct biosregs ireg;
+
+ if (graphic_mode)
+ return; /* Can't restore onto a graphic mode */
+
+ if (!src)
+ return; /* No saved screen contents */
+
+ /* Restore screen contents */
+
+ set_fs(video_segment);
+ for (y = 0; y < ys; y++) {
+ int npad;
+
+ if (y < saved.y) {
+ int copy = (xs < saved.x) ? xs : saved.x;
+ copy_to_fs(dst, src, copy*sizeof(u16));
+ dst += copy*sizeof(u16);
+ src += saved.x;
+ npad = (xs < saved.x) ? 0 : xs-saved.x;
+ } else {
+ npad = xs;
+ }
+
+ /* Writes "npad" blank characters to
+ video_segment:dst and advances dst */
+ asm volatile("pushw %%es ; "
+ "movw %2,%%es ; "
+ "shrw %%cx ; "
+ "jnc 1f ; "
+ "stosw \n\t"
+ "1: rep;stosl ; "
+ "popw %%es"
+ : "+D" (dst), "+c" (npad)
+ : "bdS" (video_segment),
+ "a" (0x07200720));
+ }
+
+ /* Restore cursor position */
+ if (saved.curx >= xs)
+ saved.curx = xs-1;
+ if (saved.cury >= ys)
+ saved.cury = ys-1;
+
+ initregs(&ireg);
+ ireg.ah = 0x02; /* Set cursor position */
+ ireg.dh = saved.cury;
+ ireg.dl = saved.curx;
+ intcall(0x10, &ireg, NULL);
+
+ store_cursor_position();
+}
+
+void set_video(void)
+{
+ u16 mode = boot_params.hdr.vid_mode;
+
+ RESET_HEAP();
+
+ store_mode_params();
+ save_screen();
+ probe_cards(0);
+
+ for (;;) {
+ if (mode == ASK_VGA)
+ mode = mode_menu();
+
+ if (!set_mode(mode))
+ break;
+
+ printf("Undefined video mode number: %x\n", mode);
+ mode = ASK_VGA;
+ }
+ boot_params.hdr.vid_mode = mode;
+ vesa_store_edid();
+ store_mode_params();
+
+ if (do_restore)
+ restore_screen();
+}
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
new file mode 100644
index 000000000..b54e0328c
--- /dev/null
+++ b/arch/x86/boot/video.h
@@ -0,0 +1,120 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Header file for the real-mode video probing code
+ */
+
+#ifndef BOOT_VIDEO_H
+#define BOOT_VIDEO_H
+
+#include <linux/types.h>
+
+/*
+ * This code uses an extended set of video mode numbers. These include:
+ * Aliases for standard modes
+ * NORMAL_VGA (-1)
+ * EXTENDED_VGA (-2)
+ * ASK_VGA (-3)
+ * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
+ * of compatibility when extending the table. These are between 0x00 and 0xff.
+ */
+#define VIDEO_FIRST_MENU 0x0000
+
+/* Standard BIOS video modes (BIOS number + 0x0100) */
+#define VIDEO_FIRST_BIOS 0x0100
+
+/* VESA BIOS video modes (VESA number + 0x0200) */
+#define VIDEO_FIRST_VESA 0x0200
+
+/* Video7 special modes (BIOS number + 0x0900) */
+#define VIDEO_FIRST_V7 0x0900
+
+/* Special video modes */
+#define VIDEO_FIRST_SPECIAL 0x0f00
+#define VIDEO_80x25 0x0f00
+#define VIDEO_8POINT 0x0f01
+#define VIDEO_80x43 0x0f02
+#define VIDEO_80x28 0x0f03
+#define VIDEO_CURRENT_MODE 0x0f04
+#define VIDEO_80x30 0x0f05
+#define VIDEO_80x34 0x0f06
+#define VIDEO_80x60 0x0f07
+#define VIDEO_GFX_HACK 0x0f08
+#define VIDEO_LAST_SPECIAL 0x0f09
+
+/* Video modes given by resolution */
+#define VIDEO_FIRST_RESOLUTION 0x1000
+
+/* The "recalculate timings" flag */
+#define VIDEO_RECALC 0x8000
+
+void store_screen(void);
+#define DO_STORE() store_screen()
+
+/*
+ * Mode table structures
+ */
+
+struct mode_info {
+ u16 mode; /* Mode number (vga= style) */
+ u16 x, y; /* Width, height */
+ u16 depth; /* Bits per pixel, 0 for text mode */
+};
+
+struct card_info {
+ const char *card_name;
+ int (*set_mode)(struct mode_info *mode);
+ int (*probe)(void);
+ struct mode_info *modes;
+ int nmodes; /* Number of probed modes so far */
+ int unsafe; /* Probing is unsafe, only do after "scan" */
+ u16 xmode_first; /* Unprobed modes to try to call anyway */
+ u16 xmode_n; /* Size of unprobed mode range */
+};
+
+#define __videocard struct card_info __attribute__((used,section(".videocards")))
+extern struct card_info video_cards[], video_cards_end[];
+
+int mode_defined(u16 mode); /* video.c */
+
+/* Basic video information */
+#define ADAPTER_CGA 0 /* CGA/MDA/HGC */
+#define ADAPTER_EGA 1
+#define ADAPTER_VGA 2
+
+extern int adapter;
+extern int force_x, force_y; /* Don't query the BIOS for cols/rows */
+extern int do_restore; /* Restore screen contents */
+extern int graphic_mode; /* Graphics mode with linear frame buffer */
+
+/* Accessing VGA indexed registers */
+static inline u8 in_idx(u16 port, u8 index)
+{
+ outb(index, port);
+ return inb(port+1);
+}
+
+static inline void out_idx(u8 v, u16 port, u8 index)
+{
+ outw(index+(v << 8), port);
+}
+
+/* Writes a value to an indexed port and then reads the port again */
+static inline u8 tst_idx(u8 v, u16 port, u8 index)
+{
+ out_idx(port, index, v);
+ return in_idx(port, index);
+}
+
+/* Get the I/O port of the VGA CRTC */
+u16 vga_crtc(void); /* video-vga.c */
+
+#endif /* BOOT_VIDEO_H */