diff options
Diffstat (limited to 'kexec/arch/i386')
-rw-r--r-- | kexec/arch/i386/Makefile | 20 | ||||
-rw-r--r-- | kexec/arch/i386/crashdump-x86.c | 1049 | ||||
-rw-r--r-- | kexec/arch/i386/crashdump-x86.h | 33 | ||||
-rw-r--r-- | kexec/arch/i386/include/arch/options.h | 86 | ||||
-rw-r--r-- | kexec/arch/i386/kexec-beoboot-x86.c | 132 | ||||
-rw-r--r-- | kexec/arch/i386/kexec-bzImage.c | 471 | ||||
-rw-r--r-- | kexec/arch/i386/kexec-elf-rel-x86.c | 36 | ||||
-rw-r--r-- | kexec/arch/i386/kexec-elf-x86.c | 331 | ||||
-rw-r--r-- | kexec/arch/i386/kexec-mb2-x86.c | 616 | ||||
-rw-r--r-- | kexec/arch/i386/kexec-multiboot-x86.c | 505 | ||||
-rw-r--r-- | kexec/arch/i386/kexec-nbi.c | 249 | ||||
-rw-r--r-- | kexec/arch/i386/kexec-x86-common.c | 444 | ||||
-rw-r--r-- | kexec/arch/i386/kexec-x86.c | 210 | ||||
-rw-r--r-- | kexec/arch/i386/kexec-x86.h | 97 | ||||
-rw-r--r-- | kexec/arch/i386/x86-linux-setup.c | 971 | ||||
-rw-r--r-- | kexec/arch/i386/x86-linux-setup.h | 38 |
16 files changed, 5288 insertions, 0 deletions
diff --git a/kexec/arch/i386/Makefile b/kexec/arch/i386/Makefile new file mode 100644 index 0000000..f486103 --- /dev/null +++ b/kexec/arch/i386/Makefile @@ -0,0 +1,20 @@ +# +# kexec i386 (linux booting linux) +# +i386_KEXEC_SRCS = kexec/arch/i386/kexec-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-x86-common.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-elf-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-elf-rel-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-bzImage.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-multiboot-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-mb2-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-beoboot-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-nbi.c +i386_KEXEC_SRCS += kexec/arch/i386/x86-linux-setup.c +i386_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c + +dist += kexec/arch/i386/Makefile $(i386_KEXEC_SRCS) \ + kexec/arch/i386/crashdump-x86.h \ + kexec/arch/i386/kexec-x86.h \ + kexec/arch/i386/x86-linux-setup.h \ + kexec/arch/i386/include/arch/options.h diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c new file mode 100644 index 0000000..df1f24c --- /dev/null +++ b/kexec/arch/i386/crashdump-x86.c @@ -0,0 +1,1049 @@ +/* + * kexec: Linux boots Linux + * + * Created by: Vivek Goyal (vgoyal@in.ibm.com) + * old x86_64 version Created by: Murali M Chakravarthy (muralim@in.ibm.com) + * Copyright (C) IBM Corporation, 2005. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _XOPEN_SOURCE 600 +#define _BSD_SOURCE +#define _DEFAULT_SOURCE + +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <dirent.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../firmware_memmap.h" +#include "../../crashdump.h" +#include "kexec-x86.h" +#include "crashdump-x86.h" +#include "../../kexec-xen.h" +#include "x86-linux-setup.h" +#include <x86/x86-linux.h> + +extern struct arch_options_t arch_options; + +static int get_kernel_page_offset(struct kexec_info *UNUSED(info), + struct crash_elf_info *elf_info) +{ + + if (elf_info->machine == EM_X86_64) { + /* get_kernel_vaddr_and_size will override this */ + elf_info->page_offset = X86_64_PAGE_OFFSET; + } + else if (elf_info->machine == EM_386) { + elf_info->page_offset = X86_PAGE_OFFSET; + } + + return 0; +} + +#define X86_64_KERN_VADDR_ALIGN 0x100000 /* 1MB */ + +/* Read kernel physical load addr from the file returned by proc_iomem() + * (Kernel Code) and store in kexec_info */ +static int get_kernel_paddr(struct kexec_info *UNUSED(info), + struct crash_elf_info *elf_info) +{ + uint64_t start; + + if (elf_info->machine != EM_X86_64) + return 0; + + if (xen_present()) /* Kernel not entity mapped under Xen */ + return 0; + + if (parse_iomem_single("Kernel code\n", &start, NULL) == 0) { + elf_info->kern_paddr_start = start; + dbgprintf("kernel load physical addr start = 0x%016Lx\n", + (unsigned long long)start); + return 0; + } + + fprintf(stderr, "Cannot determine kernel physical load addr\n"); + return -1; +} + +/* Retrieve info regarding virtual address kernel has been compiled for and + * size of the kernel from /proc/kcore. Current /proc/kcore parsing from + * from kexec-tools fails because of malformed elf notes. A kernel patch has + * been submitted. For the folks using older kernels, this function + * hard codes the values to remain backward compatible. Once things stablize + * we should get rid of backward compatible code. */ + +static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info), + struct crash_elf_info *elf_info) +{ + int result; + const char kcore[] = "/proc/kcore"; + char *buf; + struct mem_ehdr ehdr; + struct mem_phdr *phdr, *end_phdr; + int align; + off_t size; + uint32_t elf_flags = 0; + uint64_t stext_sym; + const unsigned long long pud_mask = ~((1 << 30) - 1); + unsigned long long vaddr, lowest_vaddr = 0; + + if (elf_info->machine != EM_X86_64) + return 0; + + if (xen_present()) /* Kernel not entity mapped under Xen */ + return 0; + + align = getpagesize(); + buf = slurp_file_len(kcore, KCORE_ELF_HEADERS_SIZE, &size); + if (!buf) { + fprintf(stderr, "Cannot read %s: %s\n", kcore, strerror(errno)); + return -1; + } + + /* Don't perform checks to make sure stated phdrs and shdrs are + * actually present in the core file. It is not practical + * to read the GB size file into a user space buffer, Given the + * fact that we don't use any info from that. + */ + elf_flags |= ELF_SKIP_FILESZ_CHECK; + result = build_elf_core_info(buf, size, &ehdr, elf_flags); + if (result < 0) { + /* Perhaps KCORE_ELF_HEADERS_SIZE is too small? */ + fprintf(stderr, "ELF core (kcore) parse failed\n"); + return -1; + } + + end_phdr = &ehdr.e_phdr[ehdr.e_phnum]; + + /* Search for the real PAGE_OFFSET when KASLR memory randomization + * is enabled */ + for(phdr = ehdr.e_phdr; phdr != end_phdr; phdr++) { + if (phdr->p_type == PT_LOAD) { + vaddr = phdr->p_vaddr & pud_mask; + if (lowest_vaddr == 0 || lowest_vaddr > vaddr) + lowest_vaddr = vaddr; + } + } + if (lowest_vaddr != 0) + elf_info->page_offset = lowest_vaddr; + + /* Traverse through the Elf headers and find the region where + * _stext symbol is located in. That's where kernel is mapped */ + stext_sym = get_kernel_sym("_stext"); + for(phdr = ehdr.e_phdr; stext_sym && phdr != end_phdr; phdr++) { + if (phdr->p_type == PT_LOAD) { + unsigned long long saddr = phdr->p_vaddr; + unsigned long long eaddr = phdr->p_vaddr + phdr->p_memsz; + unsigned long long size; + + /* Look for kernel text mapping header. */ + if (saddr <= stext_sym && eaddr > stext_sym) { + saddr = _ALIGN_DOWN(saddr, X86_64_KERN_VADDR_ALIGN); + elf_info->kern_vaddr_start = saddr; + size = eaddr - saddr; + /* Align size to page size boundary. */ + size = _ALIGN(size, align); + elf_info->kern_size = size; + dbgprintf("kernel vaddr = 0x%llx size = 0x%llx\n", + saddr, size); + return 0; + } + } + } + + /* If failed to retrieve kernel text mapping through + * /proc/kallsyms, Traverse through the Elf headers again and + * find the region where kernel is mapped using hard-coded + * kernel mapping boundries */ + for(phdr = ehdr.e_phdr; phdr != end_phdr; phdr++) { + if (phdr->p_type == PT_LOAD) { + unsigned long long saddr = phdr->p_vaddr; + unsigned long long eaddr = phdr->p_vaddr + phdr->p_memsz; + unsigned long long size; + + /* Look for kernel text mapping header. */ + if ((saddr >= X86_64__START_KERNEL_map) && + (eaddr <= X86_64__START_KERNEL_map + X86_64_KERNEL_TEXT_SIZE)) { + saddr = _ALIGN_DOWN(saddr, X86_64_KERN_VADDR_ALIGN); + elf_info->kern_vaddr_start = saddr; + size = eaddr - saddr; + /* Align size to page size boundary. */ + size = _ALIGN(size, align); + elf_info->kern_size = size; + dbgprintf("kernel vaddr = 0x%llx size = 0x%llx\n", + saddr, size); + return 0; + } + } + } + + fprintf(stderr, "Can't find kernel text map area from kcore\n"); + return -1; +} + +/* Forward Declaration. */ +static void segregate_lowmem_region(int *nr_ranges, unsigned long lowmem_limit); +static int exclude_region(int *nr_ranges, uint64_t start, uint64_t end); + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region */ +static struct memory_range crash_memory_range[CRASH_MAX_MEMORY_RANGES]; + +/* Memory region reserved for storing panic kernel and other data. */ +#define CRASH_RESERVED_MEM_NR 8 +static struct memory_range crash_reserved_mem[CRASH_RESERVED_MEM_NR]; +static int crash_reserved_mem_nr; + +/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to + * create Elf headers. Keeping it separate from get_memory_ranges() as + * requirements are different in the case of normal kexec and crashdumps. + * + * Normal kexec needs to look at all of available physical memory irrespective + * of the fact how much of it is being used by currently running kernel. + * Crashdumps need to have access to memory regions actually being used by + * running kernel. Expecting a different file/data structure than /proc/iomem + * to look into down the line. May be something like /proc/kernelmem or may + * be zone data structures exported from kernel. + */ +static int get_crash_memory_ranges(struct memory_range **range, int *ranges, + int kexec_flags, unsigned long lowmem_limit) +{ + const char *iomem = proc_iomem(); + int memory_ranges = 0, gart = 0, i; + char line[MAX_LINE]; + FILE *fp; + unsigned long long start, end; + uint64_t gart_start = 0, gart_end = 0; + + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + + while(fgets(line, sizeof(line), fp) != 0) { + char *str; + int type, consumed, count; + + if (memory_ranges >= CRASH_MAX_MEMORY_RANGES) + break; + count = sscanf(line, "%llx-%llx : %n", + &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + dbgprintf("%016llx-%016llx : %s", + start, end, str); + /* + * We want to dump any System RAM -- memory regions currently + * used by the kernel. In the usual case, this is "System RAM" + * on the top level. However, we can also have "System RAM + * (virtio_mem)" below virtio devices or "System RAM (kmem)" + * below "Persistent Memory". + */ + if (strstr(str, "System RAM")) { + type = RANGE_RAM; + } else if (memcmp(str, "ACPI Tables\n", 12) == 0) { + /* + * ACPI Tables area need to be passed to new + * kernel with appropriate memmap= option. This + * is needed so that x86_64 kernel creates linear + * mapping for this region which is required for + * initializing acpi tables in second kernel. + */ + type = RANGE_ACPI; + } else if(memcmp(str,"ACPI Non-volatile Storage\n",26) == 0 ) { + type = RANGE_ACPI_NVS; + } else if(memcmp(str,"Persistent Memory (legacy)\n",27) == 0 ) { + type = RANGE_PRAM; + } else if(memcmp(str,"Persistent Memory\n",18) == 0 ) { + type = RANGE_PMEM; + } else if(memcmp(str,"reserved\n",9) == 0 ) { + type = RANGE_RESERVED; + } else if (memcmp(str, "Reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } else if (memcmp(str, "GART\n", 5) == 0) { + gart_start = start; + gart_end = end; + gart = 1; + continue; + } else { + continue; + } + + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = type; + + segregate_lowmem_region(&memory_ranges, lowmem_limit); + + memory_ranges++; + } + fclose(fp); + if (kexec_flags & KEXEC_PRESERVE_CONTEXT) { + for (i = 0; i < memory_ranges; i++) { + if (crash_memory_range[i].end > 0x0009ffff) { + crash_reserved_mem[0].start = \ + crash_memory_range[i].start; + break; + } + } + if (crash_reserved_mem[0].start >= mem_max) { + fprintf(stderr, "Too small mem_max: 0x%llx.\n", + mem_max); + return -1; + } + crash_reserved_mem[0].end = mem_max; + crash_reserved_mem[0].type = RANGE_RAM; + crash_reserved_mem_nr = 1; + } + + for (i = 0; i < crash_reserved_mem_nr; i++) + if (exclude_region(&memory_ranges, crash_reserved_mem[i].start, + crash_reserved_mem[i].end) < 0) + return -1; + + if (gart) { + /* exclude GART region if the system has one */ + if (exclude_region(&memory_ranges, gart_start, gart_end) < 0) + return -1; + } + *range = crash_memory_range; + *ranges = memory_ranges; + + return 0; +} + +#ifdef HAVE_LIBXENCTRL +static int get_crash_memory_ranges_xen(struct memory_range **range, + int *ranges, unsigned long lowmem_limit) +{ + struct e820entry *e820entries; + int j, rc, ret = -1; + unsigned int i; + xc_interface *xc; + + xc = xc_interface_open(NULL, NULL, 0); + + if (!xc) { + fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); + return -1; + } + + e820entries = xmalloc(sizeof(*e820entries) * CRASH_MAX_MEMORY_RANGES); + + rc = xc_get_machine_memory_map(xc, e820entries, CRASH_MAX_MEMORY_RANGES); + + if (rc < 0) { + fprintf(stderr, "%s: xc_get_machine_memory_map: %s\n", __func__, strerror(-rc)); + goto err; + } + + for (i = 0, j = 0; i < rc && j < CRASH_MAX_MEMORY_RANGES; ++i, ++j) { + crash_memory_range[j].start = e820entries[i].addr; + crash_memory_range[j].end = e820entries[i].addr + e820entries[i].size - 1; + crash_memory_range[j].type = xen_e820_to_kexec_type(e820entries[i].type); + segregate_lowmem_region(&j, lowmem_limit); + } + + *range = crash_memory_range; + *ranges = j; + + qsort(*range, *ranges, sizeof(struct memory_range), compare_ranges); + + for (i = 0; i < crash_reserved_mem_nr; i++) + if (exclude_region(ranges, crash_reserved_mem[i].start, + crash_reserved_mem[i].end) < 0) + goto err; + + ret = 0; + +err: + xc_interface_close(xc); + free(e820entries); + return ret; +} +#else +static int get_crash_memory_ranges_xen(struct memory_range **range, + int *ranges, unsigned long lowmem_limit) +{ + return 0; +} +#endif /* HAVE_LIBXENCTRL */ + +static void segregate_lowmem_region(int *nr_ranges, unsigned long lowmem_limit) +{ + unsigned long long end, start; + unsigned type; + + start = crash_memory_range[*nr_ranges].start; + end = crash_memory_range[*nr_ranges].end; + type = crash_memory_range[*nr_ranges].type; + + if (!(lowmem_limit && lowmem_limit > start && lowmem_limit < end)) + return; + + crash_memory_range[*nr_ranges].end = lowmem_limit - 1; + + if (*nr_ranges >= CRASH_MAX_MEMORY_RANGES - 1) + return; + + ++*nr_ranges; + + crash_memory_range[*nr_ranges].start = lowmem_limit; + crash_memory_range[*nr_ranges].end = end; + crash_memory_range[*nr_ranges].type = type; +} + +/* Removes crash reserve region from list of memory chunks for whom elf program + * headers have to be created. Assuming crash reserve region to be a single + * continuous area fully contained inside one of the memory chunks */ +static int exclude_region(int *nr_ranges, uint64_t start, uint64_t end) +{ + int i, j, tidx = -1; + struct memory_range temp_region = {0, 0, 0}; + + + for (i = 0; i < (*nr_ranges); i++) { + unsigned long long mstart, mend; + mstart = crash_memory_range[i].start; + mend = crash_memory_range[i].end; + if (start < mend && end > mstart) { + if (start != mstart && end != mend) { + /* Split memory region */ + crash_memory_range[i].end = start - 1; + temp_region.start = end + 1; + temp_region.end = mend; + temp_region.type = RANGE_RAM; + tidx = i+1; + } else if (start != mstart) + crash_memory_range[i].end = start - 1; + else + crash_memory_range[i].start = end + 1; + } + } + /* Insert split memory region, if any. */ + if (tidx >= 0) { + if (*nr_ranges == CRASH_MAX_MEMORY_RANGES) { + /* No space to insert another element. */ + fprintf(stderr, "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + return -1; + } + for (j = (*nr_ranges - 1); j >= tidx; j--) + crash_memory_range[j+1] = crash_memory_range[j]; + crash_memory_range[tidx] = temp_region; + (*nr_ranges)++; + } + return 0; +} + +/* Adds a segment from list of memory regions which new kernel can use to + * boot. Segment start and end should be aligned to 1K boundary. */ +static int add_memmap(struct memory_range *memmap_p, int *nr_memmap, + unsigned long long addr, size_t size, int type) +{ + int i, j, nr_entries = 0, tidx = 0, align = 1024; + unsigned long long mstart, mend; + + /* Shrink to 1KiB alignment if needed. */ + if (type == RANGE_RAM && ((addr%align) || (size%align))) { + unsigned long long end = addr + size; + + printf("%s: RAM chunk %#llx - %#llx unaligned\n", __func__, addr, end); + addr = _ALIGN_UP(addr, align); + end = _ALIGN_DOWN(end, align); + if (addr >= end) + return -1; + size = end - addr; + printf("%s: RAM chunk shrunk to %#llx - %#llx\n", __func__, addr, end); + } + + /* Make sure at least one entry in list is free. */ + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (!mstart && !mend) + break; + else + nr_entries++; + } + if (nr_entries == CRASH_MAX_MEMMAP_NR) + return -1; + + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (mstart == 0 && mend == 0) + break; + if (mstart <= (addr+size-1) && mend >=addr) + /* Overlapping region. */ + return -1; + else if (addr > mend) + tidx = i+1; + } + /* Insert the memory region. */ + for (j = nr_entries-1; j >= tidx; j--) + memmap_p[j+1] = memmap_p[j]; + memmap_p[tidx].start = addr; + memmap_p[tidx].end = addr + size - 1; + memmap_p[tidx].type = type; + *nr_memmap = nr_entries + 1; + + dbgprint_mem_range("Memmap after adding segment", memmap_p, *nr_memmap); + + return 0; +} + +/* Removes a segment from list of memory regions which new kernel can use to + * boot. Segment start and end should be aligned to 1K boundary. */ +static int delete_memmap(struct memory_range *memmap_p, int *nr_memmap, + unsigned long long addr, size_t size) +{ + int i, j, nr_entries = 0, tidx = -1, operation = 0, align = 1024; + unsigned long long mstart, mend; + struct memory_range temp_region; + + /* Do alignment check. */ + if ((addr%align) || (size%align)) + return -1; + + /* Make sure at least one entry in list is free. */ + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (!mstart && !mend) + break; + else + nr_entries++; + } + if (nr_entries == CRASH_MAX_MEMMAP_NR) + /* List if full */ + return -1; + + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (mstart == 0 && mend == 0) + /* Did not find the segment in the list. */ + return -1; + if (mstart <= addr && mend >= (addr + size - 1)) { + if (mstart == addr && mend == (addr + size - 1)) { + /* Exact match. Delete region */ + operation = -1; + tidx = i; + break; + } + if (mstart != addr && mend != (addr + size - 1)) { + /* Split in two */ + memmap_p[i].end = addr - 1; + temp_region.start = addr + size; + temp_region.end = mend; + temp_region.type = memmap_p[i].type; + operation = 1; + tidx = i; + break; + } + + /* No addition/deletion required. Adjust the existing.*/ + if (mstart != addr) { + memmap_p[i].end = addr - 1; + break; + } else { + memmap_p[i].start = addr + size; + break; + } + } + } + if ((operation == 1) && tidx >=0) { + /* Insert the split memory region. */ + for (j = nr_entries-1; j > tidx; j--) + memmap_p[j+1] = memmap_p[j]; + memmap_p[tidx+1] = temp_region; + *nr_memmap = nr_entries + 1; + } + if ((operation == -1) && tidx >=0) { + /* Delete the exact match memory region. */ + for (j = i+1; j < CRASH_MAX_MEMMAP_NR; j++) + memmap_p[j-1] = memmap_p[j]; + memmap_p[j-1].start = memmap_p[j-1].end = 0; + *nr_memmap = nr_entries - 1; + } + + dbgprint_mem_range("Memmap after deleting segment", memmap_p, *nr_memmap); + + return 0; +} + +static void cmdline_add_memmap_internal(char *cmdline, unsigned long startk, + unsigned long endk, int type) +{ + int cmdlen, len; + char str_mmap[256], str_tmp[20]; + + strcpy (str_mmap, " memmap="); + ultoa((endk-startk), str_tmp); + strcat (str_mmap, str_tmp); + + if (type == RANGE_RAM) + strcat (str_mmap, "K@"); + else if (type == RANGE_RESERVED) + strcat (str_mmap, "K$"); + else if (type == RANGE_ACPI || type == RANGE_ACPI_NVS) + strcat (str_mmap, "K#"); + else if (type == RANGE_PRAM) + strcat (str_mmap, "K!"); + + ultoa(startk, str_tmp); + strcat (str_mmap, str_tmp); + strcat (str_mmap, "K"); + len = strlen(str_mmap); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str_mmap); +} + +/* Adds the appropriate memmap= options to command line, indicating the + * memory regions the new kernel can use to boot into. */ +static int cmdline_add_memmap(char *cmdline, struct memory_range *memmap_p) +{ + int i, cmdlen, len; + unsigned long min_sizek = 100; + char str_mmap[256]; + + /* Exact map */ + strcpy(str_mmap, " memmap=exactmap"); + len = strlen(str_mmap); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str_mmap); + + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + unsigned long startk, endk, type; + + startk = memmap_p[i].start/1024; + endk = (memmap_p[i].end + 1)/1024; + type = memmap_p[i].type; + + /* Only adding memory regions of RAM and ACPI and Persistent Mem */ + if (type != RANGE_RAM && + type != RANGE_ACPI && + type != RANGE_ACPI_NVS && + type != RANGE_PRAM) + continue; + + if (type == RANGE_ACPI || type == RANGE_ACPI_NVS) + endk = _ALIGN_UP(memmap_p[i].end + 1, 1024)/1024; + + if (!startk && !endk) + /* All regions traversed. */ + break; + + /* A RAM region is not worth adding if region size < 100K. + * It eats up precious command line length. */ + if (type == RANGE_RAM && (endk - startk) < min_sizek) + continue; + /* And do not add e820 reserved region either */ + cmdline_add_memmap_internal(cmdline, startk, endk, type); + } + + dbgprintf("Command line after adding memmap\n"); + dbgprintf("%s\n", cmdline); + + return 0; +} + +/* Adds the elfcorehdr= command line parameter to command line. */ +static int cmdline_add_elfcorehdr(char *cmdline, unsigned long addr) +{ + int cmdlen, len, align = 1024; + char str[30], *ptr; + + /* Passing in elfcorehdr=xxxK format. Saves space required in cmdline. + * Ensure 1K alignment*/ + if (addr%align) + return -1; + addr = addr/align; + ptr = str; + strcpy(str, " elfcorehdr="); + ptr += strlen(str); + ultoa(addr, ptr); + strcat(str, "K"); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + dbgprintf("Command line after adding elfcorehdr\n"); + dbgprintf("%s\n", cmdline); + + return 0; +} + + +/* + * This routine is specific to i386 architecture to maintain the + * backward compatibility, other architectures can use the per + * cpu version get_crash_notes_per_cpu() directly. + */ +static int get_crash_notes(int cpu, uint64_t *addr, uint64_t *len) +{ + const char *crash_notes = "/sys/kernel/crash_notes"; + char line[MAX_LINE]; + FILE *fp; + unsigned long vaddr; + int count; + + fp = fopen(crash_notes, "r"); + if (fp) { + if (fgets(line, sizeof(line), fp) != 0) { + count = sscanf(line, "%lx", &vaddr); + if (count != 1) + die("Cannot parse %s: %s\n", crash_notes, + strerror(errno)); + } + + *addr = x86__pa(vaddr + (cpu * MAX_NOTE_BYTES)); + *len = MAX_NOTE_BYTES; + + dbgprintf("crash_notes addr = %llx\n", + (unsigned long long)*addr); + + fclose(fp); + return 0; + } else + return get_crash_notes_per_cpu(cpu, addr, len); +} + +static enum coretype get_core_type(struct crash_elf_info *elf_info, + struct memory_range *range, int ranges) +{ + if ((elf_info->machine) == EM_X86_64) + return CORE_TYPE_ELF64; + else { + /* fall back to default */ + if (ranges == 0) + return CORE_TYPE_ELF64; + + if (range[ranges - 1].end > 0xFFFFFFFFUL) + return CORE_TYPE_ELF64; + else + return CORE_TYPE_ELF32; + } +} + +static int sysfs_efi_runtime_map_exist(void) +{ + DIR *dir; + + dir = opendir("/sys/firmware/efi/runtime-map"); + if (!dir) + return 0; + + closedir(dir); + return 1; +} + +/* Appends 'acpi_rsdp=' commandline for efi boot crash dump */ +static void cmdline_add_efi(char *cmdline) +{ + uint64_t acpi_rsdp; + char acpi_rsdp_buf[MAX_LINE]; + + acpi_rsdp = get_acpi_rsdp(); + + if (!acpi_rsdp) + return; + + sprintf(acpi_rsdp_buf, " acpi_rsdp=0x%lx", acpi_rsdp); + if (strlen(cmdline) + strlen(acpi_rsdp_buf) > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + + strcat(cmdline, acpi_rsdp_buf); +} + +static void get_backup_area(struct kexec_info *info, + struct memory_range *range, int ranges) +{ + int i; + + /* Look for first 640 KiB RAM region. */ + for (i = 0; i < ranges; ++i) { + if (range[i].type != RANGE_RAM || range[i].end > 0xa0000) + continue; + + info->backup_src_start = range[i].start; + info->backup_src_size = range[i].end - range[i].start + 1; + + dbgprintf("%s: %016llx-%016llx : System RAM\n", __func__, + range[i].start, range[i].end); + + return; + } + + /* First 640 KiB RAM region not found. Assume defaults. */ + info->backup_src_start = BACKUP_SRC_START; + info->backup_src_size = BACKUP_SRC_END - BACKUP_SRC_START + 1; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for backup region, another segment for storing elf headers + * for crash memory image. + */ +int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, + unsigned long max_addr, unsigned long min_base) +{ + void *tmp; + unsigned long sz, bufsz, memsz, elfcorehdr; + int nr_ranges = 0, nr_memmap = 0, align = 1024, i; + struct memory_range *mem_range, *memmap_p; + struct crash_elf_info elf_info; + unsigned kexec_arch; + + memset(&elf_info, 0x0, sizeof(elf_info)); + + /* Constant parts of the elf_info */ + memset(&elf_info, 0, sizeof(elf_info)); + elf_info.data = ELFDATA2LSB; + + /* Get the architecture of the running kernel */ + kexec_arch = info->kexec_flags & KEXEC_ARCH_MASK; + if (kexec_arch == KEXEC_ARCH_DEFAULT) + kexec_arch = KEXEC_ARCH_NATIVE; + + /* Get the elf architecture of the running kernel */ + switch(kexec_arch) { + case KEXEC_ARCH_X86_64: + elf_info.machine = EM_X86_64; + break; + case KEXEC_ARCH_386: + elf_info.machine = EM_386; + elf_info.lowmem_limit = X86_MAXMEM; + elf_info.get_note_info = get_crash_notes; + break; + default: + fprintf(stderr, "unsupported crashdump architecture: %04x\n", + kexec_arch); + return -1; + } + + if (xen_present()) { + if (get_crash_memory_ranges_xen(&mem_range, &nr_ranges, + elf_info.lowmem_limit) < 0) + return -1; + } else + if (get_crash_memory_ranges(&mem_range, &nr_ranges, + info->kexec_flags, + elf_info.lowmem_limit) < 0) + return -1; + + get_backup_area(info, mem_range, nr_ranges); + + dbgprint_mem_range("CRASH MEMORY RANGES", mem_range, nr_ranges); + + /* + * if the core type has not been set on command line, set it here + * automatically + */ + if (arch_options.core_header_type == CORE_TYPE_UNDEF) { + arch_options.core_header_type = + get_core_type(&elf_info, mem_range, nr_ranges); + } + /* Get the elf class... */ + elf_info.class = ELFCLASS32; + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + elf_info.class = ELFCLASS64; + } + + if (get_kernel_page_offset(info, &elf_info)) + return -1; + + if (get_kernel_paddr(info, &elf_info)) + return -1; + + if (get_kernel_vaddr_and_size(info, &elf_info)) + return -1; + + /* Memory regions which panic kernel can safely use to boot into */ + sz = (sizeof(struct memory_range) * CRASH_MAX_MEMMAP_NR); + memmap_p = xmalloc(sz); + memset(memmap_p, 0, sz); + add_memmap(memmap_p, &nr_memmap, info->backup_src_start, info->backup_src_size, RANGE_RAM); + for (i = 0; i < crash_reserved_mem_nr; i++) { + sz = crash_reserved_mem[i].end - crash_reserved_mem[i].start +1; + if (add_memmap(memmap_p, &nr_memmap, crash_reserved_mem[i].start, + sz, RANGE_RAM) < 0) { + free(memmap_p); + return ENOCRASHKERNEL; + } + } + + /* Create a backup region segment to store backup data*/ + if (!(info->kexec_flags & KEXEC_PRESERVE_CONTEXT)) { + sz = _ALIGN(info->backup_src_size, align); + tmp = xmalloc(sz); + memset(tmp, 0, sz); + info->backup_start = add_buffer(info, tmp, sz, sz, align, + 0, max_addr, -1); + dbgprintf("Created backup segment at 0x%lx\n", + info->backup_start); + if (delete_memmap(memmap_p, &nr_memmap, info->backup_start, sz) < 0) { + free(tmp); + free(memmap_p); + return EFAILED; + } + } + + /* Create elf header segment and store crash image data. */ + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + if (crash_create_elf64_headers(info, &elf_info, mem_range, + nr_ranges, &tmp, &bufsz, + ELF_CORE_HEADER_ALIGN) < 0) { + free(memmap_p); + return EFAILED; + } + } + else { + if (crash_create_elf32_headers(info, &elf_info, mem_range, + nr_ranges, &tmp, &bufsz, + ELF_CORE_HEADER_ALIGN) < 0) { + free(memmap_p); + return EFAILED; + } + } + /* the size of the elf headers allocated is returned in 'bufsz' */ + + /* Hack: With some ld versions (GNU ld version 2.14.90.0.4 20030523), + * vmlinux program headers show a gap of two pages between bss segment + * and data segment but effectively kernel considers it as bss segment + * and overwrites the any data placed there. Hence bloat the memsz of + * elf core header segment to 16K to avoid being placed in such gaps. + * This is a makeshift solution until it is fixed in kernel. + */ + if (bufsz < (16*1024)) { + /* bufsize is big enough for all the PT_NOTE's and PT_LOAD's */ + memsz = 16*1024; + /* memsz will be the size of the memory hole we look for */ + } else { + memsz = bufsz; + } + elfcorehdr = add_buffer(info, tmp, bufsz, memsz, align, min_base, + max_addr, -1); + dbgprintf("Created elf header segment at 0x%lx\n", elfcorehdr); + if (delete_memmap(memmap_p, &nr_memmap, elfcorehdr, memsz) < 0) { + free(memmap_p); + return -1; + } + if (!bzImage_support_efi_boot || arch_options.noefi || + !sysfs_efi_runtime_map_exist()) + cmdline_add_efi(mod_cmdline); + cmdline_add_elfcorehdr(mod_cmdline, elfcorehdr); + + /* Inform second kernel about the presence of ACPI tables. */ + for (i = 0; i < nr_ranges; i++) { + unsigned long start, end, size, type; + if ( !( mem_range[i].type == RANGE_ACPI + || mem_range[i].type == RANGE_ACPI_NVS + || mem_range[i].type == RANGE_RESERVED + || mem_range[i].type == RANGE_PMEM + || mem_range[i].type == RANGE_PRAM)) + continue; + start = mem_range[i].start; + end = mem_range[i].end; + type = mem_range[i].type; + size = end - start + 1; + add_memmap(memmap_p, &nr_memmap, start, size, type); + } + + if (arch_options.pass_memmap_cmdline) + cmdline_add_memmap(mod_cmdline, memmap_p); + + /* Store 2nd kernel boot memory ranges for later reference in + * x86-setup-linux.c: setup_linux_system_parameters() */ + info->crash_range = memmap_p; + info->nr_crash_ranges = nr_memmap; + + return 0; +} + +/* On x86, the kernel may make a low reservation in addition to the + * normal reservation. However, the kernel refuses to load the panic + * kernel to low memory, so always choose the highest range. + */ +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + if (!crash_reserved_mem_nr) + return -1; + + *start = crash_reserved_mem[crash_reserved_mem_nr - 1].start; + *end = crash_reserved_mem[crash_reserved_mem_nr - 1].end; + + return 0; +} + +static int crashkernel_mem_callback(void *UNUSED(data), int nr, + char *UNUSED(str), + unsigned long long base, + unsigned long long length) +{ + if (nr >= CRASH_RESERVED_MEM_NR) + return 1; + + crash_reserved_mem[nr].start = base; + crash_reserved_mem[nr].end = base + length - 1; + crash_reserved_mem[nr].type = RANGE_RAM; + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + int ret; + + if (xen_present()) { + uint64_t start, end; + + ret = xen_get_crashkernel_region(&start, &end); + if (ret < 0) + return 0; + + crash_reserved_mem[0].start = start; + crash_reserved_mem[0].end = end; + crash_reserved_mem[0].type = RANGE_RAM; + crash_reserved_mem_nr = 1; + } else { + ret = kexec_iomem_for_each_line("Crash kernel\n", + crashkernel_mem_callback, NULL); + crash_reserved_mem_nr = ret; + } + + return !!crash_reserved_mem_nr; +} diff --git a/kexec/arch/i386/crashdump-x86.h b/kexec/arch/i386/crashdump-x86.h new file mode 100644 index 0000000..479a549 --- /dev/null +++ b/kexec/arch/i386/crashdump-x86.h @@ -0,0 +1,33 @@ +#ifndef CRASHDUMP_X86_H +#define CRASHDUMP_X86_H + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + unsigned long max_addr, unsigned long min_base); + +#define X86_PAGE_OFFSET 0xc0000000 +#define x86__pa(x) ((unsigned long)(x)-X86_PAGE_OFFSET) + +#define X86__VMALLOC_RESERVE (128 << 20) +#define X86_MAXMEM (-X86_PAGE_OFFSET-X86__VMALLOC_RESERVE) + +#define X86_64__START_KERNEL_map 0xffffffff80000000ULL +#define X86_64_PAGE_OFFSET_PRE_2_6_27 0xffff810000000000ULL +#define X86_64_PAGE_OFFSET_PRE_4_20_0 0xffff880000000000ULL +#define X86_64_PAGE_OFFSET 0xffff888000000000ULL + +#define X86_64_MAXMEM 0x3fffffffffffUL + +/* Kernel text size */ +#define X86_64_KERNEL_TEXT_SIZE (512UL*1024*1024) + +#define CRASH_MAX_MEMMAP_NR 1024 + +#define CRASH_MAX_MEMORY_RANGES 32768 + +/* Backup Region, First 640K of System RAM. */ +#define BACKUP_SRC_START 0x00000000 +#define BACKUP_SRC_END 0x0009ffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) + +#endif /* CRASHDUMP_X86_H */ diff --git a/kexec/arch/i386/include/arch/options.h b/kexec/arch/i386/include/arch/options.h new file mode 100644 index 0000000..89e0a95 --- /dev/null +++ b/kexec/arch/i386/include/arch/options.h @@ -0,0 +1,86 @@ +#ifndef KEXEC_ARCH_I386_OPTIONS_H +#define KEXEC_ARCH_I386_OPTIONS_H + +/* + ************************************************************************* + * NOTE NOTE NOTE + * This file is included for i386 builds *and* x86_64 builds (which build + * both x86_64 and i386 loaders). + * It contains the combined set of options used by i386 and x86_64. + ************************************************************************* + */ + +#define OPT_RESET_VGA (OPT_MAX+0) +#define OPT_SERIAL (OPT_MAX+1) +#define OPT_SERIAL_BAUD (OPT_MAX+2) +#define OPT_CONSOLE_VGA (OPT_MAX+3) +#define OPT_CONSOLE_SERIAL (OPT_MAX+4) +#define OPT_ELF32_CORE (OPT_MAX+5) +#define OPT_ELF64_CORE (OPT_MAX+6) +#define OPT_ARCH_MAX (OPT_MAX+7) + +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+1) +#define OPT_RAMDISK (OPT_ARCH_MAX+2) +#define OPT_ARGS_ELF (OPT_ARCH_MAX+3) +#define OPT_ARGS_LINUX (OPT_ARCH_MAX+4) +#define OPT_ARGS_NONE (OPT_ARCH_MAX+5) +#define OPT_CL (OPT_ARCH_MAX+6) +#define OPT_MOD (OPT_ARCH_MAX+7) +#define OPT_VGA (OPT_ARCH_MAX+8) +#define OPT_REAL_MODE (OPT_ARCH_MAX+9) +#define OPT_ENTRY_32BIT (OPT_ARCH_MAX+10) +#define OPT_PASS_MEMMAP_CMDLINE (OPT_ARCH_MAX+11) +#define OPT_NOEFI (OPT_ARCH_MAX+12) +#define OPT_REUSE_VIDEO_TYPE (OPT_ARCH_MAX+13) +#define OPT_DTB (OPT_ARCH_MAX+14) + +/* Options relevant to the architecture (excluding loader-specific ones): */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "reset-vga", 0, 0, OPT_RESET_VGA }, \ + { "serial", 1, 0, OPT_SERIAL }, \ + { "serial-baud", 1, 0, OPT_SERIAL_BAUD }, \ + { "console-vga", 0, 0, OPT_CONSOLE_VGA }, \ + { "console-serial", 0, 0, OPT_CONSOLE_SERIAL }, \ + { "elf32-core-headers", 0, 0, OPT_ELF32_CORE }, \ + { "elf64-core-headers", 0, 0, OPT_ELF64_CORE }, \ + { "pass-memmap-cmdline", 0, 0, OPT_PASS_MEMMAP_CMDLINE }, \ + { "noefi", 0, 0, OPT_NOEFI}, \ + { "reuse-video-type", 0, 0, OPT_REUSE_VIDEO_TYPE }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "append", 1, NULL, OPT_APPEND }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \ + { "initrd", 1, NULL, OPT_RAMDISK }, \ + { "ramdisk", 1, NULL, OPT_RAMDISK }, \ + { "args-elf", 0, NULL, OPT_ARGS_ELF }, \ + { "args-linux", 0, NULL, OPT_ARGS_LINUX }, \ + { "args-none", 0, NULL, OPT_ARGS_NONE }, \ + { "module", 1, 0, OPT_MOD }, \ + { "real-mode", 0, NULL, OPT_REAL_MODE }, \ + { "entry-32bit", 0, NULL, OPT_ENTRY_32BIT }, \ + { "dtb", 1, NULL, OPT_DTB }, + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_I386_OPTIONS_H */ + diff --git a/kexec/arch/i386/kexec-beoboot-x86.c b/kexec/arch/i386/kexec-beoboot-x86.c new file mode 100644 index 0000000..d949ab8 --- /dev/null +++ b/kexec/arch/i386/kexec-beoboot-x86.c @@ -0,0 +1,132 @@ +/*------------------------------------------------------------ -*- C -*- + * Eric Biederman <ebiederman@xmission.com> + * Erik Arjan Hendriks <hendriks@lanl.gov> + * + * 14 December 2004 + * This file is a derivative of the beoboot image loader, modified + * to work with kexec. + * + * This version is derivative from the orignal mkbootimg.c which is + * Copyright (C) 2000 Scyld Computing Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + + * + *--------------------------------------------------------------------*/ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <x86/x86-linux.h> +#include <boot/beoboot.h> +#include "../../kexec.h" +#include "kexec-x86.h" +#include <arch/options.h> + +int beoboot_probe(const char *buf, off_t len) +{ + struct beoboot_header bb_header; + const char *cmdline, *kernel; + int result; + if ((uintmax_t)len < (uintmax_t)sizeof(bb_header)) { + return -1; + } + memcpy(&bb_header, buf, sizeof(bb_header)); + if (memcmp(bb_header.magic, BEOBOOT_MAGIC, 4) != 0) { + return -1; + } + if (bb_header.arch != BEOBOOT_ARCH) { + return -1; + } + /* Make certain a bzImage is packed into there. + */ + cmdline = buf + sizeof(bb_header); + kernel = cmdline + bb_header.cmdline_size; + result = bzImage_probe(kernel, bb_header.kernel_size); + + return result; +} + +void beoboot_usage(void) +{ + printf( " --real-mode Use the kernels real mode entry point.\n" + ); + + /* No parameters are parsed */ +} + +#define SETUP_BASE 0x90000 +#define KERN32_BASE 0x100000 /* 1MB */ +#define INITRD_BASE 0x1000000 /* 16MB */ + +int beoboot_load(int argc, char **argv, const char *buf, off_t UNUSED(len), + struct kexec_info *info) +{ + struct beoboot_header bb_header; + const char *command_line, *kernel, *initrd; + + int real_mode_entry; + int opt; + int result; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "real-mode", 0, 0, OPT_REAL_MODE }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + /* + * Parse the command line arguments + */ + real_mode_entry = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_REAL_MODE: + real_mode_entry = 1; + break; + } + } + + + /* + * Parse the file + */ + memcpy(&bb_header, buf, sizeof(bb_header)); + command_line = buf + sizeof(bb_header); + kernel = command_line + bb_header.cmdline_size; + initrd = NULL; + if (bb_header.flags & BEOBOOT_INITRD_PRESENT) { + initrd = kernel + bb_header.kernel_size; + } + + result = do_bzImage_load(info, + kernel, bb_header.kernel_size, + command_line, bb_header.cmdline_size, + initrd, bb_header.initrd_size, + 0, 0, real_mode_entry); + + return result; +} + diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c new file mode 100644 index 0000000..1b8f20c --- /dev/null +++ b/kexec/arch/i386/kexec-bzImage.c @@ -0,0 +1,471 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2010 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-x86.h" +#include "x86-linux-setup.h" +#include "crashdump-x86.h" +#include <arch/options.h> + +static const int probe_debug = 0; +int bzImage_support_efi_boot = 0; + +int bzImage_probe(const char *buf, off_t len) +{ + const struct x86_linux_header *header; + if ((uintmax_t)len < (uintmax_t)(2 * 512)) { + if (probe_debug) { + fprintf(stderr, "File is too short to be a bzImage!\n"); + } + return -1; + } + header = (const struct x86_linux_header *)buf; + if (memcmp(header->header_magic, "HdrS", 4) != 0) { + if (probe_debug) { + fprintf(stderr, "Not a bzImage\n"); + } + return -1; + } + if (header->boot_sector_magic != 0xAA55) { + if (probe_debug) { + fprintf(stderr, "No x86 boot sector present\n"); + } + /* No x86 boot sector present */ + return -1; + } + if (header->protocol_version < 0x0200) { + if (probe_debug) { + fprintf(stderr, "Must be at least protocol version 2.00\n"); + } + /* Must be at least protocol version 2.00 */ + return -1; + } + if ((header->loadflags & 1) == 0) { + if (probe_debug) { + fprintf(stderr, "zImage not a bzImage\n"); + } + /* Not a bzImage */ + return -1; + } + /* I've got a bzImage */ + if (probe_debug) { + fprintf(stderr, "It's a bzImage\n"); + } + return 0; +} + + +void bzImage_usage(void) +{ + printf( " --real-mode Use the kernels real mode entry point.\n" + " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --reuse-cmdline Use kernel command line from running system.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " --dtb=FILE Use FILE as devicetree.\n" + ); + +} + +int do_bzImage_load(struct kexec_info *info, + const char *kernel, off_t kernel_len, + const char *command_line, off_t command_line_len, + const char *initrd, off_t initrd_len, + const char *dtb, off_t dtb_len, + int real_mode_entry) +{ + struct x86_linux_header setup_header; + struct x86_linux_param_header *real_mode; + int setup_sects; + size_t size; + int kern16_size; + unsigned long setup_base, setup_size; + struct entry32_regs regs32; + struct entry16_regs regs16; + unsigned int relocatable_kernel = 0; + unsigned long kernel32_load_addr; + char *modified_cmdline; + unsigned long cmdline_end; + unsigned long kern16_size_needed; + unsigned long heap_size = 0; + + /* + * Find out about the file I am about to load. + */ + if ((uintmax_t)kernel_len < (uintmax_t)(2 * 512)) { + return -1; + } + memcpy(&setup_header, kernel, sizeof(setup_header)); + setup_sects = setup_header.setup_sects; + if (setup_sects == 0) { + setup_sects = 4; + } + + kern16_size = (setup_sects +1) *512; + if (kernel_len < kern16_size) { + fprintf(stderr, "BzImage truncated?\n"); + return -1; + } + + if (setup_header.protocol_version >= 0x0206) { + if ((uintmax_t)command_line_len > + (uintmax_t)setup_header.cmdline_size) { + dbgprintf("Kernel command line too long for kernel!\n"); + return -1; + } + } else { + if (command_line_len > 255) { + dbgprintf("WARNING: This kernel may only support 255 byte command lines\n"); + } + } + + if (setup_header.protocol_version >= 0x0205) { + relocatable_kernel = setup_header.relocatable_kernel; + dbgprintf("bzImage is relocatable\n"); + } + + /* Can't use bzImage for crash dump purposes with real mode entry */ + if((info->kexec_flags & KEXEC_ON_CRASH) && real_mode_entry) { + fprintf(stderr, "Can't use bzImage for crash dump purposes" + " with real mode entry\n"); + return -1; + } + + if((info->kexec_flags & KEXEC_ON_CRASH) && !relocatable_kernel) { + fprintf(stderr, "BzImage is not relocatable. Can't be used" + " as capture kernel.\n"); + return -1; + } + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (command_line) { + strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + + /* If panic kernel is being loaded, additional segments need + * to be created. load_crashdump_segments will take care of + * loading the segments as high in memory as possible, hence + * in turn as away as possible from kernel to avoid being + * stomped by the kernel. + */ + if (load_crashdump_segments(info, modified_cmdline, -1, 0) < 0) + return -1; + + /* Use new command line buffer */ + command_line = modified_cmdline; + command_line_len = strlen(command_line) +1; + } + + /* Load the trampoline. This must load at a higher address + * than the argument/parameter segment or the kernel will stomp + * it's gdt. + * + * x86_64 purgatory code has got relocations type R_X86_64_32S + * that means purgatory got to be loaded within first 2G otherwise + * overflow takes place while applying relocations. + */ + if (!real_mode_entry && relocatable_kernel) + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0x3000, 0x7fffffff, -1, 0); + else + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0x3000, 640*1024, -1, 0); + dbgprintf("Loaded purgatory at addr 0x%lx\n", info->rhdr.rel_addr); + + /* The argument/parameter segment */ + if (real_mode_entry) { + /* need to include size for bss and heap etc */ + if (setup_header.protocol_version >= 0x0201) + kern16_size_needed = setup_header.heap_end_ptr; + else + kern16_size_needed = kern16_size + 8192; /* bss */ + if (kern16_size_needed < kern16_size) + kern16_size_needed = kern16_size; + if (kern16_size_needed > 0xfffc) + die("kern16_size_needed is more then 64k\n"); + heap_size = 0xfffc - kern16_size_needed; /* less 64k */ + heap_size = _ALIGN_DOWN(heap_size, 0x200); + kern16_size_needed += heap_size; + } else { + kern16_size_needed = kern16_size; + /* need to bigger than size of struct bootparams */ + if (kern16_size_needed < 4096) + kern16_size_needed = 4096; + } + setup_size = kern16_size_needed + command_line_len + + PURGATORY_CMDLINE_SIZE; + real_mode = xmalloc(setup_size); + memset(real_mode, 0, setup_size); + if (!real_mode_entry) { + unsigned long setup_header_size = kernel[0x201] + 0x202 - 0x1f1; + + /* only copy setup_header */ + if (setup_header_size > 0x7f) + setup_header_size = 0x7f; + memcpy((unsigned char *)real_mode + 0x1f1, kernel + 0x1f1, + setup_header_size); + } else { + /* copy setup code and setup_header */ + memcpy(real_mode, kernel, kern16_size); + } + + if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) { + /* If using bzImage for capture kernel, then we will not be + * executing real mode code. setup segment can be loaded + * anywhere as we will be just reading command line. + */ + setup_base = add_buffer(info, real_mode, setup_size, setup_size, + 16, 0x3000, -1, -1); + } + else if (real_mode->protocol_version >= 0x0200) { + /* Careful setup_base must be greater than 8K */ + setup_base = add_buffer(info, real_mode, setup_size, setup_size, + 16, 0x3000, 640*1024, 1); + } else { + add_segment(info, real_mode, setup_size, SETUP_BASE, setup_size); + setup_base = SETUP_BASE; + } + dbgprintf("Loaded real-mode code and command line at 0x%lx\n", + setup_base); + + /* Verify purgatory loads higher than the parameters */ + if (info->rhdr.rel_addr < setup_base) { + die("Could not put setup code above the kernel parameters\n"); + } + + /* The main kernel segment */ + size = kernel_len - kern16_size; + + if (real_mode->protocol_version >=0x0205 && relocatable_kernel) { + /* Relocatable bzImage */ + unsigned long kern_align = real_mode->kernel_alignment; + unsigned long kernel32_max_addr = DEFAULT_BZIMAGE_ADDR_MAX; + + if (kernel32_max_addr > real_mode->initrd_addr_max) + kernel32_max_addr = real_mode->initrd_addr_max; + + kernel32_load_addr = add_buffer(info, kernel + kern16_size, + size, size, kern_align, + 0x100000, kernel32_max_addr, + 1); + } + else { + kernel32_load_addr = KERN32_BASE; + add_segment(info, kernel + kern16_size, size, + kernel32_load_addr, size); + } + + dbgprintf("Loaded 32bit kernel at 0x%lx\n", kernel32_load_addr); + + /* Tell the kernel what is going on */ + setup_linux_bootloader_parameters(info, real_mode, setup_base, + kern16_size_needed, command_line, command_line_len, + initrd, initrd_len); + + if (real_mode_entry && real_mode->protocol_version >= 0x0201) { + real_mode->loader_flags |= 0x80; /* CAN_USE_HEAP */ + real_mode->heap_end_ptr += heap_size - 0x200; /*stack*/ + } + + /* Get the initial register values */ + if (real_mode_entry) + elf_rel_get_symbol(&info->rhdr, "entry16_regs", + ®s16, sizeof(regs16)); + + /* + * Initialize the 32bit start information. + */ + regs32.eax = 0; /* unused */ + regs32.ebx = 0; /* 0 == boot not AP processor start */ + regs32.ecx = 0; /* unused */ + regs32.edx = 0; /* unused */ + regs32.esi = setup_base; /* kernel parameters */ + regs32.edi = 0; /* unused */ + regs32.esp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* stack, unused */ + regs32.ebp = 0; /* unused */ + regs32.eip = kernel32_load_addr; /* kernel entry point */ + + /* + * Initialize the 16bit start information. + */ + if (real_mode_entry) { + regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base >> 4; + regs16.cs = regs16.ds + 0x20; + regs16.ip = 0; + /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */ + regs16.ss = (elf_rel_get_addr(&info->rhdr, "stack_end") - 64*1024) >> 4; + /* XXX: Documentation/i386/boot.txt says 'sp' must equal heap_end */ + regs16.esp = 0xFFFC; + + printf("Starting the kernel in real mode\n"); + regs32.eip = elf_rel_get_addr(&info->rhdr, "entry16"); + real_mode->kernel_start = kernel32_load_addr; + } + if (real_mode_entry && kexec_debug) { + unsigned long entry16_debug, pre32, first32; + uint32_t old_first32; + /* Find the location of the symbols */ + entry16_debug = elf_rel_get_addr(&info->rhdr, "entry16_debug"); + pre32 = elf_rel_get_addr(&info->rhdr, "entry16_debug_pre32"); + first32 = elf_rel_get_addr(&info->rhdr, "entry16_debug_first32"); + + /* Hook all of the linux kernel hooks */ + real_mode->rmode_switch_cs = entry16_debug >> 4; + real_mode->rmode_switch_ip = pre32 - entry16_debug; + old_first32 = real_mode->kernel_start; + real_mode->kernel_start = first32; + elf_rel_set_symbol(&info->rhdr, "entry16_debug_old_first32", + &old_first32, sizeof(old_first32)); + + regs32.eip = entry16_debug; + } + if (real_mode_entry) { + elf_rel_set_symbol(&info->rhdr, "entry16_regs", + ®s16, sizeof(regs16)); + elf_rel_set_symbol(&info->rhdr, "entry16_debug_regs", + ®s16, sizeof(regs16)); + } + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s32, sizeof(regs32)); + cmdline_end = setup_base + kern16_size_needed + command_line_len - 1; + elf_rel_set_symbol(&info->rhdr, "cmdline_end", &cmdline_end, + sizeof(unsigned long)); + + /* Fill in the information BIOS calls would normally provide. */ + if (!real_mode_entry) { + setup_linux_system_parameters(info, real_mode); + } + + if (dtb) { + setup_linux_dtb(info, real_mode, dtb, dtb_len); + } + + return 0; +} + +int bzImage_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + char *command_line = NULL; + char *tmp_cmdline = NULL; + const char *ramdisk, *append = NULL, *dtb; + char *ramdisk_buf; + off_t ramdisk_length; + int command_line_len; + int real_mode_entry; + int opt; + int result; + char *dtb_buf; + off_t dtb_length; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_APPEND }, + { "append", 1, 0, OPT_APPEND }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "initrd", 1, 0, OPT_RAMDISK }, + { "ramdisk", 1, 0, OPT_RAMDISK }, + { "real-mode", 0, 0, OPT_REAL_MODE }, + { "dtb", 1, 0, OPT_DTB }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR "d"; + + real_mode_entry = 0; + ramdisk = 0; + ramdisk_length = 0; + dtb = 0; + dtb_length = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_REAL_MODE: + real_mode_entry = 1; + break; + case OPT_DTB: + dtb = optarg; + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); + if (tmp_cmdline) { + free(tmp_cmdline); + } + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) +1; + } else { + command_line = strdup("\0"); + command_line_len = 1; + } + ramdisk_buf = 0; + if (ramdisk) { + ramdisk_buf = slurp_file(ramdisk, &ramdisk_length); + } + dtb_buf = 0; + if (dtb) { + dtb_buf = slurp_file(dtb, &dtb_length); + } + result = do_bzImage_load(info, + buf, len, + command_line, command_line_len, + ramdisk_buf, ramdisk_length, + dtb_buf, dtb_length, + real_mode_entry); + + free(command_line); + return result; +} diff --git a/kexec/arch/i386/kexec-elf-rel-x86.c b/kexec/arch/i386/kexec-elf-rel-x86.c new file mode 100644 index 0000000..55a214e --- /dev/null +++ b/kexec/arch/i386/kexec-elf-rel-x86.c @@ -0,0 +1,36 @@ +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2LSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS32) { + return 0; + } + if ((ehdr->e_machine != EM_386) && (ehdr->e_machine != EM_486)) + { + return 0; + } + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + switch(r_type) { + case R_386_32: + *((uint32_t *)location) += value; + break; + case R_386_PC32: + *((uint32_t *)location) += value - address; + break; + default: + die("Unknown rel relocation: %lu\n", r_type); + break; + } +} diff --git a/kexec/arch/i386/kexec-elf-x86.c b/kexec/arch/i386/kexec-elf-x86.c new file mode 100644 index 0000000..8eba242 --- /dev/null +++ b/kexec/arch/i386/kexec-elf-x86.c @@ -0,0 +1,331 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include "x86-linux-setup.h" +#include "kexec-x86.h" +#include "crashdump-x86.h" +#include <arch/options.h> + +static const int probe_debug = 0; + +int elf_x86_any_probe(const char *buf, off_t len, enum coretype arch) +{ + + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + if (probe_debug) { + fprintf(stderr, "Not an ELF executable\n"); + } + goto out; + } + + /* Verify the architecuture specific bits */ + switch (arch) { + case CORE_TYPE_ELF32: + if ((ehdr.e_machine != EM_386) && (ehdr.e_machine != EM_486)) { + if (probe_debug) + fprintf(stderr, "Not i386 ELF executable\n"); + result = -1; + goto out; + } + break; + + case CORE_TYPE_ELF64: + if (ehdr.e_machine != EM_X86_64) { + if (probe_debug) + fprintf(stderr, "Not x86_64 ELF executable\n"); + result = -1; + goto out; + } + break; + + case CORE_TYPE_UNDEF: + default: + if ( + (ehdr.e_machine != EM_386) && + (ehdr.e_machine != EM_486) && + (ehdr.e_machine != EM_X86_64) + ) { + if (probe_debug) + fprintf(stderr, "Not i386 or x86_64 ELF executable\n"); + result = -1; + goto out; + } + break; + } + + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +int elf_x86_probe(const char *buf, off_t len) { + return elf_x86_any_probe(buf, len, CORE_TYPE_ELF32); +} + +void elf_x86_usage(void) +{ + printf( " --command-line=STRING Set the kernel command line to STRING\n" + " --append=STRING Set the kernel command line to STRING\n" + " --reuse-cmdline Use kernel command line from running system.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " --args-linux Pass linux kernel style options\n" + " --args-elf Pass elf boot notes\n" + ); + + +} + +int elf_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *command_line = NULL, *modified_cmdline = NULL; + const char *append = NULL; + char *tmp_cmdline = NULL; + const char *error_msg = NULL; + int result; + int command_line_len; + const char *ramdisk; + unsigned long entry, max_addr; + int arg_style; +#define ARG_STYLE_ELF 0 +#define ARG_STYLE_LINUX 1 +#define ARG_STYLE_NONE 2 + int opt; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "args-elf", 0, NULL, OPT_ARGS_ELF }, + { "args-linux", 0, NULL, OPT_ARGS_LINUX }, + { "args-none", 0, NULL, OPT_ARGS_NONE }, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* + * Parse the command line arguments + */ + arg_style = ARG_STYLE_ELF; + ramdisk = 0; + result = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_ARGS_ELF: + arg_style = ARG_STYLE_ELF; + break; + case OPT_ARGS_LINUX: + arg_style = ARG_STYLE_LINUX; + break; + case OPT_ARGS_NONE: +#ifdef __i386__ + arg_style = ARG_STYLE_NONE; +#else + die("--args-none only works on arch i386\n"); +#endif + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); + if (tmp_cmdline) { + free(tmp_cmdline); + } + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) +1; + } else { + command_line = strdup("\0"); + command_line_len = 1; + } + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & (KEXEC_ON_CRASH|KEXEC_PRESERVE_CONTEXT)) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (command_line) { + strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + } + + /* Load the ELF executable */ + elf_exec_build_load(info, &ehdr, buf, len, 0); + + entry = ehdr.e_entry; + max_addr = elf_max_addr(&ehdr); + + /* Do we want arguments? */ + if (arg_style != ARG_STYLE_NONE) { + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0, ULONG_MAX, 1, 0); + } + if (arg_style == ARG_STYLE_NONE) { + info->entry = (void *)entry; + + } + else if (arg_style == ARG_STYLE_ELF) { + unsigned long note_base; + struct entry32_regs regs; + uint32_t arg1, arg2; + + /* Setup the ELF boot notes */ + note_base = elf_boot_notes(info, max_addr, + command_line, command_line_len); + + /* Initialize the stack arguments */ + arg2 = 0; /* No return address */ + arg1 = note_base; + elf_rel_set_symbol(&info->rhdr, "stack_arg32_1", &arg1, sizeof(arg1)); + elf_rel_set_symbol(&info->rhdr, "stack_arg32_2", &arg2, sizeof(arg2)); + + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.eip = entry; /* The entry point */ + regs.esp = elf_rel_get_addr(&info->rhdr, "stack_arg32_2"); + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + + if (ramdisk) { + error_msg = "Ramdisks not supported with generic elf arguments"; + goto out; + } + } + else if (arg_style == ARG_STYLE_LINUX) { + struct x86_linux_faked_param_header *hdr; + unsigned long param_base; + const char *ramdisk_buf; + off_t ramdisk_length; + struct entry32_regs regs; + int rc = 0; + + /* Get the linux parameter header */ + hdr = xmalloc(sizeof(*hdr)); + + /* Hack: With some ld versions, vmlinux program headers show + * a gap of two pages between bss segment and data segment + * but effectively kernel considers it as bss segment and + * overwrites the any data placed there. Hence bloat the + * memsz of parameter segment to 16K to avoid being placed + * in such gaps. + * This is a makeshift solution until it is fixed in kernel + */ + param_base = add_buffer(info, hdr, sizeof(*hdr), 16*1024, + 16, 0, max_addr, 1); + + /* Initialize the parameter header */ + memset(hdr, 0, sizeof(*hdr)); + init_linux_parameters(&hdr->hdr); + + /* Add a ramdisk to the current image */ + ramdisk_buf = NULL; + ramdisk_length = 0; + if (ramdisk) { + ramdisk_buf = slurp_file(ramdisk, &ramdisk_length); + } + + /* If panic kernel is being loaded, additional segments need + * to be created. */ + if (info->kexec_flags & (KEXEC_ON_CRASH|KEXEC_PRESERVE_CONTEXT)) { + rc = load_crashdump_segments(info, modified_cmdline, + max_addr, 0); + if (rc < 0) { + result = -1; + goto out; + } + /* Use new command line. */ + free(command_line); + command_line = modified_cmdline; + command_line_len = strlen(modified_cmdline) + 1; + modified_cmdline = NULL; + } + + /* Tell the kernel what is going on */ + setup_linux_bootloader_parameters(info, &hdr->hdr, param_base, + offsetof(struct x86_linux_faked_param_header, command_line), + command_line, command_line_len, + ramdisk_buf, ramdisk_length); + + /* Fill in the information bios calls would usually provide */ + setup_linux_system_parameters(info, &hdr->hdr); + + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.ebx = 0; /* Bootstrap processor */ + regs.esi = param_base; /* Pointer to the parameters */ + regs.eip = entry; /* The entry point */ + regs.esp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */ + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + } + else { + error_msg = "Unknown argument style\n"; + } + +out: + free(command_line); + free(modified_cmdline); + if (error_msg) + die("%s", error_msg); + return result; +} diff --git a/kexec/arch/i386/kexec-mb2-x86.c b/kexec/arch/i386/kexec-mb2-x86.c new file mode 100644 index 0000000..0d2e93b --- /dev/null +++ b/kexec/arch/i386/kexec-mb2-x86.c @@ -0,0 +1,616 @@ +/* + * kexec-mb2-x86.c + * + * multiboot2 support for kexec to boot xen. + * + * Copyright (C) 2019 Varad Gautam (vrd at amazon.de), Amazon.com, Inc. or its affiliates. + * + * Parts based on GNU GRUB, Copyright (C) 2000 Free Software Foundation, Inc + * Parts taken from kexec-multiboot-x86.c, Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + * + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-x86.h" +#include "../../kexec-syscall.h" +#include "../../kexec-xen.h" +#include <arch/options.h> + +/* From GNU GRUB */ +#include <x86/multiboot2.h> +#include <x86/mb_info.h> + +/* Framebuffer */ +#include <sys/ioctl.h> +#include <linux/fb.h> + +extern struct arch_options_t arch_options; + +/* Static storage */ +static char headerbuf[MULTIBOOT_SEARCH]; +static struct multiboot_header *mbh = NULL; +struct multiboot2_header_info { + struct multiboot_header_tag_information_request *request_tag; + struct multiboot_header_tag_address *addr_tag; + struct multiboot_header_tag_entry_address *entry_addr_tag; + struct multiboot_header_tag_console_flags *console_tag; + struct multiboot_header_tag_framebuffer *fb_tag; + struct multiboot_header_tag_module_align *mod_align_tag; + struct multiboot_header_tag_relocatable *rel_tag; +} mhi; + +#define ALIGN_UP(addr, align) \ + ((addr + (typeof (addr)) align - 1) & ~((typeof (addr)) align - 1)) + +int multiboot2_x86_probe(const char *buf, off_t buf_len) +/* Is it a good idea to try booting this file? */ +{ + int i, len; + + /* First of all, check that this is an ELF file for either x86 or x86-64 */ + i = elf_x86_any_probe(buf, buf_len, CORE_TYPE_UNDEF); + if (i < 0) + return i; + + /* Now look for a multiboot header. */ + len = MULTIBOOT_SEARCH; + if (len > buf_len) + len = buf_len; + + memcpy(headerbuf, buf, len); + if (len < sizeof(struct multiboot_header)) { + /* Short file */ + return -1; + } + for (mbh = (struct multiboot_header *) headerbuf; + ((char *) mbh <= (char *) headerbuf + len - sizeof(struct multiboot_header)); + mbh = (struct multiboot_header *) ((char *) mbh + MULTIBOOT_HEADER_ALIGN)) { + if (mbh->magic == MULTIBOOT2_HEADER_MAGIC + && !((mbh->magic+mbh->architecture+mbh->header_length+mbh->checksum) & 0xffffffff)) { + /* Found multiboot header. */ + return 0; + } + } + /* Not multiboot */ + return -1; +} + +void multiboot2_x86_usage(void) +/* Multiboot-specific options */ +{ + printf(" --command-line=STRING Set the kernel command line to STRING.\n"); + printf(" --reuse-cmdline Use kernel command line from running system.\n"); + printf(" --module=\"MOD arg1 arg2...\" Load module MOD with command-line \"arg1...\"\n"); + printf(" (can be used multiple times).\n"); +} + +static size_t +multiboot2_get_mbi_size(int ranges, int cmdline_size, int modcount, int modcmd_size) +{ + size_t mbi_size; + + mbi_size = (2 * sizeof (uint32_t) /* u32 total_size, u32 reserved */ + + ALIGN_UP (sizeof (struct multiboot_tag_basic_meminfo), MULTIBOOT_TAG_ALIGN) + + ALIGN_UP ((sizeof (struct multiboot_tag_mmap) + + ranges * sizeof (struct multiboot_mmap_entry)), MULTIBOOT_TAG_ALIGN) + + (sizeof (struct multiboot_tag_string) + + ALIGN_UP (cmdline_size, MULTIBOOT_TAG_ALIGN)) + + (sizeof (struct multiboot_tag_string) + + ALIGN_UP (strlen(BOOTLOADER " " BOOTLOADER_VERSION) + 1, MULTIBOOT_TAG_ALIGN)) + + (modcount * sizeof (struct multiboot_tag_module) + modcmd_size)) + + sizeof (struct multiboot_tag); /* end tag */ + + if (mhi.rel_tag) + mbi_size += ALIGN_UP (sizeof (struct multiboot_tag_load_base_addr), MULTIBOOT_TAG_ALIGN); + + if (mhi.fb_tag) + mbi_size += ALIGN_UP (sizeof (struct multiboot_tag_framebuffer), MULTIBOOT_TAG_ALIGN); + + return mbi_size; +} + +static void multiboot2_read_header_tags(void) +{ + struct multiboot_header_tag *tag; + + for (tag = (struct multiboot_header_tag *) (mbh + 1); + tag->type != MULTIBOOT_TAG_TYPE_END; + tag = (struct multiboot_header_tag *) ((char *) tag + ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN))) + { + switch (tag->type) + { + case MULTIBOOT_HEADER_TAG_INFORMATION_REQUEST: + { + mhi.request_tag = (struct multiboot_header_tag_information_request *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_RELOCATABLE: + { + mhi.rel_tag = (struct multiboot_header_tag_relocatable *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_ADDRESS: + { + mhi.addr_tag = (struct multiboot_header_tag_address *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS: + { + mhi.entry_addr_tag = (struct multiboot_header_tag_entry_address *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_CONSOLE_FLAGS: + { + mhi.console_tag = (struct multiboot_header_tag_console_flags *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_FRAMEBUFFER: + { + mhi.fb_tag = (struct multiboot_header_tag_framebuffer *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_MODULE_ALIGN: + { + mhi.mod_align_tag = (struct multiboot_header_tag_module_align *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS_EFI64: + case MULTIBOOT_HEADER_TAG_EFI_BS: + /* Ignoring EFI. */ + break; + default: + { + if (!(tag->flags & MULTIBOOT_HEADER_TAG_OPTIONAL)) + fprintf(stderr, "unsupported tag: 0x%x", tag->type); + break; + } + } + } +} + +struct multiboot_mmap_entry *multiboot_construct_memory_map(struct memory_range *range, + int ranges, + unsigned long long *mem_lower, + unsigned long long *mem_upper) +{ + struct multiboot_mmap_entry *entries; + int i; + + *mem_lower = *mem_upper = 0; + entries = xmalloc(ranges * sizeof(*entries)); + for (i = 0; i < ranges; i++) { + entries[i].addr = range[i].start; + entries[i].len = range[i].end - range[i].start + 1; + + if (range[i].type == RANGE_RAM) { + entries[i].type = MULTIBOOT_MEMORY_AVAILABLE; + /* + * Is this the "low" memory? Can't just test + * against zero, because Linux protects (and + * hides) the first few pages of physical + * memory. + */ + + if ((range[i].start <= 64*1024) + && (range[i].end > *mem_lower)) { + range[i].start = 0; + *mem_lower = range[i].end; + } + /* Is this the "high" memory? */ + if ((range[i].start <= 0x100000) + && (range[i].end > *mem_upper + 0x100000)) + *mem_upper = range[i].end - 0x100000; + } else if (range[i].type == RANGE_ACPI) + entries[i].type = MULTIBOOT_MEMORY_ACPI_RECLAIMABLE; + else if (range[i].type == RANGE_ACPI_NVS) + entries[i].type = MULTIBOOT_MEMORY_NVS; + else if (range[i].type == RANGE_RESERVED) + entries[i].type = MULTIBOOT_MEMORY_RESERVED; + } + return entries; +} + +static uint64_t multiboot2_make_mbi(struct kexec_info *info, char *cmdline, int cmdline_len, + unsigned long load_base_addr, void *mbi_buf, size_t mbi_bytes) +{ + uint64_t *ptrorig = mbi_buf; + struct multiboot_mmap_entry *mmap_entries; + unsigned long long mem_lower = 0, mem_upper = 0; + + *ptrorig = mbi_bytes; /* u32 total_size, u32 reserved */ + ptrorig++; + + mmap_entries = multiboot_construct_memory_map(info->memory_range, info->memory_ranges, &mem_lower, &mem_upper); + { + struct multiboot_tag_basic_meminfo *tag = (struct multiboot_tag_basic_meminfo *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_BASIC_MEMINFO; + tag->size = sizeof (struct multiboot_tag_basic_meminfo); + tag->mem_lower = mem_lower >> 10; + tag->mem_upper = mem_upper >> 10; + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + { + struct multiboot_tag_mmap *tag = (struct multiboot_tag_mmap *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_MMAP; + tag->size = sizeof(struct multiboot_tag_mmap) + sizeof(struct multiboot_mmap_entry) * info->memory_ranges; + tag->entry_size = sizeof(struct multiboot_mmap_entry); + tag->entry_version = 0; + memcpy(tag->entries, mmap_entries, tag->entry_size * info->memory_ranges); + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + if (mhi.rel_tag) { + struct multiboot_tag_load_base_addr *tag = (struct multiboot_tag_load_base_addr *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_LOAD_BASE_ADDR; + tag->size = sizeof (struct multiboot_tag_load_base_addr); + tag->load_base_addr = load_base_addr; + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + { + struct multiboot_tag_string *tag = (struct multiboot_tag_string *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_CMDLINE; + tag->size = sizeof (struct multiboot_tag_string) + cmdline_len; + memcpy(tag->string, cmdline, cmdline_len); + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + { + struct multiboot_tag_string *tag = (struct multiboot_tag_string *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME; + tag->size = sizeof(struct multiboot_tag_string) + strlen(BOOTLOADER " " BOOTLOADER_VERSION) + 1; + sprintf(tag->string, "%s", BOOTLOADER " " BOOTLOADER_VERSION); + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + if (mhi.fb_tag) { + struct multiboot_tag_framebuffer *tag = (struct multiboot_tag_framebuffer *) ptrorig; + struct fb_fix_screeninfo info; + struct fb_var_screeninfo mode; + int fd; + + tag->common.type = MULTIBOOT_TAG_TYPE_FRAMEBUFFER; + tag->common.size = sizeof(struct multiboot_tag_framebuffer); + /* check if purgatory will reset to standard ega text mode */ + if (arch_options.reset_vga || arch_options.console_vga) { + tag->common.framebuffer_type = MB_FRAMEBUFFER_TYPE_EGA_TEXT; + tag->common.framebuffer_addr = 0xb8000; + tag->common.framebuffer_pitch = 80*2; + tag->common.framebuffer_width = 80; + tag->common.framebuffer_height = 25; + tag->common.framebuffer_bpp = 16; + + ptrorig += ALIGN_UP (tag->common.size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + goto out; + } + + /* use current graphics framebuffer settings */ + fd = open("/dev/fb0", O_RDONLY); + if (fd < 0) { + fprintf(stderr, "can't open /dev/fb0: %s\n", strerror(errno)); + goto out; + } + if (ioctl(fd, FBIOGET_FSCREENINFO, &info) < 0){ + fprintf(stderr, "can't get screeninfo: %s\n", strerror(errno)); + close(fd); + goto out; + } + if (ioctl(fd, FBIOGET_VSCREENINFO, &mode) < 0){ + fprintf(stderr, "can't get modeinfo: %s\n", strerror(errno)); + close(fd); + goto out; + } + close(fd); + + if (info.smem_start == 0 || info.smem_len == 0) { + fprintf(stderr, "can't get linerar framebuffer address\n"); + goto out; + } + + if (info.type != FB_TYPE_PACKED_PIXELS) { + fprintf(stderr, "unsupported framebuffer type\n"); + goto out; + } + + if (info.visual != FB_VISUAL_TRUECOLOR) { + fprintf(stderr, "unsupported framebuffer visual\n"); + goto out; + } + + tag->common.framebuffer_type = MB_FRAMEBUFFER_TYPE_RGB; + tag->common.framebuffer_addr = info.smem_start; + tag->common.framebuffer_pitch = info.line_length; + tag->common.framebuffer_width = mode.xres; + tag->common.framebuffer_height = mode.yres; + tag->common.framebuffer_bpp = mode.bits_per_pixel; + + tag->framebuffer_red_field_position = mode.red.offset; + tag->framebuffer_red_mask_size = mode.red.length; + tag->framebuffer_green_field_position = mode.green.offset; + tag->framebuffer_green_mask_size = mode.green.length; + tag->framebuffer_blue_field_position = mode.blue.offset; + tag->framebuffer_blue_mask_size = mode.blue.length; + + ptrorig += ALIGN_UP (tag->common.size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + +out: + return (uint64_t) (uintptr_t) ptrorig; +} + +static uint64_t multiboot2_mbi_add_module(void *mbi_buf, uint64_t mbi_ptr, uint32_t mod_start, + uint32_t mod_end, char *mod_clp) +{ + struct multiboot_tag_module *tag = (struct multiboot_tag_module *) (uintptr_t) mbi_ptr; + + tag->type = MULTIBOOT_TAG_TYPE_MODULE; + tag->size = sizeof(struct multiboot_tag_module) + strlen((char *)(long) mod_clp) + 1; + tag->mod_start = mod_start; + tag->mod_end = mod_end; + + memcpy(tag->cmdline, (char *)(long) mod_clp, strlen((char *)(long) mod_clp) + 1); + mbi_ptr += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN); + + return mbi_ptr; +} + +static uint64_t multiboot2_mbi_end(void *mbi_buf, uint64_t mbi_ptr) +{ + struct multiboot_tag *tag = (struct multiboot_tag *) (uintptr_t) mbi_ptr; + + tag->type = MULTIBOOT_TAG_TYPE_END; + tag->size = sizeof (struct multiboot_tag); + mbi_ptr += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN); + + return mbi_ptr; +} + +static inline int multiboot2_rel_valid(struct multiboot_header_tag_relocatable *rel_tag, + uint64_t rel_start, uint64_t rel_end) +{ + if (rel_start >= rel_tag->min_addr && rel_end <= rel_tag->max_addr) + return 1; + + return 0; +} + +int multiboot2_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + void *mbi_buf; + size_t mbi_bytes; + unsigned long addr; + struct entry32_regs regs; + char *command_line = NULL, *tmp_cmdline = NULL; + int command_line_len; + char *imagename, *cp, *append = NULL;; + int opt; + int modules, mod_command_line_space; + uint64_t mbi_ptr; + char *mod_clp_base; + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_CL }, + { "append", 1, 0, OPT_CL }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "module", 1, 0, OPT_MOD }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + uint64_t rel_min, rel_max; + + /* Probe for the MB header if it's not already found */ + if (mbh == NULL && multiboot_x86_probe(buf, len) != 1) + { + fprintf(stderr, "Cannot find a loadable multiboot2 header.\n"); + return -1; + } + + /* Parse the header tags. */ + multiboot2_read_header_tags(); + + /* Parse the command line */ + command_line_len = 0; + modules = 0; + mod_command_line_space = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) + { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_CL: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_MOD: + modules++; + mod_command_line_space += strlen(optarg) + 1; + break; + } + } + imagename = argv[optind]; + + /* Final command line = imagename + <OPT_REUSE_CMDLINE> + <OPT_CL> */ + tmp_cmdline = concat_cmdline(command_line, append); + if (command_line) { + free(command_line); + } + command_line = concat_cmdline(imagename, tmp_cmdline); + if (tmp_cmdline) { + free(tmp_cmdline); + } + + if (xen_present() && info->kexec_flags & KEXEC_LIVE_UPDATE ) { + if (!mhi.rel_tag) { + fprintf(stderr, "Multiboot2 image must be relocatable" + "for KEXEC_LIVE_UPDATE.\n"); + return -1; + } + cmdline_add_liveupdate(&command_line); + } + + command_line_len = strlen(command_line) + 1; + + /* Load the ELF executable */ + if (mhi.rel_tag) { + rel_min = mhi.rel_tag->min_addr; + rel_max = mhi.rel_tag->max_addr; + + if (info->kexec_flags & KEXEC_LIVE_UPDATE && xen_present()) { + /* TODO also check if elf is xen */ + /* On a live update, load target xen over the current xen image. */ + uint64_t xen_start, xen_end; + + xen_get_kexec_range(KEXEC_RANGE_MA_XEN, &xen_start, &xen_end); + if (multiboot2_rel_valid(mhi.rel_tag, xen_start, xen_end)) { + rel_min = xen_start; + } else { + fprintf(stderr, "Cannot place Elf into " + "KEXEC_RANGE_MA_XEN for KEXEC_LIVE_UPDATE.\n"); + return -1; + } + } + + elf_exec_build_load_relocatable(info, &ehdr, buf, len, 0, + rel_min, rel_max, mhi.rel_tag->align); + } else + elf_exec_build_load(info, &ehdr, buf, len, 0); + + if (info->kexec_flags & KEXEC_LIVE_UPDATE && xen_present()) { + uint64_t lu_start, lu_end; + + xen_get_kexec_range(7 /* KEXEC_RANGE_MA_LIVEUPDATE */, &lu_start, &lu_end); + /* Fit everything else into lu_start-lu_end. First page after lu_start is + * reserved for LU breadcrumb. */ + rel_min = lu_start + 4096; + rel_max = lu_end; + } else { + rel_min = 0x500; + rel_max = ULONG_MAX; + } + + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + rel_min, rel_max, 1, 0); + + /* Construct information tags. */ + mbi_bytes = multiboot2_get_mbi_size(info->memory_ranges, command_line_len, modules, mod_command_line_space); + mbi_buf = xmalloc(mbi_bytes); + + mbi_ptr = multiboot2_make_mbi(info, command_line, command_line_len, info->rhdr.rel_addr, mbi_buf, mbi_bytes); + free(command_line); + + if (info->kexec_flags & KEXEC_LIVE_UPDATE && xen_present()) { + if (multiboot2_rel_valid(mhi.rel_tag, rel_min, rel_max)) { + /* Shrink the reloc range to fit into LU region for xen. */ + mhi.rel_tag->min_addr = rel_min; + mhi.rel_tag->max_addr = rel_max; + } else { + fprintf(stderr, "Multiboot2 image cannot be relocated into " + "KEXEC_RANGE_MA_LIVEUPDATE for KEXEC_LIVE_UPDATE.\n"); + return -1; + } + } + + /* Load modules */ + if (modules) { + char *mod_filename, *mod_command_line, *mod_clp, *buf; + off_t mod_size; + int i = 0; + + mod_clp_base = xmalloc(mod_command_line_space); + + /* Go back and parse the module command lines */ + mod_clp = mod_clp_base; + optind = opterr = 1; + while((opt = getopt_long(argc, argv, + short_options, options, 0)) != -1) { + if (opt != OPT_MOD) continue; + + /* Split module filename from command line */ + mod_command_line = mod_filename = optarg; + if ((cp = strchr(mod_filename, ' ')) != NULL) { + /* See as I discard the 'const' modifier */ + *cp = '\0'; + } + + /* Load the module */ + buf = slurp_decompress_file(mod_filename, &mod_size); + + if (cp != NULL) *cp = ' '; + + /* Pick the next aligned spot to load it in. Always page align. */ + addr = add_buffer(info, buf, mod_size, mod_size, getpagesize(), + rel_min, rel_max, 1); + + /* Add the module command line */ + sprintf(mod_clp, "%s", mod_command_line); + + mbi_ptr = multiboot2_mbi_add_module(mbi_buf, mbi_ptr, addr, addr + mod_size, mod_clp); + + mod_clp += strlen(mod_clp) + 1; + i++; + } + + free(mod_clp_base); + } + + mbi_ptr = multiboot2_mbi_end(mbi_buf, mbi_ptr); + + if (sort_segments(info) < 0) + return -1; + + addr = add_buffer(info, mbi_buf, mbi_bytes, mbi_bytes, 4, + rel_min, rel_max, 1); + + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.eax = MULTIBOOT2_BOOTLOADER_MAGIC; + regs.ebx = addr; + regs.eip = ehdr.e_entry; + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + + return 0; +} diff --git a/kexec/arch/i386/kexec-multiboot-x86.c b/kexec/arch/i386/kexec-multiboot-x86.c new file mode 100644 index 0000000..33c885a --- /dev/null +++ b/kexec/arch/i386/kexec-multiboot-x86.c @@ -0,0 +1,505 @@ +/* + * kexec-multiboot-x86.c + * + * (partial) multiboot support for kexec. Only supports ELF32 + * kernels, and a subset of the multiboot info page options + * (i.e. enough to boot the Xen hypervisor). + * + * TODO: + * - smarter allocation of new segments + * - proper support for the MULTIBOOT_VIDEO_MODE bit + * + * + * Copyright (C) 2003 Tim Deegan (tjd21 at cl.cam.ac.uk) + * + * Parts based on GNU GRUB, Copyright (C) 2000 Free Software Foundation, Inc + * Parts copied from kexec-elf32-x86.c, written by Eric Biederman + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + * + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-x86.h" +#include <arch/options.h> + +/* From GNU GRUB */ +#include <x86/mb_header.h> +#include <x86/mb_info.h> + +/* Framebuffer */ +#include <sys/ioctl.h> +#include <linux/fb.h> + +extern struct arch_options_t arch_options; + +/* Static storage */ +static char headerbuf[MULTIBOOT_SEARCH]; +static struct multiboot_header *mbh = NULL; +static off_t mbh_offset = 0; + +#define MIN(_x,_y) (((_x)<=(_y))?(_x):(_y)) + + +int multiboot_x86_probe(const char *buf, off_t buf_len) +/* Is it a good idea to try booting this file? */ +{ + int i, len; + /* Now look for a multiboot header in the first 8KB */ + len = MULTIBOOT_SEARCH; + if (len > buf_len) { + len = buf_len; + } + memcpy(headerbuf, buf, len); + if (len < 12) { + /* Short file */ + return -1; + } + for (mbh_offset = 0; mbh_offset <= (len - 12); mbh_offset += 4) + { + /* Search for a multiboot header */ + mbh = (struct multiboot_header *)(headerbuf + mbh_offset); + if (mbh->magic != MULTIBOOT_MAGIC + || ((mbh->magic+mbh->flags+mbh->checksum) & 0xffffffff)) + { + /* Not a multiboot header */ + continue; + } + if (mbh->flags & MULTIBOOT_AOUT_KLUDGE) { + if (mbh->load_addr & 0xfff) { + fprintf(stderr, "multiboot load address not 4k aligned\n"); + return -1; + } + if (mbh->load_addr > mbh->header_addr) { + fprintf(stderr, "multiboot header address > load address\n"); + return -1; + } + if (mbh->load_end_addr < mbh->load_addr) { + fprintf(stderr, "multiboot load end address < load address\n"); + return -1; + } + if (mbh->bss_end_addr < mbh->load_end_addr) { + fprintf(stderr, "multiboot bss end address < load end address\n"); + return -1; + } + if (mbh->load_end_addr - mbh->header_addr > buf_len - mbh_offset) { + fprintf(stderr, "multiboot file truncated\n"); + return -1; + } + if (mbh->entry_addr < mbh->load_addr || mbh->entry_addr >= mbh->load_end_addr) { + fprintf(stderr, "multiboot entry out of range\n"); + return -1; + } + } else { + if ((i=elf_x86_probe(buf, buf_len)) < 0) + return i; + } + if (mbh->flags & MULTIBOOT_UNSUPPORTED) { + /* Requires options we don't support */ + fprintf(stderr, + "Found a multiboot header, but it " + "requires multiboot options that I\n" + "don't understand. Sorry.\n"); + return -1; + } + /* Bootable */ + return 0; + } + /* Not multiboot */ + return -1; +} + + +void multiboot_x86_usage(void) +/* Multiboot-specific options */ +{ + printf(" --command-line=STRING Set the kernel command line to STRING.\n"); + printf(" --reuse-cmdline Use kernel command line from running system.\n"); + printf(" --module=\"MOD arg1 arg2...\" Load module MOD with command-line \"arg1...\"\n"); + printf(" (can be used multiple times).\n"); +} + + +static int framebuffer_info(struct multiboot_info *mbi) +{ + struct fb_fix_screeninfo info; + struct fb_var_screeninfo mode; + int fd; + + /* check if purgatory will reset to standard ega text mode */ + if (arch_options.reset_vga || arch_options.console_vga) { + mbi->framebuffer_type = MB_FRAMEBUFFER_TYPE_EGA_TEXT; + mbi->framebuffer_addr = 0xb8000; + mbi->framebuffer_pitch = 80*2; + mbi->framebuffer_width = 80; + mbi->framebuffer_height = 25; + mbi->framebuffer_bpp = 16; + + mbi->flags |= MB_INFO_FRAMEBUFFER_INFO; + return 0; + } + + /* use current graphics framebuffer settings */ + fd = open("/dev/fb0", O_RDONLY); + if (fd < 0) { + fprintf(stderr, "can't open /dev/fb0: %s\n", strerror(errno)); + return -1; + } + if (ioctl(fd, FBIOGET_FSCREENINFO, &info) < 0){ + fprintf(stderr, "can't get screeninfo: %s\n", strerror(errno)); + close(fd); + return -1; + } + if (ioctl(fd, FBIOGET_VSCREENINFO, &mode) < 0){ + fprintf(stderr, "can't get modeinfo: %s\n", strerror(errno)); + close(fd); + return -1; + } + close(fd); + + if (info.smem_start == 0 || info.smem_len == 0) { + fprintf(stderr, "can't get linerar framebuffer address\n"); + return -1; + } + + if (info.type != FB_TYPE_PACKED_PIXELS) { + fprintf(stderr, "unsupported framebuffer type\n"); + return -1; + } + + if (info.visual != FB_VISUAL_TRUECOLOR) { + fprintf(stderr, "unsupported framebuffer visual\n"); + return -1; + } + + mbi->framebuffer_type = MB_FRAMEBUFFER_TYPE_RGB; + mbi->framebuffer_addr = info.smem_start; + mbi->framebuffer_pitch = info.line_length; + mbi->framebuffer_width = mode.xres; + mbi->framebuffer_height = mode.yres; + mbi->framebuffer_bpp = mode.bits_per_pixel; + mbi->framebuffer_red_field_position = mode.red.offset; + mbi->framebuffer_red_mask_size = mode.red.length; + mbi->framebuffer_green_field_position = mode.green.offset; + mbi->framebuffer_green_mask_size = mode.green.length; + mbi->framebuffer_blue_field_position = mode.blue.offset; + mbi->framebuffer_blue_mask_size = mode.blue.length; + + mbi->flags |= MB_INFO_FRAMEBUFFER_INFO; + return 0; +} + +int multiboot_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +/* Marshal up a multiboot-style kernel */ +{ + struct multiboot_info *mbi; + void *mbi_buf; + struct mod_list *modp; + unsigned long freespace; + unsigned long long mem_lower = 0, mem_upper = 0; + struct mem_ehdr ehdr; + unsigned long mbi_base; + struct entry32_regs regs; + size_t mbi_bytes, mbi_offset; + char *command_line = NULL, *tmp_cmdline = NULL; + char *imagename, *cp, *append = NULL;; + struct memory_range *range; + int ranges; + struct AddrRangeDesc *mmap; + int command_line_len; + int i, result; + uint32_t u, entry; + int opt; + int modules, mod_command_line_space; + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_CL }, + { "append", 1, 0, OPT_CL }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "module", 1, 0, OPT_MOD }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + /* Probe for the MB header if it's not already found */ + if (mbh == NULL && multiboot_x86_probe(buf, len) != 1) { + fprintf(stderr, "Cannot find a loadable multiboot header.\n"); + return -1; + } + + /* Parse the command line */ + command_line_len = 0; + modules = 0; + mod_command_line_space = 0; + result = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_CL: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + command_line = get_command_line(); + break; + case OPT_MOD: + modules++; + mod_command_line_space += strlen(optarg) + 1; + break; + } + } + imagename = argv[optind]; + + /* Final command line = imagename + <OPT_REUSE_CMDLINE> + <OPT_CL> */ + tmp_cmdline = concat_cmdline(command_line, append); + if (command_line) { + free(command_line); + } + command_line = concat_cmdline(imagename, tmp_cmdline); + if (tmp_cmdline) { + free(tmp_cmdline); + } + command_line_len = strlen(command_line) + 1; + + if (mbh->flags & MULTIBOOT_AOUT_KLUDGE) { + add_segment(info, + buf + (mbh_offset - (mbh->header_addr - mbh->load_addr)), + mbh->load_end_addr - mbh->load_addr, + mbh->load_addr, + mbh->bss_end_addr - mbh->load_addr); + entry = mbh->entry_addr; + } else { + /* Load the ELF executable */ + elf_exec_build_load(info, &ehdr, buf, len, 0); + entry = ehdr.e_entry; + } + + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, 0, + ULONG_MAX, 1, 0); + + /* The first segment will contain the multiboot headers: + * ============= + * multiboot information (mbi) + * ------------- + * kernel command line + * ------------- + * bootloader name + * ------------- + * module information entries + * ------------- + * module command lines + * ============== + */ + mbi_bytes = _ALIGN(sizeof(*mbi) + command_line_len + + strlen (BOOTLOADER " " BOOTLOADER_VERSION) + 1, 4); + mbi_buf = xmalloc(mbi_bytes); + mbi = mbi_buf; + memset(mbi, 0, sizeof(*mbi)); + sprintf(((char *)mbi) + sizeof(*mbi), "%s", command_line); + sprintf(((char *)mbi) + sizeof(*mbi) + command_line_len, "%s", + BOOTLOADER " " BOOTLOADER_VERSION); + mbi->flags = MB_INFO_CMDLINE | MB_INFO_BOOT_LOADER_NAME; + /* We'll relocate these to absolute addresses later. For now, + * all addresses within the first segment are relative to the + * start of the MBI. */ + mbi->cmdline = sizeof(*mbi); + mbi->boot_loader_name = sizeof(*mbi) + command_line_len; + + /* Memory map */ + range = info->memory_range; + ranges = info->memory_ranges; + mmap = xmalloc(ranges * sizeof(*mmap)); + for (i=0; i<ranges; i++) { + unsigned long long length; + length = range[i].end - range[i].start + 1; + /* Translate bzImage mmap to multiboot-speak */ + mmap[i].size = sizeof(mmap[i]) - 4; + mmap[i].base_addr_low = range[i].start & 0xffffffff; + mmap[i].base_addr_high = range[i].start >> 32; + mmap[i].length_low = length & 0xffffffff; + mmap[i].length_high = length >> 32; + switch (range[i].type) { + case RANGE_RAM: + mmap[i].Type = 1; /* RAM */ + /* + * Is this the "low" memory? Can't just test + * against zero, because Linux protects (and + * hides) the first few pages of physical + * memory. + */ + + if ((range[i].start <= 64*1024) + && (range[i].end > mem_lower)) { + range[i].start = 0; + mem_lower = range[i].end; + } + /* Is this the "high" memory? */ + if ((range[i].start <= 0x100000) + && (range[i].end > mem_upper + 0x100000)) + mem_upper = range[i].end - 0x100000; + break; + case RANGE_ACPI: + mmap[i].Type = 3; + break; + case RANGE_ACPI_NVS: + mmap[i].Type = 4; + break; + case RANGE_RESERVED: + default: + mmap[i].Type = 2; /* Not RAM (reserved) */ + } + } + + if (mbh->flags & MULTIBOOT_MEMORY_INFO) { + /* Provide a copy of the memory map to the kernel */ + + mbi->flags |= MB_INFO_MEMORY | MB_INFO_MEM_MAP; + + freespace = add_buffer(info, + mmap, ranges * sizeof(*mmap), ranges * sizeof(*mmap), + 4, 0, 0xFFFFFFFFUL, 1); + + mbi->mmap_addr = freespace; + mbi->mmap_length = ranges * sizeof(*mmap); + + /* For kernels that care naught for fancy memory maps + * and just want the size of low and high memory */ + mbi->mem_lower = MIN(mem_lower>>10, 0xffffffff); + mbi->mem_upper = MIN(mem_upper>>10, 0xffffffff); + + /* done */ + } + + /* Video */ + if (mbh->flags & MULTIBOOT_VIDEO_MODE) { + if (framebuffer_info(mbi) < 0) + fprintf(stderr, "not providing framebuffer information.\n"); + } + + /* Load modules */ + if (modules) { + char *mod_filename, *mod_command_line, *mod_clp, *buf; + off_t mod_size; + + /* We'll relocate this to an absolute address later */ + mbi->mods_addr = mbi_bytes; + mbi->mods_count = 0; + mbi->flags |= MB_INFO_MODS; + + /* Add room for the module descriptors to the MBI buffer */ + mbi_bytes += (sizeof(*modp) * modules) + + mod_command_line_space; + mbi_buf = xrealloc(mbi_buf, mbi_bytes); + + /* mbi might have moved */ + mbi = mbi_buf; + /* module descriptors go in the newly added space */ + modp = ((void *)mbi) + mbi->mods_addr; + /* module command lines go after the descriptors */ + mod_clp = ((void *)modp) + (sizeof(*modp) * modules); + + /* Go back and parse the module command lines */ + optind = opterr = 1; + while((opt = getopt_long(argc, argv, + short_options, options, 0)) != -1) { + if (opt != OPT_MOD) continue; + + /* Split module filename from command line */ + mod_command_line = mod_filename = optarg; + if ((cp = strchr(mod_filename, ' ')) != NULL) { + /* See as I discard the 'const' modifier */ + *cp = '\0'; + } + + /* Load the module */ + buf = slurp_decompress_file(mod_filename, &mod_size); + + if (cp != NULL) *cp = ' '; + + /* Pick the next aligned spot to load it in */ + freespace = add_buffer(info, + buf, mod_size, mod_size, + getpagesize(), 0, 0xffffffffUL, 1); + + /* Add the module command line */ + sprintf(mod_clp, "%s", mod_command_line); + + modp->mod_start = freespace; + modp->mod_end = freespace + mod_size; + modp->cmdline = (void *)mod_clp - (void *)mbi; + modp->pad = 0; + + /* Done */ + mbi->mods_count++; + mod_clp += strlen(mod_clp) + 1; + modp++; + } + } + + /* Find a place for the MBI to live */ + if (sort_segments(info) < 0) { + result = -1; + goto out; + } + mbi_base = add_buffer(info, + mbi_buf, mbi_bytes, mbi_bytes, 4, 0, 0xFFFFFFFFUL, 1); + + /* Relocate offsets in the MBI to absolute addresses */ + mbi_offset = mbi_base; + modp = ((void *)mbi) + mbi->mods_addr; + for (u = 0; u < mbi->mods_count; u++) { + modp[u].cmdline += mbi_offset; + } + mbi->mods_addr += mbi_offset; + mbi->cmdline += mbi_offset; + mbi->boot_loader_name += mbi_offset; + + /* Specify the initial CPU state and copy the setup code */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.eax = 0x2BADB002; + regs.ebx = mbi_offset; + regs.eip = entry; + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + +out: + free(command_line); + return result; +} + +/* + * EOF (kexec-multiboot-x86.c) + */ diff --git a/kexec/arch/i386/kexec-nbi.c b/kexec/arch/i386/kexec-nbi.c new file mode 100644 index 0000000..8eb2154 --- /dev/null +++ b/kexec/arch/i386/kexec-nbi.c @@ -0,0 +1,249 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include "kexec-x86.h" +#include <arch/options.h> + +struct segheader +{ + uint8_t length; + uint8_t vendortag; + uint8_t reserved; + uint8_t flags; +#define NBI_SEG 0x3 +#define NBI_SEG_ABSOLUTE 0 +#define NBI_SEG_APPEND 1 +#define NBI_SEG_NEGATIVE 2 +#define NBI_SEG_PREPEND 3 +#define NBI_LAST_SEG (1 << 2) + uint32_t loadaddr; + uint32_t imglength; + uint32_t memlength; +}; + +struct imgheader +{ +#define NBI_MAGIC "\x36\x13\x03\x1b" + uint8_t magic[4]; +#define NBI_RETURNS (1 << 8) +#define NBI_ENTRY32 (1 << 31) + uint32_t length; /* and flags */ + struct { uint16_t bx, ds; } segoff; + union { + struct { uint16_t ip, cs; } segoff; + uint32_t linear; + } execaddr; +}; + + +static const int probe_debug = 0; + +int nbi_probe(const char *buf, off_t len) +{ + struct imgheader hdr; + struct segheader seg; + off_t seg_off; + /* If we don't have enough data give up */ + if (((uintmax_t)len < (uintmax_t)sizeof(hdr)) || (len < 512)) { + return -1; + } + memcpy(&hdr, buf, sizeof(hdr)); + if (memcmp(hdr.magic, NBI_MAGIC, sizeof(hdr.magic)) != 0) { + return -1; + } + /* Ensure we have a properly sized header */ + if (((hdr.length & 0xf)*4) != sizeof(hdr)) { + if (probe_debug) { + fprintf(stderr, "NBI: Bad vendor header size\n"); + } + return -1; + } + /* Ensure the vendor header is not too large. + * This can't actually happen but.... + */ + if ((((hdr.length & 0xf0) >> 4)*4) > (512 - sizeof(hdr))) { + if (probe_debug) { + fprintf(stderr, "NBI: vendor headr too large\n"); + } + return -1; + } + /* Reserved bits are set in the image... */ + if ((hdr.length & 0x7ffffe00)) { + if (probe_debug) { + fprintf(stderr, "NBI: Reserved header bits set\n"); + } + return -1; + } + /* If the image can return refuse to load it */ + if (hdr.length & (1 << 8)) { + if (probe_debug) { + printf("NBI: image wants to return\n"); + } + return -1; + } + /* Now verify the segments all fit within 512 bytes */ + seg_off = (((hdr.length & 0xf0) >> 4) + (hdr.length & 0x0f)) << 2; + do { + memcpy(&seg, buf + seg_off, sizeof(seg)); + if ((seg.length & 0xf) != 4) { + if (probe_debug) { + fprintf(stderr, "NBI: Invalid segment length\n"); + } + return -1; + } + seg_off += ((seg.length & 0xf) + ((seg.length >> 4) & 0xf)) << 2; + if (seg.flags & 0xf8) { + if (probe_debug) { + fprintf(stderr, "NBI: segment reserved flags set\n"); + } + return -1; + } + if ((seg.flags & NBI_SEG) == NBI_SEG_NEGATIVE) { + if (probe_debug) { + fprintf(stderr, "NBI: negative segment addresses not supported\n"); + } + return -1; + } + if (seg_off > 512) { + if (probe_debug) { + fprintf(stderr, "NBI: segment outside 512 header\n"); + } + return -1; + } + } while(!(seg.flags & NBI_LAST_SEG)); + return 0; +} + +void nbi_usage(void) +{ + printf( "\n" + ); +} + +int nbi_load(int argc, char **argv, const char *buf, off_t UNUSED(len), + struct kexec_info *info) +{ + struct imgheader hdr; + struct segheader seg; + off_t seg_off; + off_t file_off; + uint32_t last0, last1; + int opt; + + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* + * Parse the command line arguments + */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + } + } + /* Get a copy of the header */ + memcpy(&hdr, buf, sizeof(hdr)); + + /* Load the first 512 bytes */ + add_segment(info, buf + 0, 512, + (hdr.segoff.ds << 4) + hdr.segoff.bx, 512); + + /* Initialize variables */ + file_off = 512; + last0 = (hdr.segoff.ds << 4) + hdr.segoff.bx; + last1 = last0 + 512; + + /* Load the segments */ + seg_off = (((hdr.length & 0xf0) >> 4) + (hdr.length & 0x0f)) << 2; + do { + uint32_t loadaddr; + memcpy(&seg, buf + seg_off, sizeof(seg)); + seg_off += ((seg.length & 0xf) + ((seg.length >> 4) & 0xf)) << 2; + if ((seg.flags & NBI_SEG) == NBI_SEG_ABSOLUTE) { + loadaddr = seg.loadaddr; + } + else if ((seg.flags & NBI_SEG) == NBI_SEG_APPEND) { + loadaddr = last1 + seg.loadaddr; + } +#if 0 + else if ((seg.flags & NBI_SEG) == NBI_SEG_NEGATIVE) { + loadaddr = memsize - seg.loadaddr; + } +#endif + else if ((seg.flags & NBI_SEG) == NBI_SEG_PREPEND) { + loadaddr = last0 - seg.loadaddr; + } + else { + printf("warning: unhandled segment of type %0x\n", + seg.flags & NBI_SEG); + continue; + } + add_segment(info, buf + file_off, seg.imglength, + loadaddr, seg.memlength); + last0 = loadaddr; + last1 = last0 + seg.memlength; + file_off += seg.imglength; + } while(!(seg.flags & NBI_LAST_SEG)); + + if (hdr.length & NBI_ENTRY32) { + struct entry32_regs regs32; + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs32", ®s32, sizeof(regs32)); + regs32.eip = hdr.execaddr.linear; + elf_rel_set_symbol(&info->rhdr, "entry32_regs32", ®s32, sizeof(regs32)); + } + else { + struct entry32_regs regs32; + struct entry16_regs regs16; + + /* Initialize the 16 bit registers */ + elf_rel_get_symbol(&info->rhdr, "entry16_regs", ®s16, sizeof(regs16)); + regs16.cs = hdr.execaddr.segoff.cs; + regs16.ip = hdr.execaddr.segoff.ip; + elf_rel_set_symbol(&info->rhdr, "entry16_regs", ®s16, sizeof(regs16)); + + /* Initialize the 32 bit registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s32, sizeof(regs32)); + regs32.eip = elf_rel_get_addr(&info->rhdr, "entry16"); + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s32, sizeof(regs32)); + } + return 0; +} diff --git a/kexec/arch/i386/kexec-x86-common.c b/kexec/arch/i386/kexec-x86-common.c new file mode 100644 index 0000000..ffc95a9 --- /dev/null +++ b/kexec/arch/i386/kexec-x86-common.c @@ -0,0 +1,444 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _XOPEN_SOURCE 600 +#define _BSD_SOURCE +#define _DEFAULT_SOURCE + +#include <fcntl.h> +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../firmware_memmap.h" +#include "../../crashdump.h" +#include "../../kexec-xen.h" +#include "kexec-x86.h" +#include "x86-linux-setup.h" + +/* Used below but not present in (older?) xenctrl.h */ +#ifndef E820_PMEM +#define E820_PMEM 7 +#define E820_PRAM 12 +#endif + +/* + * The real mode IVT ends at 0x400. + * See https://wiki.osdev.org/Interrupt_Vector_Table. + */ +#define REALMODE_IVT_END 0x400 + +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +/** + * The old /proc/iomem parsing code. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. + */ +static int get_memory_ranges_proc_iomem(struct memory_range **range, int *ranges) +{ + const char *iomem= proc_iomem(); + int memory_ranges = 0; + char line[MAX_LINE]; + FILE *fp; + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + while(fgets(line, sizeof(line), fp) != 0) { + unsigned long long start, end; + char *str; + int type; + int consumed; + int count; + if (memory_ranges >= MAX_MEMORY_RANGES) + break; + count = sscanf(line, "%llx-%llx : %n", + &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + + dbgprintf("%016Lx-%016Lx : %s", start, end, str); + + if (memcmp(str, "System RAM\n", 11) == 0) { + type = RANGE_RAM; + } + else if (strncasecmp(str, "reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } + else if (memcmp(str, "ACPI Tables\n", 12) == 0) { + type = RANGE_ACPI; + } + else if (memcmp(str, "ACPI Non-volatile Storage\n", 26) == 0) { + type = RANGE_ACPI_NVS; + } + else if (memcmp(str, "Persistent Memory (legacy)\n", 27) == 0) { + type = RANGE_PRAM; + } + else if (memcmp(str, "Persistent Memory\n", 18) == 0) { + type = RANGE_PMEM; + } + else { + continue; + } + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + + dbgprintf("%016Lx-%016Lx : %x\n", start, end, type); + + memory_ranges++; + } + fclose(fp); + *range = memory_range; + *ranges = memory_ranges; + return 0; +} + +/** + * Calls the architecture independent get_firmware_memmap_ranges() to parse + * /sys/firmware/memmap and then do some x86 only modifications. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. + */ +static int get_memory_ranges_sysfs(struct memory_range **range, int *ranges) +{ + int ret; + size_t range_number = MAX_MEMORY_RANGES; + + ret = get_firmware_memmap_ranges(memory_range, &range_number); + if (ret != 0) { + fprintf(stderr, "Parsing the /sys/firmware memory map failed. " + "Falling back to /proc/iomem.\n"); + return get_memory_ranges_proc_iomem(range, ranges); + } + + *range = memory_range; + *ranges = range_number; + + return 0; +} + +#ifdef HAVE_LIBXENCTRL +unsigned xen_e820_to_kexec_type(uint32_t type) +{ + switch (type) { + case E820_RAM: + return RANGE_RAM; + case E820_ACPI: + return RANGE_ACPI; + case E820_NVS: + return RANGE_ACPI_NVS; + case E820_PMEM: + return RANGE_PMEM; + case E820_PRAM: + return RANGE_PRAM; + case E820_RESERVED: + default: + return RANGE_RESERVED; + } +} + +/** + * Memory map detection for Xen. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. + */ +static int get_memory_ranges_xen(struct memory_range **range, int *ranges) +{ + int rc, ret = -1; + struct e820entry e820entries[MAX_MEMORY_RANGES]; + unsigned int i; + xc_interface *xc; + + xc = xc_interface_open(NULL, NULL, 0); + + if (!xc) { + fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); + return -1; + } + + rc = xc_get_machine_memory_map(xc, e820entries, MAX_MEMORY_RANGES); + + if (rc < 0) { + fprintf(stderr, "%s: xc_get_machine_memory_map: %s\n", __func__, strerror(rc)); + goto err; + } + + for (i = 0; i < rc; ++i) { + memory_range[i].start = e820entries[i].addr; + memory_range[i].end = e820entries[i].addr + e820entries[i].size - 1; + memory_range[i].type = xen_e820_to_kexec_type(e820entries[i].type); + } + + qsort(memory_range, rc, sizeof(struct memory_range), compare_ranges); + + *range = memory_range; + *ranges = rc; + + ret = 0; + +err: + xc_interface_close(xc); + + return ret; +} +#else +static int get_memory_ranges_xen(struct memory_range **range, int *ranges) +{ + return 0; +} +#endif /* HAVE_LIBXENCTRL */ + +static void remove_range(struct memory_range *range, int nr_ranges, int index) +{ + int i, j; + + for (i = index; i < (nr_ranges-1); i++) { + j = i+1; + range[i] = range[j]; + } +} + +/** + * Verifies and corrects any overlapping ranges. + * The ranges array is assumed to be sorted already. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. + */ +static int fixup_memory_ranges(struct memory_range **range, int *ranges) +{ + int i; + int j; + int change_made; + int nr_ranges = *ranges; + struct memory_range *rp = *range; + +again: + change_made = 0; + for (i = 0; i < (nr_ranges-1); i++) { + j = i+1; + if (rp[i].start > rp[j].start) { + fprintf(stderr, "memory out of order!!\n"); + return 1; + } + + if (rp[i].type != rp[j].type) + continue; + + if (rp[i].start == rp[j].start) { + if (rp[i].end >= rp[j].end) { + remove_range(rp, nr_ranges, j); + nr_ranges--; + change_made++; + } else { + remove_range(rp, nr_ranges, i); + nr_ranges--; + change_made++; + } + } else { + if (rp[i].end > rp[j].start) { + if (rp[i].end < rp[j].end) { + rp[j].start = rp[i].end; + change_made++; + } else if (rp[i].end >= rp[j].end) { + remove_range(rp, nr_ranges, j); + nr_ranges--; + change_made++; + } + } + } + } + + /* fixing/removing an entry may make it wrong relative to the next */ + if (change_made) + goto again; + + *ranges = nr_ranges; + return 0; +} + +/** + * Detect the add_efi_memmap kernel parameter. + * + * On some EFI-based systems, the e820 map is empty, or does not contain a + * complete memory map. The add_efi_memmap parameter adds these entries to + * the kernel's memory map, but does not add them under sysfs, which causes + * kexec to fail in a way similar to how it does not work on Xen. + * + * @return 1 if parameter is present, 0 if not or if an error occurs. + */ +int efi_map_added( void ) { + char buf[512]; + FILE *fp = fopen( "/proc/cmdline", "r" ); + if( fp ) { + fgets( buf, 512, fp ); + fclose( fp ); + return strstr( buf, "add_efi_memmap" ) != NULL; + } else { + return 0; + } +} + +/** + * Return a sorted list of memory ranges. + * + * If we have the /sys/firmware/memmap interface, then use that. If not, + * or if parsing of that fails, use /proc/iomem as fallback. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * @param[in] kexec_flags the kexec_flags to determine if we load a normal + * or a crashdump kernel + * + * @return 0 on success, any other value on failure. + */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + int ret, i; + + if (!efi_map_added() && !xen_present() && have_sys_firmware_memmap()) { + ret = get_memory_ranges_sysfs(range, ranges); + if (!ret) + ret = fixup_memory_ranges(range, ranges); + } else if (xen_present()) { + ret = get_memory_ranges_xen(range, ranges); + if (!ret) + ret = fixup_memory_ranges(range, ranges); + } else + ret = get_memory_ranges_proc_iomem(range, ranges); + + /* + * get_memory_ranges_sysfs(), get_memory_ranges_proc_iomem() and + * get_memory_ranges_xen() have already printed an error message, + * so fail silently here. + */ + if (ret != 0) + return ret; + + /* Don't report the interrupt table as ram */ + for (i = 0; i < *ranges; i++) { + if ((*range)[i].type == RANGE_RAM && + ((*range)[i].start < REALMODE_IVT_END)) { + (*range)[i].start = REALMODE_IVT_END; + break; + } + } + + /* + * Redefine the memory region boundaries if kernel + * exports the limits and if it is panic kernel. + * Override user values only if kernel exported values are + * subset of user defined values. + */ + if ((kexec_flags & KEXEC_ON_CRASH) && + !(kexec_flags & KEXEC_PRESERVE_CONTEXT)) { + uint64_t start, end; + + ret = get_crash_kernel_load_range(&start, &end); + if (ret != 0) { + fprintf(stderr, "get_crash_kernel_load_range failed.\n"); + return -1; + } + + if (start > mem_min) + mem_min = start; + if (end < mem_max) + mem_max = end; + } + + dbgprint_mem_range("MEMORY RANGES", *range, *ranges); + + return ret; +} + +static uint64_t bootparam_get_acpi_rsdp(void) { + uint64_t acpi_rsdp = 0; + off_t offset = offsetof(struct x86_linux_param_header, acpi_rsdp_addr); + + if (get_bootparam(&acpi_rsdp, offset, sizeof(acpi_rsdp))) + return 0; + + return acpi_rsdp; +} + +static uint64_t efi_get_acpi_rsdp(void) { + FILE *fp; + char line[MAX_LINE], *s; + uint64_t acpi_rsdp = 0; + + fp = fopen("/sys/firmware/efi/systab", "r"); + if (!fp) + return acpi_rsdp; + + while(fgets(line, sizeof(line), fp) != 0) { + /* ACPI20= always goes before ACPI= */ + if ((strstr(line, "ACPI20=")) || (strstr(line, "ACPI="))) { + s = strchr(line, '=') + 1; + sscanf(s, "0x%lx", &acpi_rsdp); + break; + } + } + fclose(fp); + + return acpi_rsdp; +} + +uint64_t get_acpi_rsdp(void) +{ + uint64_t acpi_rsdp = 0; + + acpi_rsdp = bootparam_get_acpi_rsdp(); + + if (!acpi_rsdp) + acpi_rsdp = efi_get_acpi_rsdp(); + + return acpi_rsdp; +} diff --git a/kexec/arch/i386/kexec-x86.c b/kexec/arch/i386/kexec-x86.c new file mode 100644 index 0000000..444cb69 --- /dev/null +++ b/kexec/arch/i386/kexec-x86.c @@ -0,0 +1,210 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <stdlib.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../firmware_memmap.h" +#include "kexec-x86.h" +#include "crashdump-x86.h" +#include <arch/options.h> + +struct file_type file_type[] = { + { "multiboot-x86", multiboot_x86_probe, multiboot_x86_load, + multiboot_x86_usage }, + { "multiboot2-x86", multiboot2_x86_probe, multiboot2_x86_load, + multiboot2_x86_usage }, + { "elf-x86", elf_x86_probe, elf_x86_load, elf_x86_usage }, + { "bzImage", bzImage_probe, bzImage_load, bzImage_usage }, + { "beoboot-x86", beoboot_probe, beoboot_load, beoboot_usage }, + { "nbi-x86", nbi_probe, nbi_load, nbi_usage }, +}; +int file_types = sizeof(file_type)/sizeof(file_type[0]); + + +void arch_usage(void) +{ + printf( + " --reset-vga Attempt to reset a standard vga device\n" + " --serial=<port> Specify the serial port for debug output\n" + " --serial-baud=<baud_rate> Specify the serial port baud rate\n" + " --console-vga Enable the vga console\n" + " --console-serial Enable the serial console\n" + " --elf32-core-headers Prepare core headers in ELF32 format\n" + " --elf64-core-headers Prepare core headers in ELF64 format\n" + " --pass-memmap-cmdline Pass memory map via command line in kexec on panic case\n" + " --noefi Disable efi support\n" + ); +} + +struct arch_options_t arch_options = { + .reset_vga = 0, + .serial_base = 0x3f8, + .serial_baud = 0, + .console_vga = 0, + .console_serial = 0, + .core_header_type = CORE_TYPE_UNDEF, + .pass_memmap_cmdline = 0, + .noefi = 0, +}; + +int arch_process_options(int argc, char **argv) +{ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + unsigned long value; + char *end; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + break; + case OPT_RESET_VGA: + arch_options.reset_vga = 1; + break; + case OPT_CONSOLE_VGA: + arch_options.console_vga = 1; + break; + case OPT_CONSOLE_SERIAL: + arch_options.console_serial = 1; + break; + case OPT_SERIAL: + value = ULONG_MAX; + if (strcmp(optarg, "ttyS0") == 0) { + value = 0x3f8; + } + else if (strcmp(optarg, "ttyS1") == 0) { + value = 0x2f8; + } + else if (strncmp(optarg, "0x", 2) == 0) { + value = strtoul(optarg +2, &end, 16); + if (*end != '\0') { + value = ULONG_MAX; + } + } + if (value >= 65536) { + fprintf(stderr, "Bad serial port base '%s'\n", + optarg); + usage(); + return -1; + + } + arch_options.serial_base = value; + break; + case OPT_SERIAL_BAUD: + value = strtoul(optarg, &end, 0); + if ((value > 115200) || ((115200 %value) != 0) || + (value < 9600) || (*end)) + { + fprintf(stderr, "Bad serial port baud rate '%s'\n", + optarg); + usage(); + return -1; + + } + arch_options.serial_baud = value; + break; + case OPT_ELF32_CORE: + arch_options.core_header_type = CORE_TYPE_ELF32; + break; + case OPT_ELF64_CORE: + arch_options.core_header_type = CORE_TYPE_ELF64; + break; + case OPT_PASS_MEMMAP_CMDLINE: + arch_options.pass_memmap_cmdline = 1; + break; + case OPT_NOEFI: + arch_options.noefi = 1; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + /* For compatibility with older patches + * use KEXEC_ARCH_DEFAULT instead of KEXEC_ARCH_386 here. + */ + { "i386", KEXEC_ARCH_DEFAULT }, + { "i486", KEXEC_ARCH_DEFAULT }, + { "i586", KEXEC_ARCH_DEFAULT }, + { "i686", KEXEC_ARCH_DEFAULT }, + { "x86_64", KEXEC_ARCH_X86_64 }, + { 0, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *info) +{ + if ((info->kexec_flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_X86_64) + { + if (!info->rhdr.e_shdr) { + fprintf(stderr, + "A trampoline is required for cross architecture support\n"); + return -1; + } + elf_rel_set_symbol(&info->rhdr, "compat_x86_64_entry32", + &info->entry, sizeof(info->entry)); + + info->entry = (void *)elf_rel_get_addr(&info->rhdr, "compat_x86_64"); + } + return 0; +} + +void arch_update_purgatory(struct kexec_info *info) +{ + uint8_t panic_kernel = 0; + + elf_rel_set_symbol(&info->rhdr, "reset_vga", + &arch_options.reset_vga, sizeof(arch_options.reset_vga)); + elf_rel_set_symbol(&info->rhdr, "serial_base", + &arch_options.serial_base, sizeof(arch_options.serial_base)); + elf_rel_set_symbol(&info->rhdr, "serial_baud", + &arch_options.serial_baud, sizeof(arch_options.serial_baud)); + elf_rel_set_symbol(&info->rhdr, "console_vga", + &arch_options.console_vga, sizeof(arch_options.console_vga)); + elf_rel_set_symbol(&info->rhdr, "console_serial", + &arch_options.console_serial, sizeof(arch_options.console_serial)); + elf_rel_set_symbol(&info->rhdr, "backup_src_start", + &info->backup_src_start, sizeof(info->backup_src_start)); + elf_rel_set_symbol(&info->rhdr, "backup_src_size", + &info->backup_src_size, sizeof(info->backup_src_size)); + if (info->kexec_flags & KEXEC_ON_CRASH) { + panic_kernel = 1; + elf_rel_set_symbol(&info->rhdr, "backup_start", + &info->backup_start, sizeof(info->backup_start)); + } + elf_rel_set_symbol(&info->rhdr, "panic_kernel", + &panic_kernel, sizeof(panic_kernel)); +} diff --git a/kexec/arch/i386/kexec-x86.h b/kexec/arch/i386/kexec-x86.h new file mode 100644 index 0000000..46e2898 --- /dev/null +++ b/kexec/arch/i386/kexec-x86.h @@ -0,0 +1,97 @@ +#ifndef KEXEC_X86_H +#define KEXEC_X86_H + +#define MAX_MEMORY_RANGES 2048 + +enum coretype { + CORE_TYPE_UNDEF = 0, + CORE_TYPE_ELF32 = 1, + CORE_TYPE_ELF64 = 2 +}; + +extern unsigned char compat_x86_64[]; +extern uint32_t compat_x86_64_size, compat_x86_64_entry32; + +struct entry32_regs { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t esp; + uint32_t ebp; + uint32_t eip; +}; + +struct entry16_regs { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t esp; + uint32_t ebp; + uint16_t ds; + uint16_t es; + uint16_t ss; + uint16_t fs; + uint16_t gs; + uint16_t ip; + uint16_t cs; + uint16_t pad; +}; + +struct arch_options_t { + uint8_t reset_vga; + uint16_t serial_base; + uint32_t serial_baud; + uint8_t console_vga; + uint8_t console_serial; + enum coretype core_header_type; + uint8_t pass_memmap_cmdline; + uint8_t noefi; + uint8_t reuse_video_type; +}; + +int multiboot_x86_probe(const char *buf, off_t len); +int multiboot_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void multiboot_x86_usage(void); + +int multiboot2_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void multiboot2_x86_usage(void); +int multiboot2_x86_probe(const char *buf, off_t buf_len); + +int elf_x86_probe(const char *buf, off_t len); +int elf_x86_any_probe(const char *buf, off_t len, enum coretype arch); +int elf_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_x86_usage(void); + +int bzImage_probe(const char *buf, off_t len); +int bzImage_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void bzImage_usage(void); +int do_bzImage_load(struct kexec_info *info, + const char *kernel, off_t kernel_len, + const char *command_line, off_t command_line_len, + const char *initrd, off_t initrd_len, + const char *dtb, off_t dtb_len, + int real_mode_entry); + +int beoboot_probe(const char *buf, off_t len); +int beoboot_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void beoboot_usage(void); + +int nbi_probe(const char *buf, off_t len); +int nbi_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void nbi_usage(void); + +extern unsigned xen_e820_to_kexec_type(uint32_t type); +extern uint64_t get_acpi_rsdp(void); +#endif /* KEXEC_X86_H */ diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c new file mode 100644 index 0000000..9a281dc --- /dev/null +++ b/kexec/arch/i386/x86-linux-setup.c @@ -0,0 +1,971 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#define _GNU_SOURCE +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/random.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <linux/fb.h> +#include <linux/screen_info.h> +#include <unistd.h> +#include <dirent.h> +#include <mntent.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "kexec-x86.h" +#include "x86-linux-setup.h" +#include "../../kexec/kexec-syscall.h" + +#ifndef VIDEO_CAPABILITY_64BIT_BASE +#define VIDEO_CAPABILITY_64BIT_BASE (1 << 1) /* Frame buffer base is 64-bit */ +#endif + +void init_linux_parameters(struct x86_linux_param_header *real_mode) +{ + /* Fill in the values that are usually provided by the kernel. */ + + /* Boot block magic */ + memcpy(real_mode->header_magic, "HdrS", 4); + real_mode->protocol_version = 0x0206; + real_mode->initrd_addr_max = DEFAULT_INITRD_ADDR_MAX; + real_mode->cmdline_size = COMMAND_LINE_SIZE; +} + +void setup_linux_bootloader_parameters_high( + struct kexec_info *info, struct x86_linux_param_header *real_mode, + unsigned long real_mode_base, unsigned long cmdline_offset, + const char *cmdline, off_t cmdline_len, + const char *initrd_buf, off_t initrd_size, int initrd_high) +{ + char *cmdline_ptr; + unsigned long initrd_base, initrd_addr_max; + + /* Say I'm a boot loader */ + real_mode->loader_type = LOADER_TYPE_KEXEC << 4; + + /* No loader flags */ + real_mode->loader_flags = 0; + + /* Find the maximum initial ramdisk address */ + if (initrd_high) + initrd_addr_max = ULONG_MAX; + else { + initrd_addr_max = DEFAULT_INITRD_ADDR_MAX; + if (real_mode->protocol_version >= 0x0203) { + initrd_addr_max = real_mode->initrd_addr_max; + dbgprintf("initrd_addr_max is 0x%lx\n", + initrd_addr_max); + } + } + + /* Load the initrd if we have one */ + if (initrd_buf) { + initrd_base = add_buffer(info, + initrd_buf, initrd_size, initrd_size, + 4096, INITRD_BASE, initrd_addr_max, -1); + dbgprintf("Loaded initrd at 0x%lx size 0x%lx\n", initrd_base, + initrd_size); + } else { + initrd_base = 0; + initrd_size = 0; + } + + /* Ramdisk address and size */ + real_mode->initrd_start = initrd_base & 0xffffffffUL; + real_mode->initrd_size = initrd_size & 0xffffffffUL; + + if (real_mode->protocol_version >= 0x020c && + (initrd_base & 0xffffffffUL) != initrd_base) + real_mode->ext_ramdisk_image = initrd_base >> 32; + + if (real_mode->protocol_version >= 0x020c && + (initrd_size & 0xffffffffUL) != initrd_size) + real_mode->ext_ramdisk_size = initrd_size >> 32; + + /* The location of the command line */ + /* if (real_mode_base == 0x90000) { */ + real_mode->cl_magic = CL_MAGIC_VALUE; + real_mode->cl_offset = cmdline_offset; + /* setup_move_size */ + /* } */ + if (real_mode->protocol_version >= 0x0202) { + unsigned long cmd_line_ptr = real_mode_base + cmdline_offset; + + real_mode->cmd_line_ptr = cmd_line_ptr & 0xffffffffUL; + if ((real_mode->protocol_version >= 0x020c) && + ((cmd_line_ptr & 0xffffffffUL) != cmd_line_ptr)) + real_mode->ext_cmd_line_ptr = cmd_line_ptr >> 32; + } + + /* Fill in the command line */ + if (cmdline_len > COMMAND_LINE_SIZE) { + cmdline_len = COMMAND_LINE_SIZE; + } + cmdline_ptr = ((char *)real_mode) + cmdline_offset; + memcpy(cmdline_ptr, cmdline, cmdline_len); + cmdline_ptr[cmdline_len - 1] = '\0'; +} + +static int setup_linux_vesafb(struct x86_linux_param_header *real_mode) +{ + struct fb_fix_screeninfo fix; + struct fb_var_screeninfo var; + int fd; + + fd = open("/dev/fb0", O_RDONLY); + if (-1 == fd) + return -1; + + if (-1 == ioctl(fd, FBIOGET_FSCREENINFO, &fix)) + goto out; + if (-1 == ioctl(fd, FBIOGET_VSCREENINFO, &var)) + goto out; + if (0 == strcmp(fix.id, "VESA VGA")) { + /* VIDEO_TYPE_VLFB */ + real_mode->orig_video_isVGA = 0x23; + } else if (0 == strcmp(fix.id, "EFI VGA")) { + /* VIDEO_TYPE_EFI */ + real_mode->orig_video_isVGA = 0x70; + } else if (arch_options.reuse_video_type) { + int err; + off_t offset = offsetof(typeof(*real_mode), orig_video_isVGA); + + /* blindly try old boot time video type */ + err = get_bootparam(&real_mode->orig_video_isVGA, offset, 1); + if (err) + goto out; + } else { + real_mode->orig_video_isVGA = 0; + close(fd); + return 0; + } + close(fd); + + real_mode->lfb_width = var.xres; + real_mode->lfb_height = var.yres; + real_mode->lfb_depth = var.bits_per_pixel; + real_mode->lfb_base = fix.smem_start & 0xffffffffUL; + real_mode->lfb_linelength = fix.line_length; + real_mode->vesapm_seg = 0; + + if (fix.smem_start > 0xffffffffUL) { + real_mode->ext_lfb_base = fix.smem_start >> 32; + real_mode->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + } + + /* FIXME: better get size from the file returned by proc_iomem() */ + real_mode->lfb_size = (fix.smem_len + 65535) / 65536; + real_mode->pages = (fix.smem_len + 4095) / 4096; + + if (var.bits_per_pixel > 8) { + real_mode->red_pos = var.red.offset; + real_mode->red_size = var.red.length; + real_mode->green_pos = var.green.offset; + real_mode->green_size = var.green.length; + real_mode->blue_pos = var.blue.offset; + real_mode->blue_size = var.blue.length; + real_mode->rsvd_pos = var.transp.offset; + real_mode->rsvd_size = var.transp.length; + } + return 0; + + out: + close(fd); + return -1; +} + +#define EDD_SYFS_DIR "/sys/firmware/edd" + +#define EDD_EXT_FIXED_DISK_ACCESS (1 << 0) +#define EDD_EXT_DEVICE_LOCKING_AND_EJECTING (1 << 1) +#define EDD_EXT_ENHANCED_DISK_DRIVE_SUPPORT (1 << 2) +#define EDD_EXT_64BIT_EXTENSIONS (1 << 3) + +/* + * Scans one line from a given filename. Returns on success the number of + * items written (same like scanf()). + */ +static int file_scanf(const char *dir, const char *file, const char *scanf_line, ...) +{ + va_list argptr; + FILE *fp; + int retno; + char filename[PATH_MAX]; + + snprintf(filename, PATH_MAX, "%s/%s", dir, file); + filename[PATH_MAX-1] = 0; + + fp = fopen(filename, "r"); + if (!fp) { + return -errno; + } + + va_start(argptr, scanf_line); + retno = vfscanf(fp, scanf_line, argptr); + va_end(argptr); + + fclose(fp); + + return retno; +} + +static int parse_edd_extensions(const char *dir, struct edd_info *edd_info) +{ + char filename[PATH_MAX]; + char line[1024]; + uint16_t flags = 0; + FILE *fp; + int ret; + + ret = snprintf(filename, PATH_MAX, "%s/%s", dir, "extensions"); + if (ret < 0 || ret >= PATH_MAX) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + return -1; + } + + filename[PATH_MAX-1] = 0; + + fp = fopen(filename, "r"); + if (!fp) { + return -errno; + } + + while (fgets(line, 1024, fp)) { + /* + * strings are in kernel source, function edd_show_extensions() + * drivers/firmware/edd.c + */ + if (strstr(line, "Fixed disk access") == line) + flags |= EDD_EXT_FIXED_DISK_ACCESS; + else if (strstr(line, "Device locking and ejecting") == line) + flags |= EDD_EXT_DEVICE_LOCKING_AND_EJECTING; + else if (strstr(line, "Enhanced Disk Drive support") == line) + flags |= EDD_EXT_ENHANCED_DISK_DRIVE_SUPPORT; + else if (strstr(line, "64-bit extensions") == line) + flags |= EDD_EXT_64BIT_EXTENSIONS; + } + + fclose(fp); + + edd_info->interface_support = flags; + + return 0; +} + +static int read_edd_raw_data(const char *dir, struct edd_info *edd_info) +{ + char filename[PATH_MAX]; + FILE *fp; + size_t read_chars; + uint16_t len; + int ret; + + ret = snprintf(filename, PATH_MAX, "%s/%s", dir, "raw_data"); + if (ret < 0 || ret >= PATH_MAX) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + return -1; + } + + filename[PATH_MAX-1] = 0; + + fp = fopen(filename, "r"); + if (!fp) { + return -errno; + } + + memset(edd_info->edd_device_params, 0, EDD_DEVICE_PARAM_SIZE); + read_chars = fread(edd_info->edd_device_params, sizeof(uint8_t), + EDD_DEVICE_PARAM_SIZE, fp); + fclose(fp); + + len = ((uint16_t *)edd_info->edd_device_params)[0]; + dbgprintf("EDD raw data has length %d\n", len); + + if (read_chars < len) { + fprintf(stderr, "BIOS reported EDD length of %hd but only " + "%d chars read.\n", len, (int)read_chars); + return -1; + } + + return 0; +} + +static int add_edd_entry(struct x86_linux_param_header *real_mode, + const char *sysfs_name, int *current_edd, int *current_mbr) +{ + uint8_t devnum, version; + uint32_t mbr_sig; + struct edd_info *edd_info; + + if (!current_mbr || !current_edd) { + fprintf(stderr, "%s: current_edd and current_edd " + "must not be NULL", __FUNCTION__); + return -1; + } + + edd_info = &real_mode->eddbuf[*current_edd]; + memset(edd_info, 0, sizeof(struct edd_info)); + + /* extract the device number */ + if (sscanf(basename(sysfs_name), "int13_dev%hhx", &devnum) != 1) { + fprintf(stderr, "Invalid format of int13_dev dir " + "entry: %s\n", basename(sysfs_name)); + return -1; + } + + /* if there's a MBR signature, then add it */ + if (file_scanf(sysfs_name, "mbr_signature", "0x%x", &mbr_sig) == 1) { + real_mode->edd_mbr_sig_buffer[*current_mbr] = mbr_sig; + (*current_mbr)++; + dbgprintf("EDD Device 0x%x: mbr_sig=0x%x\n", devnum, mbr_sig); + } + + /* set the device number */ + edd_info->device = devnum; + + /* set the version */ + if (file_scanf(sysfs_name, "version", "0x%hhx", &version) != 1) + return -1; + + edd_info->version = version; + + /* if version == 0, that's some kind of dummy entry */ + if (version != 0) { + /* legacy_max_cylinder */ + if (file_scanf(sysfs_name, "legacy_max_cylinder", "%hu", + &edd_info->legacy_max_cylinder) != 1) { + fprintf(stderr, "Reading legacy_max_cylinder failed.\n"); + return -1; + } + + /* legacy_max_head */ + if (file_scanf(sysfs_name, "legacy_max_head", "%hhu", + &edd_info->legacy_max_head) != 1) { + fprintf(stderr, "Reading legacy_max_head failed.\n"); + return -1; + } + + /* legacy_sectors_per_track */ + if (file_scanf(sysfs_name, "legacy_sectors_per_track", "%hhu", + &edd_info->legacy_sectors_per_track) != 1) { + fprintf(stderr, "Reading legacy_sectors_per_track failed.\n"); + return -1; + } + + /* Parse the EDD extensions */ + if (parse_edd_extensions(sysfs_name, edd_info) != 0) { + fprintf(stderr, "Parsing EDD extensions failed.\n"); + return -1; + } + + /* Parse the raw info */ + if (read_edd_raw_data(sysfs_name, edd_info) != 0) { + fprintf(stderr, "Reading EDD raw data failed.\n"); + return -1; + } + } + + (*current_edd)++; + + return 0; +} + +static void zero_edd(struct x86_linux_param_header *real_mode) +{ + real_mode->eddbuf_entries = 0; + real_mode->edd_mbr_sig_buf_entries = 0; + memset(real_mode->eddbuf, 0, + EDDMAXNR * sizeof(struct edd_info)); + memset(real_mode->edd_mbr_sig_buffer, 0, + EDD_MBR_SIG_MAX * sizeof(uint32_t)); +} + +void setup_edd_info(struct x86_linux_param_header *real_mode) +{ + DIR *edd_dir; + struct dirent *cursor; + int current_edd = 0; + int current_mbr = 0; + + edd_dir = opendir(EDD_SYFS_DIR); + if (!edd_dir) { + dbgprintf(EDD_SYFS_DIR " does not exist.\n"); + return; + } + + zero_edd(real_mode); + while ((cursor = readdir(edd_dir))) { + char full_dir_name[PATH_MAX]; + + /* only read the entries that start with "int13_dev" */ + if (strstr(cursor->d_name, "int13_dev") != cursor->d_name) + continue; + + snprintf(full_dir_name, PATH_MAX, "%s/%s", + EDD_SYFS_DIR, cursor->d_name); + full_dir_name[PATH_MAX-1] = 0; + + if (add_edd_entry(real_mode, full_dir_name, ¤t_edd, + ¤t_mbr) != 0) { + zero_edd(real_mode); + goto out; + } + } + + real_mode->eddbuf_entries = current_edd; + real_mode->edd_mbr_sig_buf_entries = current_mbr; + +out: + closedir(edd_dir); + + dbgprintf("Added %d EDD MBR entries and %d EDD entries.\n", + real_mode->edd_mbr_sig_buf_entries, + real_mode->eddbuf_entries); +} + +/* + * This really only makes sense for virtual filesystems that are only expected + * to be mounted once (sysfs, debugsfs, proc), as it will return the first + * instance listed in /proc/mounts, falling back to mtab if absent. + * We search by type and not by name because the name can be anything; + * while setting the name equal to the mount point is common, it cannot be + * relied upon, as even kernel documentation examples recommends using + * "none" as the name e.g. for debugfs. + */ +char *find_mnt_by_type(char *type) +{ + FILE *mtab; + struct mntent *mnt; + char *mntdir; + + mtab = setmntent("/proc/mounts", "r"); + if (!mtab) { + // Fall back to mtab + mtab = setmntent("/etc/mtab", "r"); + } + if (!mtab) + return NULL; + for(mnt = getmntent(mtab); mnt; mnt = getmntent(mtab)) { + if (strcmp(mnt->mnt_type, type) == 0) + break; + } + mntdir = mnt ? strdup(mnt->mnt_dir) : NULL; + endmntent(mtab); + return mntdir; +} + +int get_bootparam(void *buf, off_t offset, size_t size) +{ + int data_file; + char *debugfs_mnt, *sysfs_mnt; + char filename[PATH_MAX]; + int err, has_sysfs_params = 0; + + sysfs_mnt = find_mnt_by_type("sysfs"); + if (sysfs_mnt) { + snprintf(filename, PATH_MAX, "%s/%s", sysfs_mnt, + "kernel/boot_params/data"); + free(sysfs_mnt); + err = access(filename, F_OK); + if (!err) + has_sysfs_params = 1; + } + + if (!has_sysfs_params) { + debugfs_mnt = find_mnt_by_type("debugfs"); + if (!debugfs_mnt) + return 1; + snprintf(filename, PATH_MAX, "%s/%s", debugfs_mnt, + "boot_params/data"); + free(debugfs_mnt); + } + + data_file = open(filename, O_RDONLY); + if (data_file < 0) + return 1; + if (lseek(data_file, offset, SEEK_SET) < 0) + goto close; + read(data_file, buf, size); +close: + close(data_file); + return 0; +} + +void setup_subarch(struct x86_linux_param_header *real_mode) +{ + off_t offset = offsetof(typeof(*real_mode), hardware_subarch); + + get_bootparam(&real_mode->hardware_subarch, offset, sizeof(uint32_t)); +} + +struct efi_mem_descriptor { + uint32_t type; + uint32_t pad; + uint64_t phys_addr; + uint64_t virt_addr; + uint64_t num_pages; + uint64_t attribute; +}; + +struct efi_setup_data { + uint64_t fw_vendor; + uint64_t runtime; + uint64_t tables; + uint64_t smbios; + uint64_t reserved[8]; +}; + +struct setup_data { + uint64_t next; + uint32_t type; +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 +#define SETUP_RNG_SEED 9 + uint32_t len; + uint8_t data[0]; +} __attribute__((packed)); + +static int get_efi_value(const char *filename, + const char *pattern, uint64_t *val) +{ + FILE *fp; + char line[1024], *s, *end; + + fp = fopen(filename, "r"); + if (!fp) + return 1; + + while (fgets(line, sizeof(line), fp) != 0) { + s = strstr(line, pattern); + if (!s) + continue; + *val = strtoull(s + strlen(pattern), &end, 16); + if (*val == ULLONG_MAX) { + fclose(fp); + return 2; + } + break; + } + + fclose(fp); + return 0; +} + +static int get_efi_values(struct efi_setup_data *esd) +{ + int ret = 0; + + ret = get_efi_value("/sys/firmware/efi/systab", "SMBIOS=0x", + &esd->smbios); + ret |= get_efi_value("/sys/firmware/efi/fw_vendor", "0x", + &esd->fw_vendor); + ret |= get_efi_value("/sys/firmware/efi/runtime", "0x", + &esd->runtime); + ret |= get_efi_value("/sys/firmware/efi/config_table", "0x", + &esd->tables); + return ret; +} + +static int get_efi_runtime_map(struct efi_mem_descriptor **map) +{ + DIR *dirp; + struct dirent *entry; + char filename[1024]; + struct efi_mem_descriptor md, *p = NULL; + int nr_maps = 0; + + dirp = opendir("/sys/firmware/efi/runtime-map"); + if (!dirp) + return 0; + while ((entry = readdir(dirp)) != NULL) { + sprintf(filename, + "/sys/firmware/efi/runtime-map/%s", + (char *)entry->d_name); + if (*entry->d_name == '.') + continue; + file_scanf(filename, "type", "0x%x", (unsigned int *)&md.type); + file_scanf(filename, "phys_addr", "0x%llx", + (unsigned long long *)&md.phys_addr); + file_scanf(filename, "virt_addr", "0x%llx", + (unsigned long long *)&md.virt_addr); + file_scanf(filename, "num_pages", "0x%llx", + (unsigned long long *)&md.num_pages); + file_scanf(filename, "attribute", "0x%llx", + (unsigned long long *)&md.attribute); + p = realloc(p, (nr_maps + 1) * sizeof(md)); + if (!p) + goto err_out; + + *(p + nr_maps) = md; + *map = p; + nr_maps++; + } + + closedir(dirp); + return nr_maps; +err_out: + if (*map) + free(*map); + closedir(dirp); + return 0; +} + +struct efi_info { + uint32_t efi_loader_signature; + uint32_t efi_systab; + uint32_t efi_memdesc_size; + uint32_t efi_memdesc_version; + uint32_t efi_memmap; + uint32_t efi_memmap_size; + uint32_t efi_systab_hi; + uint32_t efi_memmap_hi; +}; + +/* + * Add another instance to single linked list of struct setup_data. + * Please refer to kernel Documentation/x86/boot.txt for more details + * about setup_data structure. + */ +static void add_setup_data(struct kexec_info *info, + struct x86_linux_param_header *real_mode, + struct setup_data *sd) +{ + int sdsize = sizeof(struct setup_data) + sd->len; + + sd->next = real_mode->setup_data; + real_mode->setup_data = add_buffer(info, sd, sdsize, sdsize, getpagesize(), + 0x100000, ULONG_MAX, INT_MAX); +} + +/* + * setup_efi_data will collect below data and pass them to 2nd kernel. + * 1) SMBIOS, fw_vendor, runtime, config_table, they are passed via x86 + * setup_data. + * 2) runtime memory regions, set the memmap related fields in efi_info. + */ +static int setup_efi_data(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + int64_t memmap_paddr; + struct setup_data *sd; + struct efi_setup_data *esd; + struct efi_mem_descriptor *maps; + int nr_maps, size, ret = 0; + struct efi_info *ei = (struct efi_info *)real_mode->efi_info; + + ret = access("/sys/firmware/efi/systab", F_OK); + if (ret < 0) + goto out; + + esd = malloc(sizeof(struct efi_setup_data)); + if (!esd) { + ret = 1; + goto out; + } + memset(esd, 0, sizeof(struct efi_setup_data)); + ret = get_efi_values(esd); + if (ret) + goto free_esd; + nr_maps = get_efi_runtime_map(&maps); + if (!nr_maps) { + ret = 2; + goto free_esd; + } + sd = malloc(sizeof(struct setup_data) + sizeof(*esd)); + if (!sd) { + ret = 3; + goto free_maps; + } + + memset(sd, 0, sizeof(struct setup_data) + sizeof(*esd)); + sd->next = 0; + sd->type = SETUP_EFI; + sd->len = sizeof(*esd); + memcpy(sd->data, esd, sizeof(*esd)); + free(esd); + + add_setup_data(info, real_mode, sd); + + size = nr_maps * sizeof(struct efi_mem_descriptor); + memmap_paddr = add_buffer(info, maps, size, size, getpagesize(), + 0x100000, ULONG_MAX, INT_MAX); + ei->efi_memmap = memmap_paddr & 0xffffffff; + ei->efi_memmap_hi = memmap_paddr >> 32; + ei->efi_memmap_size = size; + ei->efi_memdesc_size = sizeof(struct efi_mem_descriptor); + + return 0; +free_maps: + free(maps); +free_esd: + free(esd); +out: + return ret; +} + +static void add_e820_map_from_mr(struct x86_linux_param_header *real_mode, + struct e820entry *e820, struct memory_range *range, int nr_range) +{ + int i; + + for (i = 0; i < nr_range; i++) { + e820[i].addr = range[i].start; + e820[i].size = range[i].end - range[i].start + 1; + switch (range[i].type) { + case RANGE_RAM: + e820[i].type = E820_RAM; + break; + case RANGE_ACPI: + e820[i].type = E820_ACPI; + break; + case RANGE_ACPI_NVS: + e820[i].type = E820_NVS; + break; + case RANGE_PMEM: + e820[i].type = E820_PMEM; + break; + case RANGE_PRAM: + e820[i].type = E820_PRAM; + break; + default: + case RANGE_RESERVED: + e820[i].type = E820_RESERVED; + break; + } + dbgprintf("%016lx-%016lx (%d)\n", + e820[i].addr, + e820[i].addr + e820[i].size - 1, + e820[i].type); + + if (range[i].type != RANGE_RAM) + continue; + if ((range[i].start <= 0x100000) && range[i].end > 0x100000) { + unsigned long long mem_k = (range[i].end >> 10) - (0x100000 >> 10); + real_mode->ext_mem_k = mem_k; + real_mode->alt_mem_k = mem_k; + if (mem_k > 0xfc00) { + real_mode->ext_mem_k = 0xfc00; /* 64M */ + } + if (mem_k > 0xffffffff) { + real_mode->alt_mem_k = 0xffffffff; + } + } + } +} + +static void setup_e820_ext(struct kexec_info *info, struct x86_linux_param_header *real_mode, + struct memory_range *range, int nr_range) +{ + struct setup_data *sd; + struct e820entry *e820; + int nr_range_ext; + + nr_range_ext = nr_range - E820MAX; + sd = xmalloc(sizeof(struct setup_data) + nr_range_ext * sizeof(struct e820entry)); + sd->next = 0; + sd->len = nr_range_ext * sizeof(struct e820entry); + sd->type = SETUP_E820_EXT; + + e820 = (struct e820entry *) sd->data; + dbgprintf("Extended E820 via setup_data:\n"); + add_e820_map_from_mr(real_mode, e820, range + E820MAX, nr_range_ext); + add_setup_data(info, real_mode, sd); +} + +static void setup_e820(struct kexec_info *info, struct x86_linux_param_header *real_mode) +{ + struct memory_range *range; + int nr_range, nr_range_saved; + + + if (info->kexec_flags & KEXEC_ON_CRASH && !arch_options.pass_memmap_cmdline) { + range = info->crash_range; + nr_range = info->nr_crash_ranges; + } else { + range = info->memory_range; + nr_range = info->memory_ranges; + } + + nr_range_saved = nr_range; + if (nr_range > E820MAX) { + nr_range = E820MAX; + } + + real_mode->e820_map_nr = nr_range; + dbgprintf("E820 memmap:\n"); + add_e820_map_from_mr(real_mode, real_mode->e820_map, range, nr_range); + + if (nr_range_saved > E820MAX) { + dbgprintf("extra E820 memmap are passed via setup_data\n"); + setup_e820_ext(info, real_mode, range, nr_range_saved); + } +} + +static void setup_rng_seed(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + struct { + struct setup_data header; + uint8_t rng_seed[32]; + } *sd; + + sd = xmalloc(sizeof(*sd)); + sd->header.next = 0; + sd->header.len = sizeof(sd->rng_seed); + sd->header.type = SETUP_RNG_SEED; + + if (getrandom(sd->rng_seed, sizeof(sd->rng_seed), GRND_NONBLOCK) != + sizeof(sd->rng_seed)) + return; /* Not initialized, so don't pass a seed. */ + + add_setup_data(info, real_mode, &sd->header); +} + +static int +get_efi_mem_desc_version(struct x86_linux_param_header *real_mode) +{ + struct efi_info *ei = (struct efi_info *)real_mode->efi_info; + + return ei->efi_memdesc_version; +} + +static void setup_efi_info(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + int ret, desc_version; + off_t offset = offsetof(typeof(*real_mode), efi_info); + + ret = get_bootparam(&real_mode->efi_info, offset, 32); + if (ret) + return; + if (((struct efi_info *)real_mode->efi_info)->efi_memmap_size == 0) + /* zero filled efi_info */ + goto out; + desc_version = get_efi_mem_desc_version(real_mode); + if (desc_version != 1) { + fprintf(stderr, + "efi memory descriptor version %d is not supported!\n", + desc_version); + goto out; + } + ret = setup_efi_data(info, real_mode); + if (ret) + goto out; + + return; + +out: + memset(&real_mode->efi_info, 0, 32); + return; +} + +void setup_linux_system_parameters(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + int err; + + /* get subarch from running kernel */ + setup_subarch(real_mode); + if (bzImage_support_efi_boot && !arch_options.noefi) + setup_efi_info(info, real_mode); + + /* Default screen size */ + real_mode->orig_x = 0; + real_mode->orig_y = 0; + real_mode->orig_video_page = 0; + real_mode->orig_video_mode = 0; + real_mode->orig_video_cols = 80; + real_mode->orig_video_lines = 25; + real_mode->orig_video_ega_bx = 0; + real_mode->orig_video_isVGA = 1; + real_mode->orig_video_points = 16; + + /* setup vesa fb if possible, or just use original screen_info */ + err = setup_linux_vesafb(real_mode); + if (err) { + uint16_t cl_magic, cl_offset; + + /* save and restore the old cmdline param if needed */ + cl_magic = real_mode->cl_magic; + cl_offset = real_mode->cl_offset; + + err = get_bootparam(real_mode, 0, sizeof(struct screen_info)); + if (!err) { + real_mode->cl_magic = cl_magic; + real_mode->cl_offset = cl_offset; + } + } + /* Fill in the memsize later */ + real_mode->ext_mem_k = 0; + real_mode->alt_mem_k = 0; + real_mode->e820_map_nr = 0; + + /* Default APM info */ + memset(&real_mode->apm_bios_info, 0, sizeof(real_mode->apm_bios_info)); + /* Default drive info */ + memset(&real_mode->drive_info, 0, sizeof(real_mode->drive_info)); + /* Default sysdesc table */ + real_mode->sys_desc_table.length = 0; + + /* default yes: this can be overridden on the command line */ + real_mode->mount_root_rdonly = 0xFFFF; + + /* default /dev/hda + * this can be overrident on the command line if necessary. + */ + real_mode->root_dev = (0x3 <<8)| 0; + + /* another safe default */ + real_mode->aux_device_info = 0; + + setup_e820(info, real_mode); + + /* pass RNG seed */ + setup_rng_seed(info, real_mode); + + /* fill the EDD information */ + setup_edd_info(real_mode); + + /* Always try to fill acpi_rsdp_addr */ + real_mode->acpi_rsdp_addr = get_acpi_rsdp(); +} + +void setup_linux_dtb(struct kexec_info *info, struct x86_linux_param_header *real_mode, + const char *dtb_buf, int dtb_len) +{ + struct setup_data *sd; + + sd = xmalloc(sizeof(struct setup_data) + dtb_len); + sd->next = 0; + sd->len = dtb_len; + sd->type = SETUP_DTB; + memcpy(sd->data, dtb_buf, dtb_len); + + + add_setup_data(info, real_mode, sd); +} diff --git a/kexec/arch/i386/x86-linux-setup.h b/kexec/arch/i386/x86-linux-setup.h new file mode 100644 index 0000000..b5e1ad5 --- /dev/null +++ b/kexec/arch/i386/x86-linux-setup.h @@ -0,0 +1,38 @@ +#ifndef X86_LINUX_SETUP_H +#define X86_LINUX_SETUP_H +#include <x86/x86-linux.h> + +void init_linux_parameters(struct x86_linux_param_header *real_mode); +void setup_linux_bootloader_parameters_high( + struct kexec_info *info, struct x86_linux_param_header *real_mode, + unsigned long real_mode_base, unsigned long cmdline_offset, + const char *cmdline, off_t cmdline_len, + const char *initrd_buf, off_t initrd_size, int initrd_high); +static inline void setup_linux_bootloader_parameters( + struct kexec_info *info, struct x86_linux_param_header *real_mode, + unsigned long real_mode_base, unsigned long cmdline_offset, + const char *cmdline, off_t cmdline_len, + const char *initrd_buf, off_t initrd_size) +{ + setup_linux_bootloader_parameters_high(info, + real_mode, real_mode_base, + cmdline_offset, cmdline, cmdline_len, + initrd_buf, initrd_size, 0); +} +void setup_linux_system_parameters(struct kexec_info *info, + struct x86_linux_param_header *real_mode); +void setup_linux_dtb(struct kexec_info *info, struct x86_linux_param_header *real_mode, + const char *dtb_buf, int dtb_len); +int get_bootparam(void *buf, off_t offset, size_t size); + + +#define SETUP_BASE 0x90000 +#define KERN32_BASE 0x100000 /* 1MB */ +#define INITRD_BASE 0x1000000 /* 16MB */ + +/* command line parameter may be appended by purgatory */ +#define PURGATORY_CMDLINE_SIZE 64 +extern int bzImage_support_efi_boot; +extern struct arch_options_t arch_options; + +#endif /* X86_LINUX_SETUP_H */ |