diff options
Diffstat (limited to 'kexec/arch')
147 files changed, 24697 insertions, 0 deletions
diff --git a/kexec/arch/alpha/Makefile b/kexec/arch/alpha/Makefile new file mode 100644 index 0000000..4575d61 --- /dev/null +++ b/kexec/arch/alpha/Makefile @@ -0,0 +1,4 @@ +alpha_KEXEC_SRCS= +dist += kexec/arch/alpha/Makefile kexec/arch/alpha/include/arch/options.h + $(alpha_KEXEC_SRCS) + diff --git a/kexec/arch/alpha/include/arch/options.h b/kexec/arch/alpha/include/arch/options.h new file mode 100644 index 0000000..a012c8a --- /dev/null +++ b/kexec/arch/alpha/include/arch/options.h @@ -0,0 +1,20 @@ +#ifndef KEXEC_ARCH_ALPHA_OPTIONS_H +#define KEXEC_ARCH_ALPHA_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* See the other architectures for details of these; Alpha has no + * loader-specific options yet. + */ +#define KEXEC_ALL_OPTIONS KEXEC_ARCH_OPTIONS +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_ALPHA_OPTIONS_H */ diff --git a/kexec/arch/arm/Makefile b/kexec/arch/arm/Makefile new file mode 100644 index 0000000..4454f47 --- /dev/null +++ b/kexec/arch/arm/Makefile @@ -0,0 +1,34 @@ +# +# kexec arm (linux booting linux) +# +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +arm_FS2DT = kexec/fs2dt.c +arm_FS2DT_INCLUDE = -include $(srcdir)/kexec/arch/arm/crashdump-arm.h \ + -include $(srcdir)/kexec/arch/arm/kexec-arm.h + +arm_MEM_REGIONS = kexec/mem_regions.c + +arm_KEXEC_SRCS= kexec/arch/arm/kexec-elf-rel-arm.c +arm_KEXEC_SRCS+= kexec/arch/arm/kexec-zImage-arm.c +arm_KEXEC_SRCS+= kexec/arch/arm/kexec-uImage-arm.c +arm_KEXEC_SRCS+= kexec/arch/arm/kexec-arm.c +arm_KEXEC_SRCS+= kexec/arch/arm/crashdump-arm.c +arm_KEXEC_SRCS+= kexec/fs2dt.c + +libfdt_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) + +arm_CPPFLAGS = -I$(srcdir)/kexec/libfdt + +# We want 64-bit file IO for kdump to work correctly on LPAE systems +arm_CPPFLAGS += -D_FILE_OFFSET_BITS=64 + +arm_KEXEC_SRCS += $(libfdt_SRCS) + +arm_UIMAGE = kexec/kexec-uImage.c +arm_PHYS_TO_VIRT = kexec/arch/arm/phys_to_virt.c + +dist += kexec/arch/arm/Makefile $(arm_KEXEC_SRCS) $(arm_PHYS_TO_VIRT) \ + kexec/arch/arm/iomem.h kexec/arch/arm/phys_to_virt.h \ + kexec/arch/arm/crashdump-arm.h kexec/arch/arm/kexec-arm.h \ + kexec/arch/arm/include/arch/options.h diff --git a/kexec/arch/arm/crashdump-arm.c b/kexec/arch/arm/crashdump-arm.c new file mode 100644 index 0000000..1ec1826 --- /dev/null +++ b/kexec/arch/arm/crashdump-arm.c @@ -0,0 +1,388 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) Nokia Corporation, 2010. + * Author: Mika Westerberg + * + * Based on x86 implementation + * Copyright (C) IBM Corporation, 2005. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <limits.h> +#include <elf.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../crashdump.h" +#include "../../mem_regions.h" +#include "crashdump-arm.h" +#include "iomem.h" +#include "phys_to_virt.h" + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define ELFDATANATIVE ELFDATA2LSB +#elif __BYTE_ORDER == __BIG_ENDIAN +#define ELFDATANATIVE ELFDATA2MSB +#else +#error "Unknown machine endian" +#endif + +/* + * Used to save various memory ranges/regions needed for the captured + * kernel to boot. (lime memmap= option in other archs) + */ +static struct memory_range crash_memory_ranges[CRASH_MAX_MEMORY_RANGES]; +struct memory_ranges usablemem_rgns = { + .max_size = CRASH_MAX_MEMORY_RANGES, + .ranges = crash_memory_ranges, +}; + +/* The boot-time physical memory range reserved for crashkernel region */ +struct memory_range crash_kernel_mem; + +/* reserved regions */ +#define CRASH_MAX_RESERVED_RANGES 2 +static struct memory_range crash_reserved_ranges[CRASH_MAX_RESERVED_RANGES]; +static struct memory_ranges crash_reserved_rgns = { + .max_size = CRASH_MAX_RESERVED_RANGES, + .ranges = crash_reserved_ranges, +}; + +struct memory_range elfcorehdr_mem; + +static struct crash_elf_info elf_info = { + .class = ELFCLASS32, + .data = ELFDATANATIVE, + .machine = EM_ARM, + .page_offset = DEFAULT_PAGE_OFFSET, +}; + +extern unsigned long long user_page_offset; + +static int get_kernel_page_offset(struct kexec_info *info, + struct crash_elf_info *elf_info) +{ + unsigned long long stext_sym_addr = get_kernel_sym("_stext"); + if (stext_sym_addr == 0) { + if (user_page_offset != (-1ULL)) { + elf_info->page_offset = user_page_offset; + dbgprintf("Unable to get _stext symbol from /proc/kallsyms, " + "use user provided vaule: %llx\n", + elf_info->page_offset); + return 0; + } + elf_info->page_offset = (unsigned long long)DEFAULT_PAGE_OFFSET; + dbgprintf("Unable to get _stext symbol from /proc/kallsyms, " + "use default: %llx\n", + elf_info->page_offset); + return 0; + } else if ((user_page_offset != (-1ULL)) && + (user_page_offset != stext_sym_addr)) { + fprintf(stderr, "PAGE_OFFSET is set to %llx " + "instead of user provided value %llx\n", + stext_sym_addr & (~KVBASE_MASK), + user_page_offset); + } + elf_info->page_offset = stext_sym_addr & (~KVBASE_MASK); + return 0; +} + +/** + * crash_get_memory_ranges() - read system physical memory + * + * Function reads through system physical memory and stores found memory regions + * in @crash_memory_ranges. Number of memory regions found is placed in + * @crash_memory_nr_ranges. Regions are sorted in ascending order. + * + * Returns %0 in case of success and %-1 otherwise (errno is set). + */ +static int crash_get_memory_ranges(void) +{ + int i; + + if (usablemem_rgns.size < 1) { + errno = EINVAL; + return -1; + } + + dbgprint_mem_range("Reserved memory ranges", + crash_reserved_rgns.ranges, + crash_reserved_rgns.size); + + /* + * Exclude all reserved memory from the usable memory regions. + * We want to avoid dumping the crashkernel region itself. Note + * that this may result memory regions in usablemem_rgns being + * split. + */ + for (i = 0; i < crash_reserved_rgns.size; i++) { + if (mem_regions_exclude(&usablemem_rgns, + &crash_reserved_rgns.ranges[i])) { + fprintf(stderr, + "Error: Number of crash memory ranges excedeed the max limit\n"); + errno = ENOMEM; + return -1; + } + } + + /* + * Make sure that the memory regions are sorted. + */ + mem_regions_sort(&usablemem_rgns); + + dbgprint_mem_range("Coredump memory ranges", + usablemem_rgns.ranges, usablemem_rgns.size); + + return 0; +} + +/** + * cmdline_add_elfcorehdr() - adds elfcorehdr= to @cmdline + * @cmdline: buffer where parameter is placed + * @elfcorehdr: physical address of elfcorehdr + * + * Function appends 'elfcorehdr=start' at the end of the command line given in + * @cmdline. Note that @cmdline must be at least %COMMAND_LINE_SIZE bytes long + * (inclunding %NUL). + */ +static void cmdline_add_elfcorehdr(char *cmdline, unsigned long elfcorehdr) +{ + char buf[COMMAND_LINE_SIZE]; + int buflen; + + buflen = snprintf(buf, sizeof(buf), "%s elfcorehdr=%#lx", + cmdline, elfcorehdr); + if (buflen < 0) + die("Failed to construct elfcorehdr= command line parameter\n"); + if (buflen >= sizeof(buf)) + die("Command line overflow\n"); + + (void) strncpy(cmdline, buf, COMMAND_LINE_SIZE); + cmdline[COMMAND_LINE_SIZE - 1] = '\0'; +} + +/** + * cmdline_add_mem() - adds mem= parameter to kernel command line + * @cmdline: buffer where parameter is placed + * @size: size of the kernel reserved memory (in bytes) + * + * This function appends 'mem=size' at the end of the command line given in + * @cmdline. Note that @cmdline must be at least %COMMAND_LINE_SIZE bytes long + * (including %NUL). + */ +static void cmdline_add_mem(char *cmdline, unsigned long size) +{ + char buf[COMMAND_LINE_SIZE]; + int buflen; + + buflen = snprintf(buf, sizeof(buf), "%s mem=%ldK", cmdline, size >> 10); + if (buflen < 0) + die("Failed to construct mem= command line parameter\n"); + if (buflen >= sizeof(buf)) + die("Command line overflow\n"); + + (void) strncpy(cmdline, buf, COMMAND_LINE_SIZE); + cmdline[COMMAND_LINE_SIZE - 1] = '\0'; +} + +static unsigned long long range_size(const struct memory_range *r) +{ + return r->end - r->start + 1; +} + +static void dump_memory_ranges(void) +{ + int i; + + if (!kexec_debug) + return; + + dbgprintf("crashkernel: [%#llx - %#llx] (%ldM)\n", + crash_kernel_mem.start, crash_kernel_mem.end, + (unsigned long)range_size(&crash_kernel_mem) >> 20); + + for (i = 0; i < usablemem_rgns.size; i++) { + struct memory_range *r = usablemem_rgns.ranges + i; + dbgprintf("memory range: [%#llx - %#llx] (%ldM)\n", + r->start, r->end, (unsigned long)range_size(r) >> 20); + } +} + +/** + * load_crashdump_segments() - loads additional segments needed for kdump + * @info: kexec info structure + * @mod_cmdline: kernel command line + * + * This function loads additional segments which are needed for the dump capture + * kernel. It also updates kernel command line passed in @mod_cmdline to have + * right parameters for the dump capture kernel. + * + * Return %0 in case of success and %-1 in case of error. + */ +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline) +{ + unsigned long elfcorehdr; + unsigned long bufsz; + void *buf; + int err; + int last_ranges; + + /* + * First fetch all the memory (RAM) ranges that we are going to pass to + * the crashdump kernel during panic. + */ + err = crash_get_memory_ranges(); + if (err) + return err; + + /* + * Now that we have memory regions sorted, we can use first memory + * region as PHYS_OFFSET. + */ + phys_offset = usablemem_rgns.ranges->start; + + if (get_kernel_page_offset(info, &elf_info)) + return -1; + + dbgprintf("phys offset = %#llx, page offset = %llx\n", + phys_offset, elf_info.page_offset); + + /* + * Ensure that the crash kernel memory range is sane. The crash kernel + * must be located within memory which is visible during booting. + */ + if (crash_kernel_mem.end > ARM_MAX_VIRTUAL) { + fprintf(stderr, + "Crash kernel memory [0x%llx-0x%llx] is inaccessible at boot - unable to load crash kernel\n", + crash_kernel_mem.start, crash_kernel_mem.end); + return -1; + } + + last_ranges = usablemem_rgns.size - 1; + if (last_ranges < 0) + last_ranges = 0; + + if (crash_memory_ranges[last_ranges].end > UINT32_MAX) { + dbgprintf("Using 64-bit ELF core format\n"); + + /* for support LPAE enabled kernel*/ + elf_info.class = ELFCLASS64; + + err = crash_create_elf64_headers(info, &elf_info, + usablemem_rgns.ranges, + usablemem_rgns.size, &buf, &bufsz, + ELF_CORE_HEADER_ALIGN); + } else { + dbgprintf("Using 32-bit ELF core format\n"); + err = crash_create_elf32_headers(info, &elf_info, + usablemem_rgns.ranges, + usablemem_rgns.size, &buf, &bufsz, + ELF_CORE_HEADER_ALIGN); + } + if (err) + return err; + + /* + * We allocate ELF core header from the end of the memory area reserved + * for the crashkernel. We align the header to SECTION_SIZE (which is + * 1MB) so that available memory passed in kernel command line will be + * aligned to 1MB. This is because kernel create_mapping() wants memory + * regions to be aligned to SECTION_SIZE. + */ + elfcorehdr = add_buffer_phys_virt(info, buf, bufsz, bufsz, 1 << 20, + crash_kernel_mem.start, + crash_kernel_mem.end, -1, 0); + + elfcorehdr_mem.start = elfcorehdr; + elfcorehdr_mem.end = elfcorehdr + bufsz - 1; + + dbgprintf("elfcorehdr 0x%llx-0x%llx\n", elfcorehdr_mem.start, + elfcorehdr_mem.end); + cmdline_add_elfcorehdr(mod_cmdline, elfcorehdr); + + /* + * Add 'mem=size' parameter to dump capture kernel command line. This + * prevents the dump capture kernel from using any other memory regions + * which belong to the primary kernel. + */ + cmdline_add_mem(mod_cmdline, elfcorehdr - crash_kernel_mem.start); + + dump_memory_ranges(); + dbgprintf("kernel command line: \"%s\"\n", mod_cmdline); + + return 0; +} + +/** + * iomem_range_callback() - callback called for each iomem region + * @data: not used + * @nr: not used + * @str: name of the memory region (not NULL terminated) + * @base: start address of the memory region + * @length: size of the memory region + * + * This function is called for each memory range in /proc/iomem, stores + * the location of the crash kernel range into @crash_kernel_mem, and + * stores the system RAM into @usablemem_rgns. + */ +static int iomem_range_callback(void *UNUSED(data), int UNUSED(nr), + char *str, unsigned long long base, + unsigned long long length) +{ + if (strncmp(str, CRASH_KERNEL_BOOT, strlen(CRASH_KERNEL_BOOT)) == 0) { + crash_kernel_mem.start = base; + crash_kernel_mem.end = base + length - 1; + crash_kernel_mem.type = RANGE_RAM; + return mem_regions_add(&crash_reserved_rgns, + base, length, RANGE_RAM); + } + else if (strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)) == 0) { + if (crash_kernel_mem.start == crash_kernel_mem.end) { + crash_kernel_mem.start = base; + crash_kernel_mem.end = base + length - 1; + crash_kernel_mem.type = RANGE_RAM; + } + return mem_regions_add(&crash_reserved_rgns, + base, length, RANGE_RAM); + } + else if (strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0) { + return mem_regions_add(&usablemem_rgns, + base, length, RANGE_RAM); + } + return 0; +} + +/** + * is_crashkernel_mem_reserved() - check for the crashkernel reserved region + * + * Check for the crashkernel reserved region in /proc/iomem, and return + * true if it is present, or false otherwise. We use this to store the + * location of this region, and system RAM regions. + */ +int is_crashkernel_mem_reserved(void) +{ + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + return crash_kernel_mem.start != crash_kernel_mem.end; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + return parse_iomem_single("Crash kernel\n", start, end); +} diff --git a/kexec/arch/arm/crashdump-arm.h b/kexec/arch/arm/crashdump-arm.h new file mode 100644 index 0000000..bbdf8bf --- /dev/null +++ b/kexec/arch/arm/crashdump-arm.h @@ -0,0 +1,27 @@ +#ifndef CRASHDUMP_ARM_H +#define CRASHDUMP_ARM_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define COMMAND_LINE_SIZE 1024 +#define DEFAULT_PAGE_OFFSET (0xc0000000) +#define KVBASE_MASK (0x1ffffff) +#define CRASH_MAX_MEMORY_RANGES 32 +#define ARM_MAX_VIRTUAL UINT32_MAX + + +extern struct memory_ranges usablemem_rgns; +extern struct memory_range crash_kernel_mem; +extern struct memory_range elfcorehdr_mem; + +struct kexec_info; + +extern int load_crashdump_segments(struct kexec_info *, char *); + +#ifdef __cplusplus +} +#endif + +#endif /* CRASHDUMP_ARM_H */ diff --git a/kexec/arch/arm/include/arch/options.h b/kexec/arch/arm/include/arch/options.h new file mode 100644 index 0000000..6fabfb7 --- /dev/null +++ b/kexec/arch/arm/include/arch/options.h @@ -0,0 +1,52 @@ +#ifndef KEXEC_ARCH_ARM_OPTIONS_H +#define KEXEC_ARCH_ARM_OPTIONS_H + +#define OPT_DT_NO_OLD_ROOT (OPT_MAX+0) +#define OPT_ARCH_MAX (OPT_MAX+1) + +#define OPT_DTB (OPT_ARCH_MAX+0) +#define OPT_ATAGS (OPT_ARCH_MAX+1) +#define OPT_IMAGE_SIZE (OPT_ARCH_MAX+2) +#define OPT_PAGE_OFFSET (OPT_ARCH_MAX+3) +#define OPT_APPEND (OPT_ARCH_MAX+4) +#define OPT_RAMDISK (OPT_ARCH_MAX+5) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "dt-no-old-root", 0, 0, OPT_DT_NO_OLD_ROOT }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, 0, OPT_APPEND }, \ + { "append", 1, 0, OPT_APPEND }, \ + { "initrd", 1, 0, OPT_RAMDISK }, \ + { "ramdisk", 1, 0, OPT_RAMDISK }, \ + { "dtb", 1, 0, OPT_DTB }, \ + { "atags", 0, 0, OPT_ATAGS }, \ + { "image-size", 1, 0, OPT_IMAGE_SIZE }, \ + { "page-offset", 1, 0, OPT_PAGE_OFFSET }, + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR "" + +extern unsigned int kexec_arm_image_size; + +#endif /* KEXEC_ARCH_ARM_OPTIONS_H */ diff --git a/kexec/arch/arm/iomem.h b/kexec/arch/arm/iomem.h new file mode 100644 index 0000000..85f958e --- /dev/null +++ b/kexec/arch/arm/iomem.h @@ -0,0 +1,9 @@ +#ifndef IOMEM_H +#define IOMEM_H + +#define SYSTEM_RAM "System RAM\n" +#define SYSTEM_RAM_BOOT "System RAM (boot alias)\n" +#define CRASH_KERNEL "Crash kernel\n" +#define CRASH_KERNEL_BOOT "Crash kernel (boot alias)\n" + +#endif diff --git a/kexec/arch/arm/kexec-arm.c b/kexec/arch/arm/kexec-arm.c new file mode 100644 index 0000000..49f35b1 --- /dev/null +++ b/kexec/arch/arm/kexec-arm.c @@ -0,0 +1,150 @@ +/* + * kexec: Linux boots Linux + * + * modified from kexec-ppc.c + * + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-arm.h" +#include <arch/options.h> +#include "../../fs2dt.h" +#include "iomem.h" + +#define MAX_MEMORY_RANGES 64 +#define MAX_LINE 160 +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +/* Return a sorted list of available memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + const char *iomem = proc_iomem(); + int memory_ranges = 0; + char line[MAX_LINE]; + FILE *fp; + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + + while(fgets(line, sizeof(line), fp) != 0) { + unsigned long long start, end; + char *str; + int type; + int consumed; + int count; + if (memory_ranges >= MAX_MEMORY_RANGES) + break; + count = sscanf(line, "%llx-%llx : %n", + &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + + if (memcmp(str, SYSTEM_RAM_BOOT, strlen(SYSTEM_RAM_BOOT)) == 0 || + memcmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0) { + type = RANGE_RAM; + } + else if (memcmp(str, "reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } + else { + continue; + } + + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + memory_ranges++; + } + fclose(fp); + *range = memory_range; + *ranges = memory_ranges; + + dbgprint_mem_range("MEMORY RANGES", *range, *ranges); + + return 0; +} + +/* Supported file types and callbacks */ +struct file_type file_type[] = { + /* uImage is probed before zImage because the latter also accepts + uncompressed images. */ + {"uImage", uImage_arm_probe, uImage_arm_load, zImage_arm_usage}, + {"zImage", zImage_arm_probe, zImage_arm_load, zImage_arm_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ + printf(" --image-size=<size>\n" + " Specify the assumed total image size of\n" + " the kernel that is about to be loaded,\n" + " including the .bss section, as reported\n" + " by 'arm-linux-size vmlinux'. If not\n" + " specified, this value is implicitly set\n" + " to the compressed images size * 4.\n" + " --dt-no-old-root\n" + " do not reuse old kernel root= param.\n" + " while creating flatten device tree.\n"); +} + +int arch_process_options(int argc, char **argv) +{ + /* We look for all options so getopt_long doesn't start reordering + * argv[] before file_type[n].load() gets a look in. + */ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + case OPT_DT_NO_OLD_ROOT: + dt_no_old_root = 1; + break; + default: + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + { "arm", KEXEC_ARCH_ARM }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +/* return 1 if /sys/firmware/fdt exists, otherwise return 0 */ +int have_sysfs_fdt(void) +{ + return !access(SYSFS_FDT, F_OK); +} diff --git a/kexec/arch/arm/kexec-arm.h b/kexec/arch/arm/kexec-arm.h new file mode 100644 index 0000000..a74cce2 --- /dev/null +++ b/kexec/arch/arm/kexec-arm.h @@ -0,0 +1,22 @@ +#ifndef KEXEC_ARM_H +#define KEXEC_ARM_H + +#include <sys/types.h> + +#define SYSFS_FDT "/sys/firmware/fdt" +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 16 + +extern off_t initrd_base, initrd_size; + +int zImage_arm_probe(const char *buf, off_t len); +int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void zImage_arm_usage(void); + +int uImage_arm_probe(const char *buf, off_t len); +int uImage_arm_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +extern int have_sysfs_fdt(void); + +#endif /* KEXEC_ARM_H */ diff --git a/kexec/arch/arm/kexec-elf-rel-arm.c b/kexec/arch/arm/kexec-elf-rel-arm.c new file mode 100644 index 0000000..a939cf4 --- /dev/null +++ b/kexec/arch/arm/kexec-elf-rel-arm.c @@ -0,0 +1,36 @@ +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS32) { + return 0; + } + if (ehdr->e_machine != EM_ARM) + { + return 0; + } + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + switch(r_type) { + case R_ARM_ABS32: + *((uint32_t *)location) += value; + break; + case R_ARM_REL32: + *((uint32_t *)location) += value - address; + break; + default: + die("Unknown rel relocation: %lu\n", r_type); + break; + } +} diff --git a/kexec/arch/arm/kexec-uImage-arm.c b/kexec/arch/arm/kexec-uImage-arm.c new file mode 100644 index 0000000..03c2f4d --- /dev/null +++ b/kexec/arch/arm/kexec-uImage-arm.c @@ -0,0 +1,22 @@ +/* + * uImage support added by Marc Andre Tanner <mat@brain-dump.org> + */ +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <image.h> +#include <kexec-uImage.h> +#include "../../kexec.h" +#include "kexec-arm.h" + +int uImage_arm_probe(const char *buf, off_t len) +{ + return uImage_probe_kernel(buf, len, IH_ARCH_ARM); +} + +int uImage_arm_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + return zImage_arm_load(argc, argv, buf + sizeof(struct image_header), + len - sizeof(struct image_header), info); +} diff --git a/kexec/arch/arm/kexec-zImage-arm.c b/kexec/arch/arm/kexec-zImage-arm.c new file mode 100644 index 0000000..8b474dd --- /dev/null +++ b/kexec/arch/arm/kexec-zImage-arm.c @@ -0,0 +1,914 @@ +/* + * - 08/21/2007 ATAG support added by Uli Luckas <u.luckas@road.de> + * + */ +#define _GNU_SOURCE +#define _XOPEN_SOURCE +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <stdint.h> +#include <unistd.h> +#include <getopt.h> +#include <unistd.h> +#include <libfdt.h> +#include <arch/options.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-arm.h" +#include "../../fs2dt.h" +#include "crashdump-arm.h" +#include "iomem.h" + +#define BOOT_PARAMS_SIZE 1536 + +off_t initrd_base, initrd_size; +unsigned int kexec_arm_image_size = 0; +unsigned long long user_page_offset = (-1ULL); + +struct zimage_header { + uint32_t instr[9]; + uint32_t magic; +#define ZIMAGE_MAGIC cpu_to_le32(0x016f2818) + uint32_t start; + uint32_t end; + uint32_t endian; + + /* Extension to the data passed to the boot agent. The offset + * points at a tagged table following a similar format to the + * ATAGs. + */ + uint32_t magic2; +#define ZIMAGE_MAGIC2 (0x45454545) + uint32_t extension_tag_offset; +}; + +struct android_image { + char magic[8]; + uint32_t kernel_size; + uint32_t kernel_addr; + uint32_t ramdisk_size; + uint32_t ramdisk_addr; + uint32_t stage2_size; + uint32_t stage2_addr; + uint32_t tags_addr; + uint32_t page_size; + uint32_t reserved1; + uint32_t reserved2; + char name[16]; + char command_line[512]; + uint32_t chksum[8]; +}; + +struct tag_header { + uint32_t size; + uint32_t tag; +}; + +/* The list must start with an ATAG_CORE node */ +#define ATAG_CORE 0x54410001 + +struct tag_core { + uint32_t flags; /* bit 0 = read-only */ + uint32_t pagesize; + uint32_t rootdev; +}; + +/* it is allowed to have multiple ATAG_MEM nodes */ +#define ATAG_MEM 0x54410002 + +struct tag_mem32 { + uint32_t size; + uint32_t start; /* physical start address */ +}; + +/* describes where the compressed ramdisk image lives (virtual address) */ +/* + * this one accidentally used virtual addresses - as such, + * it's deprecated. + */ +#define ATAG_INITRD 0x54410005 + +/* describes where the compressed ramdisk image lives (physical address) */ +#define ATAG_INITRD2 0x54420005 + +struct tag_initrd { + uint32_t start; /* physical start address */ + uint32_t size; /* size of compressed ramdisk image in bytes */ +}; + +/* command line: \0 terminated string */ +#define ATAG_CMDLINE 0x54410009 + +struct tag_cmdline { + char cmdline[1]; /* this is the minimum size */ +}; + +/* The list ends with an ATAG_NONE node. */ +#define ATAG_NONE 0x00000000 + +struct tag { + struct tag_header hdr; + union { + struct tag_core core; + struct tag_mem32 mem; + struct tag_initrd initrd; + struct tag_cmdline cmdline; + } u; +}; + +#define tag_next(t) ((struct tag *)((uint32_t *)(t) + (t)->hdr.size)) +#define byte_size(t) ((t)->hdr.size << 2) +#define tag_size(type) ((sizeof(struct tag_header) + sizeof(struct type) + 3) >> 2) + +struct zimage_tag { + struct tag_header hdr; + union { +#define ZIMAGE_TAG_KRNL_SIZE cpu_to_le32(0x5a534c4b) + struct zimage_krnl_size { + uint32_t size_ptr; + uint32_t bss_size; + } krnl_size; + } u; +}; + +int zImage_arm_probe(const char *UNUSED(buf), off_t UNUSED(len)) +{ + /* + * Only zImage loading is supported. Do not check if + * the buffer is valid kernel image + */ + return 0; +} + +void zImage_arm_usage(void) +{ + printf( " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " --dtb=FILE Use FILE as the fdt blob.\n" + " --atags Use ATAGs instead of device-tree.\n" + " --page-offset=PAGE_OFFSET\n" + " Set PAGE_OFFSET of crash dump vmcore\n" + ); +} + +static +struct tag * atag_read_tags(void) +{ + static unsigned long buf[BOOT_PARAMS_SIZE]; + const char fn[]= "/proc/atags"; + FILE *fp; + fp = fopen(fn, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + fn, strerror(errno)); + return NULL; + } + + if (!fread(buf, sizeof(buf[1]), BOOT_PARAMS_SIZE, fp)) { + fclose(fp); + return NULL; + } + + if (ferror(fp)) { + fprintf(stderr, "Cannot read %s: %s\n", + fn, strerror(errno)); + fclose(fp); + return NULL; + } + + fclose(fp); + return (struct tag *) buf; +} + +static +int create_mem32_tag(struct tag_mem32 *tag_mem32) +{ + const char fn[]= "/proc/device-tree/memory/reg"; + uint32_t tmp[2]; + FILE *fp; + + fp = fopen(fn, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %m\n", fn); + return -1; + } + + if (fread(tmp, sizeof(tmp[0]), 2, fp) != 2) { + fprintf(stderr, "Short read %s\n", fn); + fclose(fp); + return -1; + } + + if (ferror(fp)) { + fprintf(stderr, "Cannot read %s: %m\n", fn); + fclose(fp); + return -1; + } + + /* atags_mem32 has base/size fields reversed! */ + tag_mem32->size = be32_to_cpu(tmp[1]); + tag_mem32->start = be32_to_cpu(tmp[0]); + + fclose(fp); + return 0; +} + +static +int atag_arm_load(struct kexec_info *info, unsigned long base, + const char *command_line, off_t command_line_len, const char *initrd) +{ + struct tag *saved_tags = atag_read_tags(); + char *buf; + off_t len; + struct tag *params; + + buf = xmalloc(getpagesize()); + memset(buf, 0xff, getpagesize()); + params = (struct tag *)buf; + + if (saved_tags) { + // Copy tags + saved_tags = (struct tag *) saved_tags; + while(byte_size(saved_tags)) { + switch (saved_tags->hdr.tag) { + case ATAG_INITRD: + case ATAG_INITRD2: + case ATAG_CMDLINE: + case ATAG_NONE: + // skip these tags + break; + default: + // copy all other tags + memcpy(params, saved_tags, byte_size(saved_tags)); + params = tag_next(params); + } + saved_tags = tag_next(saved_tags); + } + } else { + params->hdr.size = 2; + params->hdr.tag = ATAG_CORE; + params = tag_next(params); + + if (!create_mem32_tag(¶ms->u.mem)) { + params->hdr.size = 4; + params->hdr.tag = ATAG_MEM; + params = tag_next(params); + } + } + + if (initrd) { + params->hdr.size = tag_size(tag_initrd); + params->hdr.tag = ATAG_INITRD2; + params->u.initrd.start = initrd_base; + params->u.initrd.size = initrd_size; + params = tag_next(params); + } + + if (command_line) { + params->hdr.size = (sizeof(struct tag_header) + command_line_len + 3) >> 2; + params->hdr.tag = ATAG_CMDLINE; + memcpy(params->u.cmdline.cmdline, command_line, + command_line_len); + params->u.cmdline.cmdline[command_line_len - 1] = '\0'; + params = tag_next(params); + } + + params->hdr.size = 0; + params->hdr.tag = ATAG_NONE; + + len = ((char *)params - buf) + sizeof(struct tag_header); + + add_segment(info, buf, len, base, len); + + return 0; +} + +static int setup_dtb_prop(char **bufp, off_t *sizep, int parentoffset, + const char *node_name, const char *prop_name, + const void *val, int len) +{ + char *dtb_buf; + off_t dtb_size; + int off; + int prop_len = 0; + const struct fdt_property *prop; + + if ((bufp == NULL) || (sizep == NULL) || (*bufp == NULL)) + die("Internal error\n"); + + dtb_buf = *bufp; + dtb_size = *sizep; + + /* check if the subnode has already exist */ + off = fdt_subnode_offset(dtb_buf, parentoffset, node_name); + if (off == -FDT_ERR_NOTFOUND) { + dtb_size += fdt_node_len(node_name); + fdt_set_totalsize(dtb_buf, dtb_size); + dtb_buf = xrealloc(dtb_buf, dtb_size); + off = fdt_add_subnode(dtb_buf, parentoffset, node_name); + } + + if (off < 0) { + fprintf(stderr, "FDT: Error adding %s node.\n", node_name); + return -1; + } + + prop = fdt_get_property(dtb_buf, off, prop_name, &prop_len); + if ((prop == NULL) && (prop_len != -FDT_ERR_NOTFOUND)) { + die("FDT: fdt_get_property"); + } else if (prop == NULL) { + /* prop_len == -FDT_ERR_NOTFOUND */ + /* prop doesn't exist */ + dtb_size += fdt_prop_len(prop_name, len); + } else { + if (prop_len < len) + dtb_size += FDT_TAGALIGN(len - prop_len); + } + + if (fdt_totalsize(dtb_buf) < dtb_size) { + fdt_set_totalsize(dtb_buf, dtb_size); + dtb_buf = xrealloc(dtb_buf, dtb_size); + } + + if (fdt_setprop(dtb_buf, off, prop_name, + val, len) != 0) { + fprintf(stderr, "FDT: Error setting %s/%s property.\n", + node_name, prop_name); + return -1; + } + *bufp = dtb_buf; + *sizep = dtb_size; + return 0; +} + +static const struct zimage_tag *find_extension_tag(const char *buf, off_t len, + uint32_t tag_id) +{ + const struct zimage_header *hdr = (const struct zimage_header *)buf; + const struct zimage_tag *tag; + uint32_t offset, size; + uint32_t max = len - sizeof(struct tag_header); + + if (len < sizeof(*hdr) || + hdr->magic != ZIMAGE_MAGIC || + hdr->magic2 != ZIMAGE_MAGIC2) + return NULL; + + for (offset = hdr->extension_tag_offset; + (tag = (void *)(buf + offset)) != NULL && + offset < max && + (size = le32_to_cpu(byte_size(tag))) != 0 && + offset + size < len; + offset += size) { + dbgprintf(" offset 0x%08x tag 0x%08x size %u\n", + offset, le32_to_cpu(tag->hdr.tag), size); + if (tag->hdr.tag == tag_id) + return tag; + } + + return NULL; +} + +static int get_cells_size(void *fdt, uint32_t *address_cells, + uint32_t *size_cells) +{ + int nodeoffset; + const uint32_t *prop = NULL; + int prop_len; + + /* default values */ + *address_cells = 1; + *size_cells = 1; + + /* under root node */ + nodeoffset = fdt_path_offset(fdt, "/"); + if (nodeoffset < 0) + return -1; + + prop = fdt_getprop(fdt, nodeoffset, "#address-cells", &prop_len); + if (prop) { + if (prop_len != sizeof(*prop)) + return -1; + + *address_cells = fdt32_to_cpu(*prop); + } + + prop = fdt_getprop(fdt, nodeoffset, "#size-cells", &prop_len); + if (prop) { + if (prop_len != sizeof(*prop)) + return -1; + + *size_cells = fdt32_to_cpu(*prop); + } + + dbgprintf("%s: #address-cells:%d #size-cells:%d\n", __func__, + *address_cells, *size_cells); + return 0; +} + +static bool cells_size_fitted(uint32_t address_cells, uint32_t size_cells, + struct memory_range *range) +{ + dbgprintf("%s: %llx-%llx\n", __func__, range->start, range->end); + + /* if *_cells >= 2, cells can hold 64-bit values anyway */ + if ((address_cells == 1) && (range->start >= (1ULL << 32))) + return false; + + if ((size_cells == 1) && + ((range->end - range->start + 1) >= (1ULL << 32))) + return false; + + return true; +} + +static void fill_property(void *buf, uint64_t val, uint32_t cells) +{ + uint32_t val32; + int i; + + if (cells == 1) { + val32 = cpu_to_fdt32((uint32_t)val); + memcpy(buf, &val32, sizeof(uint32_t)); + } else { + for (i = 0; + i < (cells * sizeof(uint32_t) - sizeof(uint64_t)); i++) + *(char *)buf++ = 0; + + val = cpu_to_fdt64(val); + memcpy(buf, &val, sizeof(uint64_t)); + } +} + +static int setup_dtb_prop_range(char **bufp, off_t *sizep, int parentoffset, + const char *node_name, const char *prop_name, + struct memory_range *range, + uint32_t address_cells, uint32_t size_cells) +{ + void *buf, *prop; + size_t buf_size; + int result; + + buf_size = (address_cells + size_cells) * sizeof(uint32_t); + prop = buf = xmalloc(buf_size); + + fill_property(prop, range->start, address_cells); + prop += address_cells * sizeof(uint32_t); + + fill_property(prop, range->end - range->start + 1, size_cells); + prop += size_cells * sizeof(uint32_t); + + result = setup_dtb_prop(bufp, sizep, parentoffset, node_name, + prop_name, buf, buf_size); + + free(buf); + + return result; +} + +int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + unsigned long page_size = getpagesize(); + unsigned long base, kernel_base; + unsigned int atag_offset = 0x1000; /* 4k offset from memory start */ + unsigned int extra_size = 0x8000; /* TEXT_OFFSET */ + uint32_t address_cells, size_cells; + const struct zimage_tag *tag; + size_t kernel_buf_size; + size_t kernel_mem_size; + const char *command_line; + char *modified_cmdline = NULL; + off_t command_line_len; + const char *ramdisk; + const char *ramdisk_buf; + int opt; + int use_atags; + int result; + char *dtb_buf; + off_t dtb_length; + char *dtb_file; + off_t dtb_offset; + char *end; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_APPEND }, + { "append", 1, 0, OPT_APPEND }, + { "initrd", 1, 0, OPT_RAMDISK }, + { "ramdisk", 1, 0, OPT_RAMDISK }, + { "dtb", 1, 0, OPT_DTB }, + { "atags", 0, 0, OPT_ATAGS }, + { "image-size", 1, 0, OPT_IMAGE_SIZE }, + { "page-offset", 1, 0, OPT_PAGE_OFFSET }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + /* + * Parse the command line arguments + */ + command_line = 0; + command_line_len = 0; + ramdisk = 0; + ramdisk_buf = 0; + initrd_size = 0; + use_atags = 0; + dtb_file = NULL; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + command_line = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_DTB: + dtb_file = optarg; + break; + case OPT_ATAGS: + use_atags = 1; + break; + case OPT_IMAGE_SIZE: + kexec_arm_image_size = strtoul(optarg, &end, 0); + break; + case OPT_PAGE_OFFSET: + user_page_offset = strtoull(optarg, &end, 0); + break; + } + } + + if (use_atags && dtb_file) { + fprintf(stderr, "You can only use ATAGs if you don't specify a " + "dtb file.\n"); + return -1; + } + + if (!use_atags && !dtb_file) { + int f; + + f = have_sysfs_fdt(); + if (f) + dtb_file = SYSFS_FDT; + } + + if (command_line) { + command_line_len = strlen(command_line) + 1; + if (command_line_len > COMMAND_LINE_SIZE) + command_line_len = COMMAND_LINE_SIZE; + } + if (ramdisk) + ramdisk_buf = slurp_file_mmap(ramdisk, &initrd_size); + + if (dtb_file) + dtb_buf = slurp_file(dtb_file, &dtb_length); + + if (len > sizeof(struct zimage_header)) { + const struct zimage_header *hdr; + off_t size; + + hdr = (const struct zimage_header *)buf; + + dbgprintf("zImage header: 0x%08x 0x%08x 0x%08x\n", + hdr->magic, hdr->start, hdr->end); + + if (hdr->magic == ZIMAGE_MAGIC) { + size = le32_to_cpu(hdr->end) - le32_to_cpu(hdr->start); + + dbgprintf("zImage size 0x%llx, file size 0x%llx\n", + (unsigned long long)size, + (unsigned long long)len); + + if (size > len) { + fprintf(stderr, + "zImage is truncated - file 0x%llx vs header 0x%llx\n", + (unsigned long long)len, + (unsigned long long)size); + return -1; + } + if (size < len) + len = size; + } + } + + /* Handle android images, 2048 is the minimum page size */ + if (len > 2048 && !strncmp(buf, "ANDROID!", 8)) { + const struct android_image *aimg = (const void *)buf; + uint32_t page_size = le32_to_cpu(aimg->page_size); + uint32_t kernel_size = le32_to_cpu(aimg->kernel_size); + uint32_t ramdisk_size = le32_to_cpu(aimg->ramdisk_size); + uint32_t stage2_size = le32_to_cpu(aimg->stage2_size); + off_t aimg_size = page_size + _ALIGN(kernel_size, page_size) + + _ALIGN(ramdisk_size, page_size) + stage2_size; + + if (len < aimg_size) { + fprintf(stderr, "Android image size is incorrect\n"); + return -1; + } + + /* Get the kernel */ + buf = buf + page_size; + len = kernel_size; + + /* And the ramdisk if none was given on the command line */ + if (!ramdisk && ramdisk_size) { + initrd_size = ramdisk_size; + ramdisk_buf = buf + _ALIGN(kernel_size, page_size); + } + + /* Likewise for the command line */ + if (!command_line && aimg->command_line[0]) { + command_line = aimg->command_line; + if (command_line[sizeof(aimg->command_line) - 1]) + command_line_len = sizeof(aimg->command_line); + else + command_line_len = strlen(command_line) + 1; + } + } + + /* + * Save the length of the compressed kernel image w/o the appended DTB. + * This will be required later on when the kernel image contained + * in the zImage will be loaded into a kernel memory segment. + * And we want to load ONLY the compressed kernel image from the zImage + * and discard the appended DTB. + */ + kernel_buf_size = len; + + /* + * Always extend the zImage by four bytes to ensure that an appended + * DTB image always sees an initialised value after _edata. + */ + kernel_mem_size = len + 4; + + /* + * Check for a kernel size extension, and set or validate the + * image size. This is the total space needed to avoid the + * boot kernel BSS, so other data (such as initrd) does not get + * overwritten. + */ + tag = find_extension_tag(buf, len, ZIMAGE_TAG_KRNL_SIZE); + + /* + * The zImage length does not include its stack (4k) or its + * malloc space (64k). Include this. + */ + len += 0x11000; + + dbgprintf("zImage requires 0x%08llx bytes\n", (unsigned long long)len); + + if (tag) { + uint32_t *p = (void *)buf + le32_to_cpu(tag->u.krnl_size.size_ptr); + uint32_t edata_size = le32_to_cpu(get_unaligned(p)); + uint32_t bss_size = le32_to_cpu(tag->u.krnl_size.bss_size); + uint32_t kernel_size = edata_size + bss_size; + + dbgprintf("Decompressed kernel sizes:\n"); + dbgprintf(" text+data 0x%08lx bss 0x%08lx total 0x%08lx\n", + (unsigned long)edata_size, + (unsigned long)bss_size, + (unsigned long)kernel_size); + + /* + * While decompressing, the zImage is placed past _edata + * of the decompressed kernel. Ensure we account for that. + */ + if (kernel_size < edata_size + len) + kernel_size = edata_size + len; + + dbgprintf("Resulting kernel space: 0x%08lx\n", + (unsigned long)kernel_size); + + if (kexec_arm_image_size == 0) + kexec_arm_image_size = kernel_size; + else if (kexec_arm_image_size < kernel_size) { + fprintf(stderr, + "Kernel size is too small, increasing to 0x%lx\n", + (unsigned long)kernel_size); + kexec_arm_image_size = kernel_size; + } + } + + /* + * If the user didn't specify the size of the image, and we don't + * have the extension tables, assume the maximum kernel compression + * ratio is 4. Note that we must include space for the compressed + * image here as well. + */ + if (!kexec_arm_image_size) + kexec_arm_image_size = len * 5; + + /* + * If we are loading a dump capture kernel, we need to update kernel + * command line and also add some additional segments. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + uint64_t start, end; + + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset(modified_cmdline, '\0', COMMAND_LINE_SIZE); + + if (command_line) { + (void) strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + + if (load_crashdump_segments(info, modified_cmdline) < 0) { + free(modified_cmdline); + return -1; + } + + command_line = modified_cmdline; + command_line_len = strlen(command_line) + 1; + + /* + * We put the dump capture kernel at the start of crashkernel + * reserved memory. + */ + if (parse_iomem_single(CRASH_KERNEL_BOOT, &start, &end) && + parse_iomem_single(CRASH_KERNEL, &start, &end)) { + /* + * No crash kernel memory reserved. We cannot do more + * but just bail out. + */ + return ENOCRASHKERNEL; + } + base = start; + } else { + base = locate_hole(info, len + extra_size, 0, 0, + ULONG_MAX, INT_MAX); + } + + if (base == ULONG_MAX) + return -1; + + kernel_base = base + extra_size; + + /* + * Calculate the minimum address of the initrd, which must be + * above the memory used by the zImage while it runs. This + * needs to be page-size aligned. + */ + initrd_base = kernel_base + _ALIGN(kexec_arm_image_size, page_size); + + dbgprintf("%-6s: address=0x%08lx size=0x%08lx\n", "Kernel", + (unsigned long)kernel_base, + (unsigned long)kexec_arm_image_size); + + if (ramdisk_buf) { + /* + * Find a hole to place the initrd. The crash kernel use + * fixed address, so no check is ok. + */ + if (!(info->kexec_flags & KEXEC_ON_CRASH)) { + initrd_base = locate_hole(info, initrd_size, page_size, + initrd_base, + ULONG_MAX, INT_MAX); + if (initrd_base == ULONG_MAX) + return -1; + } + + dbgprintf("%-6s: address=0x%08lx size=0x%08lx\n", "Initrd", + (unsigned long)initrd_base, + (unsigned long)initrd_size); + + add_segment(info, ramdisk_buf, initrd_size, initrd_base, + initrd_size); + } + + if (use_atags) { + /* + * use ATAGs from /proc/atags + */ + if (atag_arm_load(info, base + atag_offset, + command_line, command_line_len, + ramdisk_buf) == -1) + return -1; + } else { + /* + * Read a user-specified DTB file. + */ + if (dtb_file) { + if (fdt_check_header(dtb_buf) != 0) { + fprintf(stderr, "Invalid FDT buffer.\n"); + return -1; + } + + if (command_line) { + /* + * Error should have been reported so + * directly return -1 + */ + if (setup_dtb_prop(&dtb_buf, &dtb_length, 0, "chosen", + "bootargs", command_line, + strlen(command_line) + 1)) + return -1; + } + } else { + /* + * Extract the DTB from /proc/device-tree. + */ + create_flatten_tree(&dtb_buf, &dtb_length, command_line); + } + + /* + * Add the initrd parameters to the dtb + */ + if (ramdisk_buf) { + unsigned long start, end; + + start = cpu_to_be32((unsigned long)(initrd_base)); + end = cpu_to_be32((unsigned long)(initrd_base + initrd_size)); + + if (setup_dtb_prop(&dtb_buf, &dtb_length, 0, "chosen", + "linux,initrd-start", &start, + sizeof(start))) + return -1; + if (setup_dtb_prop(&dtb_buf, &dtb_length, 0, "chosen", + "linux,initrd-end", &end, + sizeof(end))) + return -1; + } + + if (info->kexec_flags & KEXEC_ON_CRASH) { + /* Determine #address-cells and #size-cells */ + result = get_cells_size(dtb_buf, &address_cells, + &size_cells); + if (result) { + fprintf(stderr, "Cannot determine cells-size.\n"); + return -1; + } + + if (!cells_size_fitted(address_cells, size_cells, + &elfcorehdr_mem)) { + fprintf(stderr, "elfcorehdr doesn't fit cells-size.\n"); + return -1; + } + + if (!cells_size_fitted(address_cells, size_cells, + &crash_kernel_mem)) { + fprintf(stderr, "kexec: usable memory range doesn't fit cells-size.\n"); + return -1; + } + + /* Add linux,elfcorehdr */ + if (setup_dtb_prop_range(&dtb_buf, &dtb_length, 0, + "chosen", "linux,elfcorehdr", + &elfcorehdr_mem, + address_cells, size_cells)) + return -1; + + /* Add linux,usable-memory-range */ + if (setup_dtb_prop_range(&dtb_buf, &dtb_length, 0, + "chosen", + "linux,usable-memory-range", + &crash_kernel_mem, + address_cells, size_cells)) + return -1; + } + + /* + * The dtb must also be placed above the memory used by + * the zImage. We don't care about its position wrt the + * ramdisk, but we might as well place it after the initrd. + * We leave a buffer page between the initrd and the dtb. + */ + dtb_offset = initrd_base + initrd_size + page_size; + dtb_offset = _ALIGN_DOWN(dtb_offset, page_size); + + /* + * Find a hole to place the dtb above the initrd. + * Crash kernel use fixed address, no check is ok. + */ + if (!(info->kexec_flags & KEXEC_ON_CRASH)) { + dtb_offset = locate_hole(info, dtb_length, page_size, + dtb_offset, ULONG_MAX, INT_MAX); + if (dtb_offset == ULONG_MAX) + return -1; + } + + dbgprintf("%-6s: address=0x%08lx size=0x%08lx\n", "DT", + (unsigned long)dtb_offset, (unsigned long)dtb_length); + + add_segment(info, dtb_buf, dtb_length, dtb_offset, dtb_length); + } + + add_segment(info, buf, kernel_buf_size, kernel_base, kernel_mem_size); + + info->entry = (void*)kernel_base; + + return 0; +} diff --git a/kexec/arch/arm/phys_to_virt.c b/kexec/arch/arm/phys_to_virt.c new file mode 100644 index 0000000..46a4f68 --- /dev/null +++ b/kexec/arch/arm/phys_to_virt.c @@ -0,0 +1,22 @@ +#include "../../kexec.h" +#include "../../crashdump.h" +#include "phys_to_virt.h" + +uint64_t phys_offset; + +/** + * phys_to_virt() - translate physical address to virtual address + * @paddr: physical address to translate + * + * For ARM we have following equation to translate from virtual address to + * physical: + * paddr = vaddr - PAGE_OFFSET + PHYS_OFFSET + * + * See also: + * http://lists.arm.linux.org.uk/lurker/message/20010723.185051.94ce743c.en.html + */ +unsigned long +phys_to_virt(struct crash_elf_info *elf_info, unsigned long long paddr) +{ + return paddr + elf_info->page_offset - phys_offset; +} diff --git a/kexec/arch/arm/phys_to_virt.h b/kexec/arch/arm/phys_to_virt.h new file mode 100644 index 0000000..b3147dd --- /dev/null +++ b/kexec/arch/arm/phys_to_virt.h @@ -0,0 +1,8 @@ +#ifndef PHYS_TO_VIRT_H +#define PHYS_TO_VIRT_H + +#include <stdint.h> + +extern uint64_t phys_offset; + +#endif diff --git a/kexec/arch/arm64/Makefile b/kexec/arch/arm64/Makefile new file mode 100644 index 0000000..59212f1 --- /dev/null +++ b/kexec/arch/arm64/Makefile @@ -0,0 +1,50 @@ + +arm64_FS2DT += kexec/fs2dt.c +arm64_FS2DT_INCLUDE += \ + -include $(srcdir)/kexec/arch/arm64/crashdump-arm64.h \ + -include $(srcdir)/kexec/arch/arm64/kexec-arm64.h + +arm64_DT_OPS += kexec/dt-ops.c + +arm64_MEM_REGIONS = kexec/mem_regions.c + +arm64_CPPFLAGS += -I $(srcdir)/kexec/ + +arm64_KEXEC_SRCS += \ + kexec/arch/arm64/crashdump-arm64.c \ + kexec/arch/arm64/kexec-arm64.c \ + kexec/arch/arm64/kexec-elf-arm64.c \ + kexec/arch/arm64/kexec-uImage-arm64.c \ + kexec/arch/arm64/kexec-image-arm64.c \ + kexec/arch/arm64/kexec-vmlinuz-arm64.c + +arm64_UIMAGE = kexec/kexec-uImage.c + +arm64_ARCH_REUSE_INITRD = +arm64_ADD_SEGMENT = +arm64_VIRT_TO_PHYS = +arm64_PHYS_TO_VIRT = + +dist += $(arm64_KEXEC_SRCS) \ + kexec/arch/arm64/include/arch/options.h \ + kexec/arch/arm64/crashdump-arm64.h \ + kexec/arch/arm64/image-header.h \ + kexec/arch/arm64/iomem.h \ + kexec/arch/arm64/kexec-arm64.h \ + kexec/arch/arm64/Makefile + +ifdef HAVE_LIBFDT + +LIBS += -lfdt + +else + +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +libfdt_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) + +arm64_CPPFLAGS += -I$(srcdir)/kexec/libfdt + +arm64_KEXEC_SRCS += $(libfdt_SRCS) + +endif diff --git a/kexec/arch/arm64/crashdump-arm64.c b/kexec/arch/arm64/crashdump-arm64.c new file mode 100644 index 0000000..3098315 --- /dev/null +++ b/kexec/arch/arm64/crashdump-arm64.c @@ -0,0 +1,248 @@ +/* + * ARM64 crashdump. + * partly derived from arm implementation + * + * Copyright (c) 2014-2017 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <linux/elf.h> + +#include "kexec.h" +#include "crashdump.h" +#include "crashdump-arm64.h" +#include "iomem.h" +#include "kexec-arm64.h" +#include "kexec-elf.h" +#include "mem_regions.h" + +/* memory ranges of crashed kernel */ +static struct memory_ranges system_memory_rgns; + +/* memory range reserved for crashkernel */ +struct memory_range crash_reserved_mem[CRASH_MAX_RESERVED_RANGES]; +struct memory_ranges usablemem_rgns = { + .size = 0, + .max_size = CRASH_MAX_RESERVED_RANGES, + .ranges = crash_reserved_mem, +}; + +struct memory_range elfcorehdr_mem; + +static struct crash_elf_info elf_info = { + .class = ELFCLASS64, +#if (__BYTE_ORDER == __LITTLE_ENDIAN) + .data = ELFDATA2LSB, +#else + .data = ELFDATA2MSB, +#endif + .machine = EM_AARCH64, +}; + +/* + * iomem_range_callback() - callback called for each iomem region + * @data: not used + * @nr: not used + * @str: name of the memory region + * @base: start address of the memory region + * @length: size of the memory region + * + * This function is called once for each memory region found in /proc/iomem. + * It locates system RAM and crashkernel reserved memory and places these to + * variables, respectively, system_memory_rgns and usablemem_rgns. + */ + +static int iomem_range_callback(void *UNUSED(data), int UNUSED(nr), + char *str, unsigned long long base, + unsigned long long length) +{ + if (strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)) == 0) + return mem_regions_alloc_and_add(&usablemem_rgns, + base, length, RANGE_RAM); + else if (strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0) + return mem_regions_alloc_and_add(&system_memory_rgns, + base, length, RANGE_RAM); + else if (strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) == 0) { + + unsigned long long kva_text = get_kernel_sym("_text"); + unsigned long long kva_stext = get_kernel_sym("_stext"); + unsigned long long kva_text_end = get_kernel_sym("__init_begin"); + + /* + * old: kernel_code.start = __pa_symbol(_text); + * new: kernel_code.start = __pa_symbol(_stext); + * + * For compatibility, deduce by comparing the gap "__init_begin - _stext" + * and the res size of "Kernel code" in /proc/iomem + */ + if (kva_text_end - kva_stext == length) + elf_info.kern_paddr_start = base - (kva_stext - kva_text); + else + elf_info.kern_paddr_start = base; + } + else if (strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) == 0) + elf_info.kern_size = base + length - elf_info.kern_paddr_start; + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + return usablemem_rgns.size; +} + +/* + * crash_get_memory_ranges() - read system physical memory + * + * Function reads through system physical memory and stores found memory + * regions in system_memory_ranges. + * Regions are sorted in ascending order. + * + * Returns 0 in case of success and a negative value otherwise. + */ +static int crash_get_memory_ranges(void) +{ + int i; + + /* + * First read all memory regions that can be considered as + * system memory including the crash area. + */ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + /* allow one or two regions for crash dump kernel */ + if (!usablemem_rgns.size) + return -EINVAL; + + dbgprint_mem_range("Reserved memory range", + usablemem_rgns.ranges, usablemem_rgns.size); + + for (i = 0; i < usablemem_rgns.size; i++) { + if (mem_regions_alloc_and_exclude(&system_memory_rgns, + &crash_reserved_mem[i])) { + fprintf(stderr, "Cannot allocate memory for ranges\n"); + return -ENOMEM; + } + } + + /* + * Make sure that the memory regions are sorted. + */ + mem_regions_sort(&system_memory_rgns); + + dbgprint_mem_range("Coredump memory ranges", + system_memory_rgns.ranges, system_memory_rgns.size); + + /* + * For additional kernel code/data segment. + * kern_paddr_start/kern_size are determined in iomem_range_callback + */ + elf_info.kern_vaddr_start = get_kernel_sym("_text"); + if (!elf_info.kern_vaddr_start) + elf_info.kern_vaddr_start = UINT64_MAX; + + return 0; +} + +/* + * load_crashdump_segments() - load the elf core header + * @info: kexec info structure + * + * This function creates and loads an additional segment of elf core header + : which is used to construct /proc/vmcore on crash dump kernel. + * + * Return 0 in case of success and -1 in case of error. + */ + +int load_crashdump_segments(struct kexec_info *info) +{ + unsigned long elfcorehdr; + unsigned long bufsz; + void *buf; + int err; + + /* + * First fetch all the memory (RAM) ranges that we are going to + * pass to the crash dump kernel during panic. + */ + + err = crash_get_memory_ranges(); + + if (err) + return EFAILED; + + get_page_offset((unsigned long *)&elf_info.page_offset); + dbgprintf("%s: page_offset: %016llx\n", __func__, + elf_info.page_offset); + + err = crash_create_elf64_headers(info, &elf_info, + system_memory_rgns.ranges, system_memory_rgns.size, + &buf, &bufsz, ELF_CORE_HEADER_ALIGN); + + if (err) + return EFAILED; + + elfcorehdr = add_buffer_phys_virt(info, buf, bufsz, bufsz, 0, + crash_reserved_mem[usablemem_rgns.size - 1].start, + crash_reserved_mem[usablemem_rgns.size - 1].end, + -1, 0); + + elfcorehdr_mem.start = elfcorehdr; + elfcorehdr_mem.end = elfcorehdr + bufsz - 1; + + dbgprintf("%s: elfcorehdr 0x%llx-0x%llx\n", __func__, + elfcorehdr_mem.start, elfcorehdr_mem.end); + + return 0; +} + +/* + * e_entry and p_paddr are actually in virtual address space. + * Those values will be translated to physcal addresses by using + * virt_to_phys() in add_segment(). + * So let's fix up those values for later use so the memory base + * (arm64_mm.phys_offset) will be correctly replaced with + * crash_reserved_mem[usablemem_rgns.size - 1].start. + */ +void fixup_elf_addrs(struct mem_ehdr *ehdr) +{ + struct mem_phdr *phdr; + int i; + + ehdr->e_entry += -arm64_mem.phys_offset + + crash_reserved_mem[usablemem_rgns.size - 1].start; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type != PT_LOAD) + continue; + phdr->p_paddr += + (-arm64_mem.phys_offset + + crash_reserved_mem[usablemem_rgns.size - 1].start); + } +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + if (!usablemem_rgns.size) + return -1; + + *start = crash_reserved_mem[usablemem_rgns.size - 1].start; + *end = crash_reserved_mem[usablemem_rgns.size - 1].end; + + return 0; +} diff --git a/kexec/arch/arm64/crashdump-arm64.h b/kexec/arch/arm64/crashdump-arm64.h new file mode 100644 index 0000000..82fa69b --- /dev/null +++ b/kexec/arch/arm64/crashdump-arm64.h @@ -0,0 +1,29 @@ +/* + * ARM64 crashdump. + * + * Copyright (c) 2014-2017 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef CRASHDUMP_ARM64_H +#define CRASHDUMP_ARM64_H + +#include "kexec.h" + +#define CRASH_MAX_MEMORY_RANGES 32768 + +/* crash dump kernel support at most two regions, low_region and high region. */ +#define CRASH_MAX_RESERVED_RANGES 2 + +extern struct memory_ranges usablemem_rgns; +extern struct memory_range crash_reserved_mem[]; +extern struct memory_range elfcorehdr_mem; + +extern int load_crashdump_segments(struct kexec_info *info); +extern void fixup_elf_addrs(struct mem_ehdr *ehdr); + +#endif /* CRASHDUMP_ARM64_H */ diff --git a/kexec/arch/arm64/image-header.h b/kexec/arch/arm64/image-header.h new file mode 100644 index 0000000..26bb02f --- /dev/null +++ b/kexec/arch/arm64/image-header.h @@ -0,0 +1,147 @@ +/* + * ARM64 binary image header. + */ + +#if !defined(__ARM64_IMAGE_HEADER_H) +#define __ARM64_IMAGE_HEADER_H + +#include <endian.h> +#include <stdint.h> + +/** + * struct arm64_image_header - arm64 kernel image header. + * + * @pe_sig: Optional PE format 'MZ' signature. + * @branch_code: Reserved for instructions to branch to stext. + * @text_offset: The image load offset in LSB byte order. + * @image_size: An estimated size of the memory image size in LSB byte order. + * @flags: Bit flags in LSB byte order: + * Bit 0: Image byte order: 1=MSB. + * Bit 1-2: Kernel page size: 1=4K, 2=16K, 3=64K. + * Bit 3: Image placement: 0=low. + * @reserved_1: Reserved. + * @magic: Magic number, "ARM\x64". + * @pe_header: Optional offset to a PE format header. + **/ + +struct arm64_image_header { + uint8_t pe_sig[2]; + uint16_t branch_code[3]; + uint64_t text_offset; + uint64_t image_size; + uint64_t flags; + uint64_t reserved_1[3]; + uint8_t magic[4]; + uint32_t pe_header; +}; + +static const uint8_t arm64_image_magic[4] = {'A', 'R', 'M', 0x64U}; +static const uint8_t arm64_image_pe_sig[2] = {'M', 'Z'}; +static const uint8_t arm64_pe_machtype[6] = {'P','E', 0x0, 0x0, 0x64, 0xAA}; +static const uint64_t arm64_image_flag_be = (1UL << 0); +static const uint64_t arm64_image_flag_page_size = (3UL << 1); +static const uint64_t arm64_image_flag_placement = (1UL << 3); + +/** + * enum arm64_header_page_size + */ + +enum arm64_header_page_size { + arm64_header_page_size_invalid = 0, + arm64_header_page_size_4k, + arm64_header_page_size_16k, + arm64_header_page_size_64k +}; + +/** + * arm64_header_check_magic - Helper to check the arm64 image header. + * + * Returns non-zero if header is OK. + */ + +static inline int arm64_header_check_magic(const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return (h->magic[0] == arm64_image_magic[0] + && h->magic[1] == arm64_image_magic[1] + && h->magic[2] == arm64_image_magic[2] + && h->magic[3] == arm64_image_magic[3]); +} + +/** + * arm64_header_check_pe_sig - Helper to check the arm64 image header. + * + * Returns non-zero if 'MZ' signature is found. + */ + +static inline int arm64_header_check_pe_sig(const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return (h->pe_sig[0] == arm64_image_pe_sig[0] + && h->pe_sig[1] == arm64_image_pe_sig[1]); +} + +/** + * arm64_header_check_msb - Helper to check the arm64 image header. + * + * Returns non-zero if the image was built as big endian. + */ + +static inline int arm64_header_check_msb(const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return (le64toh(h->flags) & arm64_image_flag_be) >> 0; +} + +/** + * arm64_header_page_size + */ + +static inline enum arm64_header_page_size arm64_header_page_size( + const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return (le64toh(h->flags) & arm64_image_flag_page_size) >> 1; +} + +/** + * arm64_header_placement + * + * Returns non-zero if the image has no physical placement restrictions. + */ + +static inline int arm64_header_placement(const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return (le64toh(h->flags) & arm64_image_flag_placement) >> 3; +} + +static inline uint64_t arm64_header_text_offset( + const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return le64toh(h->text_offset); +} + +static inline uint64_t arm64_header_image_size( + const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return le64toh(h->image_size); +} + +#endif diff --git a/kexec/arch/arm64/include/arch/options.h b/kexec/arch/arm64/include/arch/options.h new file mode 100644 index 0000000..8c695f3 --- /dev/null +++ b/kexec/arch/arm64/include/arch/options.h @@ -0,0 +1,43 @@ +#if !defined(KEXEC_ARCH_ARM64_OPTIONS_H) +#define KEXEC_ARCH_ARM64_OPTIONS_H + +#define OPT_APPEND ((OPT_MAX)+0) +#define OPT_DTB ((OPT_MAX)+1) +#define OPT_INITRD ((OPT_MAX)+2) +#define OPT_REUSE_CMDLINE ((OPT_MAX)+3) +#define OPT_SERIAL ((OPT_MAX)+4) +#define OPT_ARCH_MAX ((OPT_MAX)+5) + +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "append", 1, NULL, OPT_APPEND }, \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "dtb", 1, NULL, OPT_DTB }, \ + { "initrd", 1, NULL, OPT_INITRD }, \ + { "serial", 1, NULL, OPT_SERIAL }, \ + { "ramdisk", 1, NULL, OPT_INITRD }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR /* Only accept long arch options. */ +#define KEXEC_ALL_OPTIONS KEXEC_ARCH_OPTIONS +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +static const char arm64_opts_usage[] __attribute__ ((unused)) = +" --append=STRING Set the kernel command line to STRING.\n" +" --command-line=STRING Set the kernel command line to STRING.\n" +" --dtb=FILE Use FILE as the device tree blob.\n" +" --initrd=FILE Use FILE as the kernel initial ramdisk.\n" +" --serial=STRING Name of console used for purgatory printing. (e.g. ttyAMA0)\n" +" --ramdisk=FILE Use FILE as the kernel initial ramdisk.\n" +" --reuse-cmdline Use kernel command line from running system.\n"; + +struct arm64_opts { + const char *command_line; + const char *dtb; + const char *initrd; + const char *console; +}; + +extern struct arm64_opts arm64_opts; + +#endif diff --git a/kexec/arch/arm64/iomem.h b/kexec/arch/arm64/iomem.h new file mode 100644 index 0000000..d4864bb --- /dev/null +++ b/kexec/arch/arm64/iomem.h @@ -0,0 +1,10 @@ +#ifndef IOMEM_H +#define IOMEM_H + +#define SYSTEM_RAM "System RAM\n" +#define KERNEL_CODE "Kernel code\n" +#define KERNEL_DATA "Kernel data\n" +#define CRASH_KERNEL "Crash kernel\n" +#define IOMEM_RESERVED "reserved\n" + +#endif diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c new file mode 100644 index 0000000..4a67b0d --- /dev/null +++ b/kexec/arch/arm64/kexec-arm64.c @@ -0,0 +1,1365 @@ +/* + * ARM64 kexec. + */ + +#define _GNU_SOURCE + +#include <assert.h> +#include <errno.h> +#include <getopt.h> +#include <inttypes.h> +#include <libfdt.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <linux/elf-em.h> +#include <elf.h> +#include <elf_info.h> + +#include <unistd.h> +#include <syscall.h> +#include <errno.h> +#include <linux/random.h> + +#include "kexec.h" +#include "kexec-arm64.h" +#include "crashdump.h" +#include "crashdump-arm64.h" +#include "dt-ops.h" +#include "fs2dt.h" +#include "iomem.h" +#include "kexec-syscall.h" +#include "mem_regions.h" +#include "arch/options.h" + +#define ROOT_NODE_ADDR_CELLS_DEFAULT 1 +#define ROOT_NODE_SIZE_CELLS_DEFAULT 1 + +#define PROP_ADDR_CELLS "#address-cells" +#define PROP_SIZE_CELLS "#size-cells" +#define PROP_ELFCOREHDR "linux,elfcorehdr" +#define PROP_USABLE_MEM_RANGE "linux,usable-memory-range" + +#define PAGE_OFFSET_36 ((0xffffffffffffffffUL) << 36) +#define PAGE_OFFSET_39 ((0xffffffffffffffffUL) << 39) +#define PAGE_OFFSET_42 ((0xffffffffffffffffUL) << 42) +#define PAGE_OFFSET_47 ((0xffffffffffffffffUL) << 47) +#define PAGE_OFFSET_48 ((0xffffffffffffffffUL) << 48) + +/* Global flag which indicates that we have tried reading + * PHYS_OFFSET from 'kcore' already. + */ +static bool try_read_phys_offset_from_kcore = false; + +/* Machine specific details. */ +static int va_bits = -1; +static unsigned long page_offset; + +/* Global varables the core kexec routines expect. */ + +unsigned char reuse_initrd; + +off_t initrd_base; +off_t initrd_size; + +const struct arch_map_entry arches[] = { + { "aarch64", KEXEC_ARCH_ARM64 }, + { "aarch64_be", KEXEC_ARCH_ARM64 }, + { NULL, 0 }, +}; + +struct file_type file_type[] = { + {"vmlinux", elf_arm64_probe, elf_arm64_load, elf_arm64_usage}, + {"Image", image_arm64_probe, image_arm64_load, image_arm64_usage}, + {"uImage", uImage_arm64_probe, uImage_arm64_load, uImage_arm64_usage}, + {"vmlinuz", pez_arm64_probe, pez_arm64_load, pez_arm64_usage}, +}; + +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +/* arm64 global varables. */ + +struct arm64_opts arm64_opts; +struct arm64_mem arm64_mem = { + .phys_offset = arm64_mem_ngv, + .vp_offset = arm64_mem_ngv, +}; + +uint64_t get_phys_offset(void) +{ + assert(arm64_mem.phys_offset != arm64_mem_ngv); + return arm64_mem.phys_offset; +} + +uint64_t get_vp_offset(void) +{ + assert(arm64_mem.vp_offset != arm64_mem_ngv); + return arm64_mem.vp_offset; +} + +/** + * arm64_process_image_header - Process the arm64 image header. + * + * Make a guess that KERNEL_IMAGE_SIZE will be enough for older kernels. + */ + +int arm64_process_image_header(const struct arm64_image_header *h) +{ +#if !defined(KERNEL_IMAGE_SIZE) +# define KERNEL_IMAGE_SIZE MiB(16) +#endif + + if (!arm64_header_check_magic(h)) + return EFAILED; + + if (h->image_size) { + arm64_mem.text_offset = arm64_header_text_offset(h); + arm64_mem.image_size = arm64_header_image_size(h); + } else { + /* For 3.16 and older kernels. */ + arm64_mem.text_offset = 0x80000; + arm64_mem.image_size = KERNEL_IMAGE_SIZE; + fprintf(stderr, + "kexec: %s: Warning: Kernel image size set to %lu MiB.\n" + " Please verify compatability with lodaed kernel.\n", + __func__, KERNEL_IMAGE_SIZE / 1024UL / 1024UL); + } + + return 0; +} + +void arch_usage(void) +{ + printf(arm64_opts_usage); +} + +int arch_process_options(int argc, char **argv) +{ + static const char short_options[] = KEXEC_OPT_STR ""; + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { 0 } + }; + int opt; + char *cmdline = NULL; + const char *append = NULL; + int do_kexec_file_syscall = 0; + + for (opt = 0; opt != -1; ) { + opt = getopt_long(argc, argv, short_options, options, 0); + + switch (opt) { + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + cmdline = get_command_line(); + break; + case OPT_DTB: + arm64_opts.dtb = optarg; + break; + case OPT_INITRD: + arm64_opts.initrd = optarg; + break; + case OPT_KEXEC_FILE_SYSCALL: + do_kexec_file_syscall = 1; + case OPT_SERIAL: + arm64_opts.console = optarg; + break; + default: + break; /* Ignore core and unknown options. */ + } + } + + arm64_opts.command_line = concat_cmdline(cmdline, append); + + dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__, + arm64_opts.command_line); + dbgprintf("%s:%d: initrd: %s\n", __func__, __LINE__, + arm64_opts.initrd); + dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__, + (do_kexec_file_syscall && arm64_opts.dtb ? "(ignored)" : + arm64_opts.dtb)); + dbgprintf("%s:%d: console: %s\n", __func__, __LINE__, + arm64_opts.console); + + if (do_kexec_file_syscall) + arm64_opts.dtb = NULL; + + return 0; +} + +/** + * find_purgatory_sink - Find a sink for purgatory output. + */ + +static uint64_t find_purgatory_sink(const char *console) +{ + int fd, ret; + char device[255], mem[255]; + struct stat sb; + char buffer[10]; + uint64_t iomem = 0x0; + + if (!console) + return 0; + + ret = snprintf(device, sizeof(device), "/sys/class/tty/%s", console); + if (ret < 0 || ret >= sizeof(device)) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + return 0; + } + + if (stat(device, &sb) || !S_ISDIR(sb.st_mode)) { + fprintf(stderr, "kexec: %s: No valid console found for %s\n", + __func__, device); + return 0; + } + + ret = snprintf(mem, sizeof(mem), "%s%s", device, "/iomem_base"); + if (ret < 0 || ret >= sizeof(mem)) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + return 0; + } + + printf("console memory read from %s\n", mem); + + fd = open(mem, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "kexec: %s: No able to open %s\n", + __func__, mem); + return 0; + } + + memset(buffer, '\0', sizeof(buffer)); + ret = read(fd, buffer, sizeof(buffer)); + if (ret < 0) { + fprintf(stderr, "kexec: %s: not able to read fd\n", __func__); + close(fd); + return 0; + } + + sscanf(buffer, "%lx", &iomem); + printf("console memory is at %#lx\n", iomem); + + close(fd); + return iomem; +} + +/** + * struct dtb - Info about a binary device tree. + * + * @buf: Device tree data. + * @size: Device tree data size. + * @name: Shorthand name of this dtb for messages. + * @path: Filesystem path. + */ + +struct dtb { + char *buf; + off_t size; + const char *name; + const char *path; +}; + +/** + * dump_reservemap - Dump the dtb's reservemap. + */ + +static void dump_reservemap(const struct dtb *dtb) +{ + int i; + + for (i = 0; ; i++) { + uint64_t address; + uint64_t size; + + fdt_get_mem_rsv(dtb->buf, i, &address, &size); + + if (!size) + break; + + dbgprintf("%s: %s {%" PRIx64 ", %" PRIx64 "}\n", __func__, + dtb->name, address, size); + } +} + +/** + * set_bootargs - Set the dtb's bootargs. + */ + +static int set_bootargs(struct dtb *dtb, const char *command_line) +{ + int result; + + if (!command_line || !command_line[0]) + return 0; + + result = dtb_set_bootargs(&dtb->buf, &dtb->size, command_line); + + if (result) { + fprintf(stderr, + "kexec: Set device tree bootargs failed.\n"); + return EFAILED; + } + + return 0; +} + +/** + * read_proc_dtb - Read /proc/device-tree. + */ + +static int read_proc_dtb(struct dtb *dtb) +{ + int result; + struct stat s; + static const char path[] = "/proc/device-tree"; + + result = stat(path, &s); + + if (result) { + dbgprintf("%s: %s\n", __func__, strerror(errno)); + return EFAILED; + } + + dtb->path = path; + create_flatten_tree((char **)&dtb->buf, &dtb->size, NULL); + + return 0; +} + +/** + * read_sys_dtb - Read /sys/firmware/fdt. + */ + +static int read_sys_dtb(struct dtb *dtb) +{ + int result; + struct stat s; + static const char path[] = "/sys/firmware/fdt"; + + result = stat(path, &s); + + if (result) { + dbgprintf("%s: %s\n", __func__, strerror(errno)); + return EFAILED; + } + + dtb->path = path; + dtb->buf = slurp_file(path, &dtb->size); + + return 0; +} + +/** + * read_1st_dtb - Read the 1st stage kernel's dtb. + */ + +static int read_1st_dtb(struct dtb *dtb) +{ + int result; + + dtb->name = "dtb_sys"; + result = read_sys_dtb(dtb); + + if (!result) + goto on_success; + + dtb->name = "dtb_proc"; + result = read_proc_dtb(dtb); + + if (!result) + goto on_success; + + dbgprintf("%s: not found\n", __func__); + return EFAILED; + +on_success: + dbgprintf("%s: found %s\n", __func__, dtb->path); + return 0; +} + +static int get_cells_size(void *fdt, uint32_t *address_cells, + uint32_t *size_cells) +{ + int nodeoffset; + const uint32_t *prop = NULL; + int prop_len; + + /* default values */ + *address_cells = ROOT_NODE_ADDR_CELLS_DEFAULT; + *size_cells = ROOT_NODE_SIZE_CELLS_DEFAULT; + + /* under root node */ + nodeoffset = fdt_path_offset(fdt, "/"); + if (nodeoffset < 0) + goto on_error; + + prop = fdt_getprop(fdt, nodeoffset, PROP_ADDR_CELLS, &prop_len); + if (prop) { + if (prop_len == sizeof(*prop)) + *address_cells = fdt32_to_cpu(*prop); + else + goto on_error; + } + + prop = fdt_getprop(fdt, nodeoffset, PROP_SIZE_CELLS, &prop_len); + if (prop) { + if (prop_len == sizeof(*prop)) + *size_cells = fdt32_to_cpu(*prop); + else + goto on_error; + } + + dbgprintf("%s: #address-cells:%d #size-cells:%d\n", __func__, + *address_cells, *size_cells); + return 0; + +on_error: + return EFAILED; +} + +static bool cells_size_fitted(uint32_t address_cells, uint32_t size_cells, + struct memory_range *range) +{ + dbgprintf("%s: %llx-%llx\n", __func__, range->start, range->end); + + /* if *_cells >= 2, cells can hold 64-bit values anyway */ + if ((address_cells == 1) && (range->start >= (1ULL << 32))) + return false; + + if ((size_cells == 1) && + ((range->end - range->start + 1) >= (1ULL << 32))) + return false; + + return true; +} + +static void fill_property(void *buf, uint64_t val, uint32_t cells) +{ + uint32_t val32; + int i; + + if (cells == 1) { + val32 = cpu_to_fdt32((uint32_t)val); + memcpy(buf, &val32, sizeof(uint32_t)); + } else { + for (i = 0; + i < (cells * sizeof(uint32_t) - sizeof(uint64_t)); i++) + *(char *)buf++ = 0; + + val = cpu_to_fdt64(val); + memcpy(buf, &val, sizeof(uint64_t)); + } +} + +static int fdt_setprop_ranges(void *fdt, int nodeoffset, const char *name, + struct memory_range *ranges, int nr_ranges, bool reverse, + uint32_t address_cells, uint32_t size_cells) +{ + void *buf, *prop; + size_t buf_size; + int i, result; + struct memory_range *range; + + buf_size = (address_cells + size_cells) * sizeof(uint32_t) * nr_ranges; + prop = buf = xmalloc(buf_size); + if (!buf) + return -ENOMEM; + + for (i = 0; i < nr_ranges; i++) { + if (reverse) + range = ranges + (nr_ranges - 1 - i); + else + range = ranges + i; + + fill_property(prop, range->start, address_cells); + prop += address_cells * sizeof(uint32_t); + + fill_property(prop, range->end - range->start + 1, size_cells); + prop += size_cells * sizeof(uint32_t); + } + + result = fdt_setprop(fdt, nodeoffset, name, buf, buf_size); + + free(buf); + + return result; +} + +/** + * setup_2nd_dtb - Setup the 2nd stage kernel's dtb. + */ + +static int setup_2nd_dtb(struct dtb *dtb, char *command_line, int on_crash) +{ + uint32_t address_cells, size_cells; + uint64_t fdt_val64; + uint64_t *prop; + char *new_buf = NULL; + int len, range_len; + int nodeoffset; + int new_size; + int i, result, kaslr_seed; + + result = fdt_check_header(dtb->buf); + + if (result) { + fprintf(stderr, "kexec: Invalid 2nd device tree.\n"); + return EFAILED; + } + + result = set_bootargs(dtb, command_line); + if (result) { + fprintf(stderr, "kexec: cannot set bootargs.\n"); + result = -EINVAL; + goto on_error; + } + + /* determine #address-cells and #size-cells */ + result = get_cells_size(dtb->buf, &address_cells, &size_cells); + if (result) { + fprintf(stderr, "kexec: cannot determine cells-size.\n"); + result = -EINVAL; + goto on_error; + } + + if (!cells_size_fitted(address_cells, size_cells, + &elfcorehdr_mem)) { + fprintf(stderr, "kexec: elfcorehdr doesn't fit cells-size.\n"); + result = -EINVAL; + goto on_error; + } + + for (i = 0; i < usablemem_rgns.size; i++) { + if (!cells_size_fitted(address_cells, size_cells, + &crash_reserved_mem[i])) { + fprintf(stderr, "kexec: usable memory range doesn't fit cells-size.\n"); + result = -EINVAL; + goto on_error; + } + } + + /* duplicate dt blob */ + range_len = sizeof(uint32_t) * (address_cells + size_cells); + new_size = fdt_totalsize(dtb->buf) + + fdt_prop_len(PROP_ELFCOREHDR, range_len) + + fdt_prop_len(PROP_USABLE_MEM_RANGE, range_len * usablemem_rgns.size); + + new_buf = xmalloc(new_size); + result = fdt_open_into(dtb->buf, new_buf, new_size); + if (result) { + dbgprintf("%s: fdt_open_into failed: %s\n", __func__, + fdt_strerror(result)); + result = -ENOSPC; + goto on_error; + } + + /* fixup 'kaslr-seed' with a random value, if supported */ + nodeoffset = fdt_path_offset(new_buf, "/chosen"); + prop = fdt_getprop_w(new_buf, nodeoffset, + "kaslr-seed", &len); + if (!prop || len != sizeof(uint64_t)) { + dbgprintf("%s: no kaslr-seed found\n", + __func__); + /* for kexec warm reboot case, we don't need to fixup + * other dtb properties + */ + if (!on_crash) { + dump_reservemap(dtb); + if (new_buf) + free(new_buf); + + return result; + } + } else { + kaslr_seed = fdt64_to_cpu(*prop); + + /* kaslr_seed must be wiped clean by primary + * kernel during boot + */ + if (kaslr_seed != 0) { + dbgprintf("%s: kaslr-seed is not wiped to 0.\n", + __func__); + result = -EINVAL; + goto on_error; + } + + /* + * Invoke the getrandom system call with + * GRND_NONBLOCK, to make sure we + * have a valid random seed to pass to the + * secondary kernel. + */ + result = syscall(SYS_getrandom, &fdt_val64, + sizeof(fdt_val64), + GRND_NONBLOCK); + + if(result == -1) { + fprintf(stderr, "%s: Reading random bytes failed.\n", + __func__); + + /* Currently on some arm64 platforms this + * 'getrandom' system call fails while booting + * the platform. + * + * In case, this happens at best we can set + * the 'kaslr_seed' as 0, indicating that the + * 2nd kernel will be booted with a 'nokaslr' + * like behaviour. + */ + fdt_val64 = 0UL; + dbgprintf("%s: Disabling KASLR in secondary kernel.\n", + __func__); + } + + nodeoffset = fdt_path_offset(new_buf, "/chosen"); + result = fdt_setprop_inplace(new_buf, + nodeoffset, "kaslr-seed", + &fdt_val64, sizeof(fdt_val64)); + if (result) { + dbgprintf("%s: fdt_setprop failed: %s\n", + __func__, fdt_strerror(result)); + result = -EINVAL; + goto on_error; + } + } + + if (on_crash) { + /* add linux,elfcorehdr */ + nodeoffset = fdt_path_offset(new_buf, "/chosen"); + result = fdt_setprop_ranges(new_buf, nodeoffset, + PROP_ELFCOREHDR, &elfcorehdr_mem, 1, false, + address_cells, size_cells); + if (result) { + dbgprintf("%s: fdt_setprop failed: %s\n", __func__, + fdt_strerror(result)); + result = -EINVAL; + goto on_error; + } + + /* + * add linux,usable-memory-range + * + * crash dump kernel support one or two regions, to make + * compatibility with existing user-space and older kdump, the + * low region is always the last one. + */ + nodeoffset = fdt_path_offset(new_buf, "/chosen"); + result = fdt_setprop_ranges(new_buf, nodeoffset, + PROP_USABLE_MEM_RANGE, + usablemem_rgns.ranges, usablemem_rgns.size, true, + address_cells, size_cells); + if (result) { + dbgprintf("%s: fdt_setprop failed: %s\n", __func__, + fdt_strerror(result)); + result = -EINVAL; + goto on_error; + } + } + + fdt_pack(new_buf); + dtb->buf = new_buf; + dtb->size = fdt_totalsize(new_buf); + + dump_reservemap(dtb); + + return result; + +on_error: + fprintf(stderr, "kexec: %s failed.\n", __func__); + if (new_buf) + free(new_buf); + + return result; +} + +unsigned long arm64_locate_kernel_segment(struct kexec_info *info) +{ + unsigned long hole; + + if (info->kexec_flags & KEXEC_ON_CRASH) { + unsigned long hole_end; + + hole = (crash_reserved_mem[usablemem_rgns.size - 1].start < mem_min ? + mem_min : crash_reserved_mem[usablemem_rgns.size - 1].start); + hole = _ALIGN_UP(hole, MiB(2)); + hole_end = hole + arm64_mem.text_offset + arm64_mem.image_size; + + if ((hole_end > mem_max) || + (hole_end > crash_reserved_mem[usablemem_rgns.size - 1].end)) { + dbgprintf("%s: Crash kernel out of range\n", __func__); + hole = ULONG_MAX; + } + } else { + hole = locate_hole(info, + arm64_mem.text_offset + arm64_mem.image_size, + MiB(2), 0, ULONG_MAX, 1); + + if (hole == ULONG_MAX) + dbgprintf("%s: locate_hole failed\n", __func__); + } + + return hole; +} + +/** + * arm64_load_other_segments - Prepare the dtb, initrd and purgatory segments. + */ + +int arm64_load_other_segments(struct kexec_info *info, + unsigned long image_base) +{ + int result; + unsigned long dtb_base; + unsigned long hole_min; + unsigned long hole_max; + unsigned long initrd_end; + uint64_t purgatory_sink; + char *initrd_buf = NULL; + struct dtb dtb; + char command_line[COMMAND_LINE_SIZE] = ""; + + if (arm64_opts.command_line) { + if (strlen(arm64_opts.command_line) > + sizeof(command_line) - 1) { + fprintf(stderr, + "Kernel command line too long for kernel!\n"); + return EFAILED; + } + + strncpy(command_line, arm64_opts.command_line, + sizeof(command_line) - 1); + command_line[sizeof(command_line) - 1] = 0; + } + + purgatory_sink = find_purgatory_sink(arm64_opts.console); + + dbgprintf("%s:%d: purgatory sink: 0x%" PRIx64 "\n", __func__, __LINE__, + purgatory_sink); + + if (arm64_opts.dtb) { + dtb.name = "dtb_user"; + dtb.buf = slurp_file(arm64_opts.dtb, &dtb.size); + } else { + result = read_1st_dtb(&dtb); + + if (result) { + fprintf(stderr, + "kexec: Error: No device tree available.\n"); + return EFAILED; + } + } + + result = setup_2nd_dtb(&dtb, command_line, + info->kexec_flags & KEXEC_ON_CRASH); + + if (result) + return EFAILED; + + /* Put the other segments after the image. */ + + hole_min = image_base + arm64_mem.image_size; + if (info->kexec_flags & KEXEC_ON_CRASH) + hole_max = crash_reserved_mem[usablemem_rgns.size - 1].end; + else + hole_max = ULONG_MAX; + + if (arm64_opts.initrd) { + initrd_buf = slurp_file(arm64_opts.initrd, &initrd_size); + + if (!initrd_buf) + fprintf(stderr, "kexec: Empty ramdisk file.\n"); + else { + /* Put the initrd after the kernel. */ + + initrd_base = add_buffer_phys_virt(info, initrd_buf, + initrd_size, initrd_size, 0, + hole_min, hole_max, 1, 0); + + initrd_end = initrd_base + initrd_size; + + /* Check limits as specified in booting.txt. + * The kernel may have as little as 32 GB of address space to map + * system memory and both kernel and initrd must be 1GB aligend. + */ + + if (_ALIGN_UP(initrd_end, GiB(1)) - _ALIGN_DOWN(image_base, GiB(1)) > GiB(32)) { + fprintf(stderr, "kexec: Error: image + initrd too big.\n"); + return EFAILED; + } + + dbgprintf("initrd: base %lx, size %lxh (%ld)\n", + initrd_base, initrd_size, initrd_size); + + result = dtb_set_initrd((char **)&dtb.buf, + &dtb.size, initrd_base, + initrd_base + initrd_size); + + if (result) + return EFAILED; + } + } + + if (!initrd_buf) { + /* Don't reuse the initrd addresses from 1st DTB */ + dtb_clear_initrd((char **)&dtb.buf, &dtb.size); + } + + /* Check size limit as specified in booting.txt. */ + + if (dtb.size > MiB(2)) { + fprintf(stderr, "kexec: Error: dtb too big.\n"); + return EFAILED; + } + + dtb_base = add_buffer_phys_virt(info, dtb.buf, dtb.size, dtb.size, + 0, hole_min, hole_max, 1, 0); + + /* dtb_base is valid if we got here. */ + + dbgprintf("dtb: base %lx, size %lxh (%ld)\n", dtb_base, dtb.size, + dtb.size); + + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + hole_min, hole_max, 1, 0); + + info->entry = (void *)elf_rel_get_addr(&info->rhdr, "purgatory_start"); + + elf_rel_set_symbol(&info->rhdr, "arm64_sink", &purgatory_sink, + sizeof(purgatory_sink)); + + elf_rel_set_symbol(&info->rhdr, "arm64_kernel_entry", &image_base, + sizeof(image_base)); + + elf_rel_set_symbol(&info->rhdr, "arm64_dtb_addr", &dtb_base, + sizeof(dtb_base)); + + return 0; +} + +/** + * virt_to_phys - For processing elf file values. + */ + +unsigned long virt_to_phys(unsigned long v) +{ + unsigned long p; + + p = v - get_vp_offset() + get_phys_offset(); + + return p; +} + +/** + * phys_to_virt - For crashdump setup. + */ + +unsigned long phys_to_virt(struct crash_elf_info *elf_info, + unsigned long long p) +{ + unsigned long v; + + v = p - get_phys_offset() + elf_info->page_offset; + + return v; +} + +/** + * add_segment - Use virt_to_phys when loading elf files. + */ + +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} + +static inline void set_phys_offset(int64_t v, char *set_method) +{ + if (arm64_mem.phys_offset == arm64_mem_ngv + || v < arm64_mem.phys_offset) { + arm64_mem.phys_offset = v; + dbgprintf("%s: phys_offset : %016lx (method : %s)\n", + __func__, arm64_mem.phys_offset, + set_method); + } +} + +/** + * get_va_bits - Helper for getting VA_BITS + */ + +static int get_va_bits(void) +{ + unsigned long long stext_sym_addr; + + /* + * if already got from kcore + */ + if (va_bits != -1) + goto out; + + + /* For kernel older than v4.19 */ + fprintf(stderr, "Warning, can't get the VA_BITS from kcore\n"); + stext_sym_addr = get_kernel_sym("_stext"); + + if (stext_sym_addr == 0) { + fprintf(stderr, "Can't get the symbol of _stext.\n"); + return -1; + } + + /* Derive va_bits as per arch/arm64/Kconfig */ + if ((stext_sym_addr & PAGE_OFFSET_36) == PAGE_OFFSET_36) { + va_bits = 36; + } else if ((stext_sym_addr & PAGE_OFFSET_39) == PAGE_OFFSET_39) { + va_bits = 39; + } else if ((stext_sym_addr & PAGE_OFFSET_42) == PAGE_OFFSET_42) { + va_bits = 42; + } else if ((stext_sym_addr & PAGE_OFFSET_47) == PAGE_OFFSET_47) { + va_bits = 47; + } else if ((stext_sym_addr & PAGE_OFFSET_48) == PAGE_OFFSET_48) { + va_bits = 48; + } else { + fprintf(stderr, + "Cannot find a proper _stext for calculating VA_BITS\n"); + return -1; + } + +out: + dbgprintf("va_bits : %d\n", va_bits); + + return 0; +} + +/** + * get_page_offset - Helper for getting PAGE_OFFSET + */ + +int get_page_offset(unsigned long *page_offset) +{ + unsigned long long text_sym_addr, kernel_va_mid; + int ret; + + text_sym_addr = get_kernel_sym("_text"); + if (text_sym_addr == 0) { + fprintf(stderr, "Can't get the symbol of _text to calculate page_offset.\n"); + return -1; + } + + ret = get_va_bits(); + if (ret < 0) + return ret; + + /* Since kernel 5.4, kernel image is put above + * UINT64_MAX << (va_bits - 1) + */ + kernel_va_mid = UINT64_MAX << (va_bits - 1); + /* older kernel */ + if (text_sym_addr < kernel_va_mid) + *page_offset = UINT64_MAX << (va_bits - 1); + else + *page_offset = UINT64_MAX << va_bits; + + dbgprintf("page_offset : %lx\n", *page_offset); + + return 0; +} + +static void arm64_scan_vmcoreinfo(char *pos) +{ + const char *str; + + str = "NUMBER(VA_BITS)="; + if (memcmp(str, pos, strlen(str)) == 0) + va_bits = strtoul(pos + strlen(str), NULL, 10); +} + +/** + * get_phys_offset_from_vmcoreinfo_pt_note - Helper for getting PHYS_OFFSET (and va_bits) + * from VMCOREINFO note inside 'kcore'. + */ + +static int get_phys_offset_from_vmcoreinfo_pt_note(long *phys_offset) +{ + int fd, ret = 0; + + if ((fd = open("/proc/kcore", O_RDONLY)) < 0) { + fprintf(stderr, "Can't open (%s).\n", "/proc/kcore"); + return EFAILED; + } + + arch_scan_vmcoreinfo = arm64_scan_vmcoreinfo; + ret = read_phys_offset_elf_kcore(fd, phys_offset); + + close(fd); + return ret; +} + +/** + * get_phys_base_from_pt_load - Helper for getting PHYS_OFFSET + * from PT_LOADs inside 'kcore'. + */ + +int get_phys_base_from_pt_load(long *phys_offset) +{ + int i, fd, ret; + unsigned long long phys_start; + unsigned long long virt_start; + + ret = get_page_offset(&page_offset); + if (ret < 0) + return ret; + + if ((fd = open("/proc/kcore", O_RDONLY)) < 0) { + fprintf(stderr, "Can't open (%s).\n", "/proc/kcore"); + return EFAILED; + } + + read_elf(fd); + + for (i = 0; get_pt_load(i, + &phys_start, NULL, &virt_start, NULL); + i++) { + if (virt_start != NOT_KV_ADDR + && virt_start >= page_offset + && phys_start != NOT_PADDR) + *phys_offset = phys_start - + (virt_start & ~page_offset); + } + + close(fd); + return 0; +} + +static bool to_be_excluded(char *str, unsigned long long start, unsigned long long end) +{ + if (!strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL))) { + uint64_t load_start, load_end; + + if (!get_crash_kernel_load_range(&load_start, &load_end) && + (load_start == start) && (load_end == end)) + return false; + + return true; + } + + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) || + !strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) || + !strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA))) + return false; + else + return true; +} + +/** + * get_memory_ranges - Try to get the memory ranges from + * /proc/iomem. + */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + long phys_offset = -1; + FILE *fp; + const char *iomem = proc_iomem(); + char line[MAX_LINE], *str; + unsigned long long start, end; + int n, consumed; + struct memory_ranges memranges; + struct memory_range *last, excl_range; + int ret; + + if (!try_read_phys_offset_from_kcore) { + /* Since kernel version 4.19, 'kcore' contains + * a new PT_NOTE which carries the VMCOREINFO + * information. + * If the same is available, one should prefer the + * same to retrieve 'PHYS_OFFSET' value exported by + * the kernel as this is now the standard interface + * exposed by kernel for sharing machine specific + * details with the userland. + */ + ret = get_phys_offset_from_vmcoreinfo_pt_note(&phys_offset); + if (!ret) { + if (phys_offset != -1) + set_phys_offset(phys_offset, + "vmcoreinfo pt_note"); + } else { + /* If we are running on a older kernel, + * try to retrieve the 'PHYS_OFFSET' value + * exported by the kernel in the 'kcore' + * file by reading the PT_LOADs and determining + * the correct combination. + */ + ret = get_phys_base_from_pt_load(&phys_offset); + if (!ret) + if (phys_offset != -1) + set_phys_offset(phys_offset, + "pt_load"); + } + + try_read_phys_offset_from_kcore = true; + } + + fp = fopen(iomem, "r"); + if (!fp) + die("Cannot open %s\n", iomem); + + memranges.ranges = NULL; + memranges.size = memranges.max_size = 0; + + while (fgets(line, sizeof(line), fp) != 0) { + n = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed); + if (n != 2) + continue; + str = line + consumed; + + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) { + ret = mem_regions_alloc_and_add(&memranges, + start, end - start + 1, RANGE_RAM); + if (ret) { + fprintf(stderr, + "Cannot allocate memory for ranges\n"); + fclose(fp); + return -ENOMEM; + } + + dbgprintf("%s:+[%d] %016llx - %016llx\n", __func__, + memranges.size - 1, + memranges.ranges[memranges.size - 1].start, + memranges.ranges[memranges.size - 1].end); + } else if (to_be_excluded(str, start, end)) { + if (!memranges.size) + continue; + + /* + * Note: mem_regions_exclude() doesn't guarantee + * that the ranges are sorted out, but as long as + * we cope with /proc/iomem, we only operate on + * the last entry and so it is safe. + */ + + /* The last System RAM range */ + last = &memranges.ranges[memranges.size - 1]; + + if (last->end < start) + /* New resource outside of System RAM */ + continue; + if (end < last->start) + /* Already excluded by parent resource */ + continue; + + excl_range.start = start; + excl_range.end = end; + ret = mem_regions_alloc_and_exclude(&memranges, &excl_range); + if (ret) { + fprintf(stderr, + "Cannot allocate memory for ranges (exclude)\n"); + fclose(fp); + return -ENOMEM; + } + dbgprintf("%s:- %016llx - %016llx\n", + __func__, start, end); + } + } + + fclose(fp); + + *range = memranges.ranges; + *ranges = memranges.size; + + /* As a fallback option, we can try determining the PHYS_OFFSET + * value from the '/proc/iomem' entries as well. + * + * But note that this can be flaky, as on certain arm64 + * platforms, it has been noticed that due to a hole at the + * start of physical ram exposed to kernel + * (i.e. it doesn't start from address 0), the kernel still + * calculates the 'memstart_addr' kernel variable as 0. + * + * Whereas the SYSTEM_RAM or IOMEM_RESERVED range in + * '/proc/iomem' would carry a first entry whose start address + * is non-zero (as the physical ram exposed to the kernel + * starts from a non-zero address). + * + * In such cases, if we rely on '/proc/iomem' entries to + * calculate the phys_offset, then we will have mismatch + * between the user-space and kernel space 'PHYS_OFFSET' + * value. + */ + if (memranges.size) + set_phys_offset(memranges.ranges[0].start, "iomem"); + + dbgprint_mem_range("System RAM ranges;", + memranges.ranges, memranges.size); + + return 0; +} + +int arch_compat_trampoline(struct kexec_info *info) +{ + return 0; +} + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + return (ehdr->e_machine == EM_AARCH64); +} + +enum aarch64_rel_type { + R_AARCH64_NONE = 0, + R_AARCH64_ABS64 = 257, + R_AARCH64_PREL32 = 261, + R_AARCH64_MOVW_UABS_G0_NC = 264, + R_AARCH64_MOVW_UABS_G1_NC = 266, + R_AARCH64_MOVW_UABS_G2_NC = 268, + R_AARCH64_MOVW_UABS_G3 =269, + R_AARCH64_LD_PREL_LO19 = 273, + R_AARCH64_ADR_PREL_LO21 = 274, + R_AARCH64_ADR_PREL_PG_HI21 = 275, + R_AARCH64_ADD_ABS_LO12_NC = 277, + R_AARCH64_JUMP26 = 282, + R_AARCH64_CALL26 = 283, + R_AARCH64_LDST64_ABS_LO12_NC = 286, + R_AARCH64_LDST128_ABS_LO12_NC = 299 +}; + +static uint32_t get_bits(uint32_t value, int start, int end) +{ + uint32_t mask = ((uint32_t)1 << (end + 1 - start)) - 1; + return (value >> start) & mask; +} + +void machine_apply_elf_rel(struct mem_ehdr *ehdr, struct mem_sym *UNUSED(sym), + unsigned long r_type, void *ptr, unsigned long address, + unsigned long value) +{ + uint64_t *loc64; + uint32_t *loc32; + uint64_t *location = (uint64_t *)ptr; + uint64_t data = *location; + uint64_t imm; + const char *type = NULL; + + switch((enum aarch64_rel_type)r_type) { + case R_AARCH64_ABS64: + type = "ABS64"; + loc64 = ptr; + *loc64 = cpu_to_elf64(ehdr, value); + break; + case R_AARCH64_PREL32: + type = "PREL32"; + loc32 = ptr; + *loc32 = cpu_to_elf32(ehdr, value - address); + break; + + /* Set a MOV[KZ] immediate field to bits [15:0] of X. No overflow check */ + case R_AARCH64_MOVW_UABS_G0_NC: + type = "MOVW_UABS_G0_NC"; + loc32 = ptr; + imm = get_bits(value, 0, 15); + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5)); + break; + /* Set a MOV[KZ] immediate field to bits [31:16] of X. No overflow check */ + case R_AARCH64_MOVW_UABS_G1_NC: + type = "MOVW_UABS_G1_NC"; + loc32 = ptr; + imm = get_bits(value, 16, 31); + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5)); + break; + /* Set a MOV[KZ] immediate field to bits [47:32] of X. No overflow check */ + case R_AARCH64_MOVW_UABS_G2_NC: + type = "MOVW_UABS_G2_NC"; + loc32 = ptr; + imm = get_bits(value, 32, 47); + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5)); + break; + /* Set a MOV[KZ] immediate field to bits [63:48] of X */ + case R_AARCH64_MOVW_UABS_G3: + type = "MOVW_UABS_G3"; + loc32 = ptr; + imm = get_bits(value, 48, 63); + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5)); + break; + + case R_AARCH64_LD_PREL_LO19: + type = "LD_PREL_LO19"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + (((value - address) << 3) & 0xffffe0)); + break; + case R_AARCH64_ADR_PREL_LO21: + if (value & 3) + die("%s: ERROR Unaligned value: %lx\n", __func__, + value); + type = "ADR_PREL_LO21"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + (((value - address) << 3) & 0xffffe0)); + break; + case R_AARCH64_ADR_PREL_PG_HI21: + type = "ADR_PREL_PG_HI21"; + imm = ((value & ~0xfff) - (address & ~0xfff)) >> 12; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + ((imm & 3) << 29) + ((imm & 0x1ffffc) << (5 - 2))); + break; + case R_AARCH64_ADD_ABS_LO12_NC: + type = "ADD_ABS_LO12_NC"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + ((value & 0xfff) << 10)); + break; + case R_AARCH64_JUMP26: + type = "JUMP26"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + (((value - address) >> 2) & 0x3ffffff)); + break; + case R_AARCH64_CALL26: + type = "CALL26"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + (((value - address) >> 2) & 0x3ffffff)); + break; + /* encode imm field with bits [11:3] of value */ + case R_AARCH64_LDST64_ABS_LO12_NC: + if (value & 7) + die("%s: ERROR Unaligned value: %lx\n", __func__, + value); + type = "LDST64_ABS_LO12_NC"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + ((value & 0xff8) << (10 - 3))); + break; + + /* encode imm field with bits [11:4] of value */ + case R_AARCH64_LDST128_ABS_LO12_NC: + if (value & 15) + die("%s: ERROR Unaligned value: %lx\n", __func__, + value); + type = "LDST128_ABS_LO12_NC"; + loc32 = ptr; + imm = value & 0xff0; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << (10 - 4))); + break; + default: + die("%s: ERROR Unknown type: %lu\n", __func__, r_type); + break; + } + + dbgprintf("%s: %s %016lx->%016lx\n", __func__, type, data, *location); +} + +void arch_reuse_initrd(void) +{ + reuse_initrd = 1; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} diff --git a/kexec/arch/arm64/kexec-arm64.h b/kexec/arch/arm64/kexec-arm64.h new file mode 100644 index 0000000..95fb5c2 --- /dev/null +++ b/kexec/arch/arm64/kexec-arm64.h @@ -0,0 +1,84 @@ +/* + * ARM64 kexec. + */ + +#if !defined(KEXEC_ARM64_H) +#define KEXEC_ARM64_H + +#include <stdbool.h> +#include <sys/types.h> + +#include "image-header.h" +#include "kexec.h" + +#define KEXEC_SEGMENT_MAX 64 + +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 16 +#define COMMAND_LINE_SIZE 2048 /* from kernel */ + +#define KiB(x) ((x) * 1024UL) +#define MiB(x) (KiB(x) * 1024UL) +#define GiB(x) (MiB(x) * 1024UL) + +#define ULONGLONG_MAX (~0ULL) + +/* + * Incorrect address + */ +#define NOT_KV_ADDR (0x0) +#define NOT_PADDR (ULONGLONG_MAX) + +int elf_arm64_probe(const char *kernel_buf, off_t kernel_size); +int elf_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info); +void elf_arm64_usage(void); + +int image_arm64_probe(const char *kernel_buf, off_t kernel_size); +int image_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info); +void image_arm64_usage(void); + +int uImage_arm64_probe(const char *buf, off_t len); +int uImage_arm64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void uImage_arm64_usage(void); + +int pez_arm64_probe(const char *kernel_buf, off_t kernel_size); +int pez_arm64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void pez_arm64_usage(void); + + +extern off_t initrd_base; +extern off_t initrd_size; + +/** + * struct arm64_mem - Memory layout info. + */ + +struct arm64_mem { + int64_t phys_offset; + uint64_t text_offset; + uint64_t image_size; + uint64_t vp_offset; +}; + +#define arm64_mem_ngv UINT64_MAX +extern struct arm64_mem arm64_mem; + +uint64_t get_phys_offset(void); +uint64_t get_vp_offset(void); +int get_page_offset(unsigned long *offset); + +static inline void reset_vp_offset(void) +{ + arm64_mem.vp_offset = arm64_mem_ngv; +} + +int arm64_process_image_header(const struct arm64_image_header *h); +unsigned long arm64_locate_kernel_segment(struct kexec_info *info); +int arm64_load_other_segments(struct kexec_info *info, + unsigned long image_base); + +#endif diff --git a/kexec/arch/arm64/kexec-elf-arm64.c b/kexec/arch/arm64/kexec-elf-arm64.c new file mode 100644 index 0000000..e14f8e9 --- /dev/null +++ b/kexec/arch/arm64/kexec-elf-arm64.c @@ -0,0 +1,170 @@ +/* + * ARM64 kexec elf support. + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdlib.h> +#include <linux/elf.h> + +#include "arch/options.h" +#include "crashdump-arm64.h" +#include "kexec-arm64.h" +#include "kexec-elf.h" +#include "kexec-syscall.h" + +int elf_arm64_probe(const char *kernel_buf, off_t kernel_size) +{ + struct mem_ehdr ehdr; + int result; + + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + + if (result < 0) { + dbgprintf("%s: Not an ELF executable.\n", __func__); + goto on_exit; + } + + if (ehdr.e_machine != EM_AARCH64) { + dbgprintf("%s: Not an AARCH64 ELF executable.\n", __func__); + result = -1; + goto on_exit; + } + + result = 0; +on_exit: + free_elf_info(&ehdr); + return result; +} + +int elf_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info) +{ + const struct arm64_image_header *header = NULL; + unsigned long kernel_segment; + struct mem_ehdr ehdr; + int result; + int i; + + if (info->file_mode) { + fprintf(stderr, + "ELF executable is not supported in kexec_file\n"); + + return EFAILED; + } + + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + + if (result < 0) { + dbgprintf("%s: build_elf_exec_info failed\n", __func__); + goto exit; + } + + /* Find and process the arm64 image header. */ + + for (i = 0; i < ehdr.e_phnum; i++) { + struct mem_phdr *phdr = &ehdr.e_phdr[i]; + unsigned long header_offset; + + if (phdr->p_type != PT_LOAD) + continue; + + /* + * When CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET=y the image header + * could be offset in the elf segment. The linker script sets + * ehdr.e_entry to the start of text. + */ + + header_offset = ehdr.e_entry - phdr->p_vaddr; + + header = (const struct arm64_image_header *)( + kernel_buf + phdr->p_offset + header_offset); + + if (!arm64_process_image_header(header)) { + dbgprintf("%s: e_entry: %016llx\n", __func__, + ehdr.e_entry); + dbgprintf("%s: p_vaddr: %016llx\n", __func__, + phdr->p_vaddr); + dbgprintf("%s: header_offset: %016lx\n", __func__, + header_offset); + + break; + } + } + + if (i == ehdr.e_phnum) { + dbgprintf("%s: Valid arm64 header not found\n", __func__); + result = EFAILED; + goto exit; + } + + kernel_segment = arm64_locate_kernel_segment(info); + + if (kernel_segment == ULONG_MAX) { + dbgprintf("%s: Kernel segment is not allocated\n", __func__); + result = EFAILED; + goto exit; + } + + arm64_mem.vp_offset = _ALIGN_DOWN(ehdr.e_entry, MiB(2)); + if (!(info->kexec_flags & KEXEC_ON_CRASH)) + arm64_mem.vp_offset -= kernel_segment - get_phys_offset(); + + dbgprintf("%s: kernel_segment: %016lx\n", __func__, kernel_segment); + dbgprintf("%s: text_offset: %016lx\n", __func__, + arm64_mem.text_offset); + dbgprintf("%s: image_size: %016lx\n", __func__, + arm64_mem.image_size); + dbgprintf("%s: phys_offset: %016lx\n", __func__, + arm64_mem.phys_offset); + dbgprintf("%s: vp_offset: %016lx\n", __func__, + arm64_mem.vp_offset); + dbgprintf("%s: PE format: %s\n", __func__, + (arm64_header_check_pe_sig(header) ? "yes" : "no")); + + /* create and initialize elf core header segment */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info); + if (result) { + dbgprintf("%s: Creating eflcorehdr failed.\n", + __func__); + goto exit; + } + } + + /* load the kernel */ + if (info->kexec_flags & KEXEC_ON_CRASH) + /* + * offset addresses in elf header in order to load + * vmlinux (elf_exec) into crash kernel's memory + */ + fixup_elf_addrs(&ehdr); + + result = elf_exec_load(&ehdr, info); + + if (result) { + dbgprintf("%s: elf_exec_load failed\n", __func__); + goto exit; + } + + /* load additional data */ + result = arm64_load_other_segments(info, kernel_segment + + arm64_mem.text_offset); + +exit: + reset_vp_offset(); + free_elf_info(&ehdr); + if (result) + fprintf(stderr, "kexec: Bad elf image file, load failed.\n"); + return result; +} + +void elf_arm64_usage(void) +{ + printf( +" An ARM64 ELF image, big or little endian.\n" +" Typically vmlinux or a stripped version of vmlinux.\n\n"); +} diff --git a/kexec/arch/arm64/kexec-image-arm64.c b/kexec/arch/arm64/kexec-image-arm64.c new file mode 100644 index 0000000..a196747 --- /dev/null +++ b/kexec/arch/arm64/kexec-image-arm64.c @@ -0,0 +1,119 @@ +/* + * ARM64 kexec binary image support. + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include "crashdump-arm64.h" +#include "image-header.h" +#include "kexec.h" +#include "kexec-arm64.h" +#include "kexec-syscall.h" +#include "arch/options.h" + +int image_arm64_probe(const char *kernel_buf, off_t kernel_size) +{ + const struct arm64_image_header *h; + + if (kernel_size < sizeof(struct arm64_image_header)) { + dbgprintf("%s: No arm64 image header.\n", __func__); + return -1; + } + + h = (const struct arm64_image_header *)(kernel_buf); + + if (!arm64_header_check_magic(h)) { + dbgprintf("%s: Bad arm64 image header.\n", __func__); + return -1; + } + + return 0; +} + +int image_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info) +{ + const struct arm64_image_header *header; + unsigned long kernel_segment; + int result; + + if (info->file_mode) { + if (arm64_opts.initrd) { + info->initrd_fd = open(arm64_opts.initrd, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, + "Could not open initrd file %s:%s\n", + arm64_opts.initrd, strerror(errno)); + result = EFAILED; + goto exit; + } + } + + if (arm64_opts.command_line) { + info->command_line = (char *)arm64_opts.command_line; + info->command_line_len = + strlen(arm64_opts.command_line) + 1; + } + + return 0; + } + + header = (const struct arm64_image_header *)(kernel_buf); + + if (arm64_process_image_header(header)) + return EFAILED; + + kernel_segment = arm64_locate_kernel_segment(info); + + if (kernel_segment == ULONG_MAX) { + dbgprintf("%s: Kernel segment is not allocated\n", __func__); + result = EFAILED; + goto exit; + } + + dbgprintf("%s: kernel_segment: %016lx\n", __func__, kernel_segment); + dbgprintf("%s: text_offset: %016lx\n", __func__, + arm64_mem.text_offset); + dbgprintf("%s: image_size: %016lx\n", __func__, + arm64_mem.image_size); + dbgprintf("%s: phys_offset: %016lx\n", __func__, + arm64_mem.phys_offset); + dbgprintf("%s: vp_offset: %016lx\n", __func__, + arm64_mem.vp_offset); + dbgprintf("%s: PE format: %s\n", __func__, + (arm64_header_check_pe_sig(header) ? "yes" : "no")); + + /* create and initialize elf core header segment */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info); + if (result) { + dbgprintf("%s: Creating eflcorehdr failed.\n", + __func__); + goto exit; + } + } + + /* load the kernel */ + add_segment_phys_virt(info, kernel_buf, kernel_size, + kernel_segment + arm64_mem.text_offset, + arm64_mem.image_size, 0); + + /* load additional data */ + result = arm64_load_other_segments(info, kernel_segment + + arm64_mem.text_offset); + +exit: + if (result) + fprintf(stderr, "kexec: load failed.\n"); + return result; +} + +void image_arm64_usage(void) +{ + printf( +" An ARM64 binary image, compressed or not, big or little endian.\n" +" Typically an Image file.\n\n"); +} diff --git a/kexec/arch/arm64/kexec-uImage-arm64.c b/kexec/arch/arm64/kexec-uImage-arm64.c new file mode 100644 index 0000000..c466913 --- /dev/null +++ b/kexec/arch/arm64/kexec-uImage-arm64.c @@ -0,0 +1,52 @@ +/* + * uImage support added by David Woodhouse <dwmw2@infradead.org> + */ +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <image.h> +#include <kexec-uImage.h> +#include "../../kexec.h" +#include "kexec-arm64.h" + +int uImage_arm64_probe(const char *buf, off_t len) +{ + int ret; + + ret = uImage_probe_kernel(buf, len, IH_ARCH_ARM64); + + /* 0 - valid uImage. + * -1 - uImage is corrupted. + * 1 - image is not a uImage. + */ + if (!ret) + return 0; + else + return -1; +} + +int uImage_arm64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct Image_info img; + int ret; + + if (info->file_mode) { + fprintf(stderr, + "uImage is not supported in kexec_file\n"); + + return EFAILED; + } + + ret = uImage_load(buf, len, &img); + if (ret) + return ret; + + return image_arm64_load(argc, argv, img.buf, img.len, info); +} + +void uImage_arm64_usage(void) +{ + printf( +" An ARM64 U-boot uImage file, compressed or not, big or little endian.\n\n"); +} diff --git a/kexec/arch/arm64/kexec-vmlinuz-arm64.c b/kexec/arch/arm64/kexec-vmlinuz-arm64.c new file mode 100644 index 0000000..c0ee47c --- /dev/null +++ b/kexec/arch/arm64/kexec-vmlinuz-arm64.c @@ -0,0 +1,110 @@ +/* + * ARM64 PE compressed Image (vmlinuz, ZBOOT) support. + * + * Several distros use 'make zinstall' rule inside + * 'arch/arm64/boot/Makefile' to install the arm64 + * ZBOOT compressed file inside the boot destination + * directory (for e.g. /boot). + * + * Currently we cannot use kexec_file_load() to load vmlinuz + * PE images that self decompress. + * + * To support ZBOOT, we should: + * a). Copy the compressed contents of vmlinuz to a temporary file. + * b). Decompress (gunzip-decompress) the contents inside the + * temporary file. + * c). Validate the resulting image and write it back to the + * temporary file. + * d). Pass the 'fd' of the temporary file to the kernel space. + * + * Note this, module doesn't provide a _load() function instead + * relying on image_arm64_load() to load the resulting decompressed + * image. + * + * So basically the kernel space still gets a decompressed + * kernel image to load via kexec-tools. + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include "kexec-arm64.h" +#include <kexec-pe-zboot.h> +#include "arch/options.h" + +static int kernel_fd = -1; + +/* Returns: + * -1 : in case of error/invalid format (not a valid PE+compressed ZBOOT format. + */ +int pez_arm64_probe(const char *kernel_buf, off_t kernel_size) +{ + int ret = -1; + const struct arm64_image_header *h; + char *buf; + off_t buf_sz; + + buf = (char *)kernel_buf; + buf_sz = kernel_size; + if (!buf) + return -1; + h = (const struct arm64_image_header *)buf; + + dbgprintf("%s: PROBE.\n", __func__); + if (buf_sz < sizeof(struct arm64_image_header)) { + dbgprintf("%s: Not large enough to be a PE image.\n", __func__); + return -1; + } + if (!arm64_header_check_pe_sig(h)) { + dbgprintf("%s: Not an PE image.\n", __func__); + return -1; + } + + if (buf_sz < sizeof(struct arm64_image_header) + h->pe_header) { + dbgprintf("%s: PE image offset larger than image.\n", __func__); + return -1; + } + + if (memcmp(&buf[h->pe_header], + arm64_pe_machtype, sizeof(arm64_pe_machtype))) { + dbgprintf("%s: PE header doesn't match machine type.\n", __func__); + return -1; + } + + ret = pez_prepare(buf, buf_sz, &kernel_fd); + + if (!ret) { + /* validate the arm64 specific header */ + struct arm64_image_header hdr_check; + if (read(kernel_fd, &hdr_check, sizeof(hdr_check)) != sizeof(hdr_check)) + goto bad_header; + + lseek(kernel_fd, 0, SEEK_SET); + + if (!arm64_header_check_magic(&hdr_check)) { + dbgprintf("%s: Bad arm64 image header.\n", __func__); + goto bad_header; + } + } + + return ret; +bad_header: + close(kernel_fd); + free(buf); + return -1; +} + +int pez_arm64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + info->kernel_fd = kernel_fd; + return image_arm64_load(argc, argv, buf, len, info); +} + +void pez_arm64_usage(void) +{ + printf( +" An ARM64 vmlinuz, PE image of a compressed, little endian.\n" +" kernel, built with ZBOOT enabled.\n\n"); +} diff --git a/kexec/arch/cris/Makefile b/kexec/arch/cris/Makefile new file mode 100644 index 0000000..4982f3e --- /dev/null +++ b/kexec/arch/cris/Makefile @@ -0,0 +1,13 @@ +cris_KEXEC_SRCS = kexec/arch/cris/kexec-cris.c +cris_KEXEC_SRCS += kexec/arch/cris/kexec-elf-cris.c +cris_KEXEC_SRCS += kexec/arch/cris/cris-setup-simple.S +cris_KEXEC_SRCS += kexec/arch/cris/kexec-elf-rel-cris.c + +cris_ADD_BUFFER = +cris_ADD_SEGMENT = +cris_VIRT_TO_PHYS = + +dist += kexec/arch/cris/Makefile $(cris_KEXEC_SRCS) \ + kexec/arch/cris/kexec-cris.h \ + kexec/arch/cris/include/arch/options.h + diff --git a/kexec/arch/cris/cris-setup-simple.S b/kexec/arch/cris/cris-setup-simple.S new file mode 100644 index 0000000..764f188 --- /dev/null +++ b/kexec/arch/cris/cris-setup-simple.S @@ -0,0 +1,31 @@ +/* + * cris-setup-simple.S - code to execute before stepping into the new kernel. + * Copyright (C) 2008 AXIS Communications AB + * Written by Edgar E. Iglesias + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + .data + .globl cris_trampoline +cris_trampoline: + .balign 4 + lapc cris_regframe, $sp + moveq 0, $r0 + move $r0, $pid + + movem [$sp+], $r14 + jump $r0 + nop + + .globl cris_regframe +cris_regframe: + .balign 4 + .fill 16, 4, 0 +cris_trampoline_end: + + .globl cris_trampoline_size +cris_trampoline_size: + .long cris_trampoline_end - cris_trampoline + diff --git a/kexec/arch/cris/include/arch/options.h b/kexec/arch/cris/include/arch/options.h new file mode 100644 index 0000000..1c1a029 --- /dev/null +++ b/kexec/arch/cris/include/arch/options.h @@ -0,0 +1,35 @@ +#ifndef KEXEC_ARCH_CRIS_OPTIONS_H +#define KEXEC_ARCH_CRIS_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) +#define OPT_APPEND (OPT_ARCH_MAX+0) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + {"append", 1, 0, OPT_APPEND}, + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_CRIS_OPTIONS_H */ diff --git a/kexec/arch/cris/kexec-cris.c b/kexec/arch/cris/kexec-cris.c new file mode 100644 index 0000000..3b69709 --- /dev/null +++ b/kexec/arch/cris/kexec-cris.c @@ -0,0 +1,111 @@ +/* + * kexec-cris.c + * Copyright (C) 2008 AXIS Communications AB + * Written by Edgar E. Iglesias + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-cris.h" +#include <arch/options.h> + +#define MAX_MEMORY_RANGES 64 +#define MAX_LINE 160 +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +/* Return a sorted list of memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + int memory_ranges = 0; + + memory_range[memory_ranges].start = 0x40000000; + memory_range[memory_ranges].end = 0x41000000; + memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + + memory_range[memory_ranges].start = 0xc0000000; + memory_range[memory_ranges].end = 0xc1000000; + memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + + *range = memory_range; + *ranges = memory_ranges; + return 0; +} + +struct file_type file_type[] = { + {"elf-cris", elf_cris_probe, elf_cris_load, elf_cris_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + return 0; +} + +const struct arch_map_entry arches[] = { + { "cris", KEXEC_ARCH_CRIS }, + { "crisv32", KEXEC_ARCH_CRIS }, + { 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +int is_crashkernel_mem_reserved(void) +{ + return 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + /* Crash kernel region size is not exposed by the system */ + return -1; +} + +unsigned long virt_to_phys(unsigned long addr) +{ + return (addr) & 0x7fffffff; +} + +/* + * add_segment() should convert base to a physical address on cris, + * while the default is just to work with base as is */ +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} + +/* + * add_buffer() should convert base to a physical address on cris, + * while the default is just to work with base as is */ +unsigned long add_buffer(struct kexec_info *info, const void *buf, + unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, + unsigned long buf_max, int buf_end) +{ + return add_buffer_phys_virt(info, buf, bufsz, memsz, buf_align, + buf_min, buf_max, buf_end, 1); +} + diff --git a/kexec/arch/cris/kexec-cris.h b/kexec/arch/cris/kexec-cris.h new file mode 100644 index 0000000..7ee9945 --- /dev/null +++ b/kexec/arch/cris/kexec-cris.h @@ -0,0 +1,9 @@ +#ifndef KEXEC_CRIS_H +#define KEXEC_CRIS_H + +int elf_cris_probe(const char *buf, off_t len); +int elf_cris_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_cris_usage(void); + +#endif /* KEXEC_CRIS_H */ diff --git a/kexec/arch/cris/kexec-elf-cris.c b/kexec/arch/cris/kexec-elf-cris.c new file mode 100644 index 0000000..7e251e6 --- /dev/null +++ b/kexec/arch/cris/kexec-elf-cris.c @@ -0,0 +1,133 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2008 AXIS Communications AB + * Written by Edgar E. Iglesias + * + * Based on x86 implementation, + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include <arch/options.h> +#include "kexec-cris.h" + +int elf_cris_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) + goto out; + + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_CRIS) { + result = -1; + goto out; + } + + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +void elf_cris_usage(void) +{ + printf(" --append=STRING Set the kernel command line to STRING\n" + ); +} + +#define CRAMFS_MAGIC 0x28cd3d45 +#define JHEAD_MAGIC 0x1FF528A6 +#define JHEAD_SIZE 8 +#define RAM_INIT_MAGIC 0x56902387 +#define COMMAND_LINE_MAGIC 0x87109563 +#define NAND_BOOT_MAGIC 0x9a9db001 + +int elf_cris_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *command_line; + unsigned int *trampoline_buf; + unsigned long trampoline_base; + int opt; + extern void cris_trampoline(void); + extern unsigned long cris_trampoline_size; + extern struct regframe_t { + unsigned int regs[16]; + } cris_regframe; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"append", 1, 0, OPT_APPEND}, + { 0, 0, 0, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* + * Parse the command line arguments + */ + command_line = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + command_line = optarg; + break; + } + } + + /* Load the ELF executable */ + elf_exec_build_load(info, &ehdr, buf, len, 0); + + cris_regframe.regs[0] = virt_to_phys(ehdr.e_entry); + cris_regframe.regs[8] = RAM_INIT_MAGIC; + cris_regframe.regs[12] = NAND_BOOT_MAGIC; + + trampoline_buf = xmalloc(cris_trampoline_size); + trampoline_base = add_buffer_virt(info, + trampoline_buf, + cris_trampoline_size, + cris_trampoline_size, + 4, 0, elf_max_addr(&ehdr), 1); + memcpy(trampoline_buf, + cris_trampoline, cris_trampoline_size); + info->entry = (void *)trampoline_base; + return 0; +} diff --git a/kexec/arch/cris/kexec-elf-rel-cris.c b/kexec/arch/cris/kexec-elf-rel-cris.c new file mode 100644 index 0000000..255cc2c --- /dev/null +++ b/kexec/arch/cris/kexec-elf-rel-cris.c @@ -0,0 +1,43 @@ +/* + * kexec-elf-rel-cris.c - kexec Elf relocation routines + * Copyright (C) 2008 AXIS Communications AB + * Written by Edgar E. Iglesias + * + * derived from ../ppc/kexec-elf-rel-ppc.c + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS32) { + return 0; + } + if (ehdr->e_machine != EM_CRIS) { + return 0; + } + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + switch(r_type) { + + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } + return; +} diff --git a/kexec/arch/hppa/Makefile b/kexec/arch/hppa/Makefile new file mode 100644 index 0000000..e98f6da --- /dev/null +++ b/kexec/arch/hppa/Makefile @@ -0,0 +1,13 @@ +# +# kexec hppa (linux booting linux) +# +hppa_KEXEC_SRCS = kexec/arch/hppa/kexec-hppa.c +hppa_KEXEC_SRCS += kexec/arch/hppa/kexec-elf-hppa.c +hppa_KEXEC_SRCS += kexec/arch/hppa/kexec-elf-rel-hppa.c + +hppa_ADD_SEGMENT = +hppa_VIRT_TO_PHYS = + +dist += kexec/arch/hppa/Makefile $(hppa_KEXEC_SRCS) \ + kexec/arch/hppa/kexec-hppa.h \ + kexec/arch/hppa/include/arch/options.h diff --git a/kexec/arch/hppa/include/arch/options.h b/kexec/arch/hppa/include/arch/options.h new file mode 100644 index 0000000..e9deb51 --- /dev/null +++ b/kexec/arch/hppa/include/arch/options.h @@ -0,0 +1,32 @@ +#ifndef KEXEC_ARCH_HPPA_OPTIONS_H +#define KEXEC_ARCH_HPPA_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) + +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, 0, OPT_APPEND }, \ + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, \ + { "append", 1, 0, OPT_APPEND }, \ + { "initrd", 1, 0, OPT_RAMDISK }, \ + { "ramdisk", 1, 0, OPT_RAMDISK }, + + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR "" + +/* See the other architectures for details of these; HPPA has no + * loader-specific options yet. + */ +#define OPT_ARCH_MAX (OPT_MAX+0) + +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+1) +#define OPT_RAMDISK (OPT_ARCH_MAX+2) + +#define MAX_MEMORY_RANGES 16 +#endif /* KEXEC_ARCH_HPPA_OPTIONS_H */ diff --git a/kexec/arch/hppa/kexec-elf-hppa.c b/kexec/arch/hppa/kexec-elf-hppa.c new file mode 100644 index 0000000..474a919 --- /dev/null +++ b/kexec/arch/hppa/kexec-elf-hppa.c @@ -0,0 +1,159 @@ +/* + * kexec-elf-hppa.c - kexec Elf loader for hppa + * + * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-hppa.h" +#include <arch/options.h> + +#define PAGE_SIZE 4096 + +extern unsigned long phys_offset; + +int elf_hppa_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) + goto out; + + phys_offset = ehdr.e_entry & 0xf0000000; + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_PARISC) { + /* for a different architecture */ + fprintf(stderr, "Not for this architecture.\n"); + result = -1; + goto out; + } + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +void elf_hppa_usage(void) +{ + printf(" --command-line=STRING Set the kernel command line to STRING\n" + " --append=STRING Set the kernel command line to STRING\n" + " --reuse-cmdline Use kernel command line from running system.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + ); +} + +int elf_hppa_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *cmdline = NULL, *ramdisk = NULL; + int opt, result, i; + unsigned long ramdisk_addr = 0; + off_t ramdisk_size = 0; + + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_ALL_OPT_STR; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != + -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + cmdline = strdup(optarg); + break; + case OPT_REUSE_CMDLINE: + cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + } + } + + if (info->file_mode) { + if (cmdline) { + info->command_line = cmdline; + info->command_line_len = strlen(cmdline) + 1; + } + + if (ramdisk) { + info->initrd_fd = open(ramdisk, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, "Could not open initrd file " + "%s:%s\n", ramdisk, strerror(errno)); + return -1; + } + } + return 0; + } + + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) + die("ELF exec parse failed\n"); + + /* Fixup PT_LOAD segments that include the ELF header (offset zero) */ + for (i = 0; i < ehdr.e_phnum; i++) { + struct mem_phdr *phdr; + phdr = &ehdr.e_phdr[i]; + if (phdr->p_type != PT_LOAD || phdr->p_offset) + continue; + + dbgprintf("Removing ELF header from segment %d\n", i); + phdr->p_paddr += PAGE_SIZE; + phdr->p_vaddr += PAGE_SIZE; + phdr->p_filesz -= PAGE_SIZE; + phdr->p_memsz -= PAGE_SIZE; + phdr->p_offset += PAGE_SIZE; + phdr->p_data += PAGE_SIZE; + } + + /* Load the ELF data */ + result = elf_exec_load(&ehdr, info); + if (result < 0) + die("ELF exec load failed\n"); + + info->entry = (void *)virt_to_phys(ehdr.e_entry); + + + /* Load ramdisk */ + if (ramdisk) { + void *initrd = slurp_decompress_file(ramdisk, &ramdisk_size); + /* Store ramdisk at top of first memory chunk */ + ramdisk_addr = _ALIGN_DOWN(info->memory_range[0].end - + ramdisk_size + 1, PAGE_SIZE); + if (!buf) + die("Ramdisk load failed\n"); + add_buffer(info, initrd, ramdisk_size, ramdisk_size, + PAGE_SIZE, ramdisk_addr, info->memory_range[0].end, + 1); + } + + return 0; +} diff --git a/kexec/arch/hppa/kexec-elf-rel-hppa.c b/kexec/arch/hppa/kexec-elf-rel-hppa.c new file mode 100644 index 0000000..661b67b --- /dev/null +++ b/kexec/arch/hppa/kexec-elf-rel-hppa.c @@ -0,0 +1,37 @@ +/* + * kexec-elf-rel-hppa.c - kexec Elf relocation routines + * + * Copyright (C) 2019 Sven Schnelle <svens@stackframe.org> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) + return 0; + if (ehdr->e_machine != EM_PARISC) + return 0; + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), + unsigned long r_type, + void *UNUSED(location), + unsigned long UNUSED(address), + unsigned long UNUSED(value)) +{ + switch (r_type) { + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } + return; +} diff --git a/kexec/arch/hppa/kexec-hppa.c b/kexec/arch/hppa/kexec-hppa.c new file mode 100644 index 0000000..77c9739 --- /dev/null +++ b/kexec/arch/hppa/kexec-hppa.c @@ -0,0 +1,148 @@ +/* + * kexec-hppa.c - kexec for hppa + * + * Copyright (C) 2019 Sven Schnelle <svens@stackframe.org> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-hppa.h" +#include <arch/options.h> + +#define SYSTEM_RAM "System RAM\n" +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +static struct memory_range memory_range[MAX_MEMORY_RANGES]; +unsigned long phys_offset; + +/* Return a sorted list of available memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + const char *iomem = proc_iomem(); + int memory_ranges = 0; + char line[512]; + FILE *fp; + + fp = fopen(iomem, "r"); + + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + + while(fgets(line, sizeof(line), fp) != 0) { + unsigned long long start, end; + char *str; + int type; + int consumed; + int count; + + + count = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed); + + if (count != 2) + continue; + + str = line + consumed; + + if (memcmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0) { + type = RANGE_RAM; + } else if (memcmp(str, "reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } else { + continue; + } + + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + if (++memory_ranges >= MAX_MEMORY_RANGES) + break; + } + fclose(fp); + *range = memory_range; + *ranges = memory_ranges; + + dbgprint_mem_range("MEMORY RANGES", *range, *ranges); + return 0; +} + +struct file_type file_type[] = { + {"elf-hppa", elf_hppa_probe, elf_hppa_load, elf_hppa_usage}, +}; +int file_types = ARRAY_SIZE(file_type); + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != + -1) { + switch (opt) { + default: + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + { "parisc64", KEXEC_ARCH_HPPA }, + { "parisc", KEXEC_ARCH_HPPA }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +int is_crashkernel_mem_reserved(void) +{ + return 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + /* Crash kernel region size is not exposed by the system */ + return -1; +} + +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} + +unsigned long virt_to_phys(unsigned long addr) +{ + return addr - phys_offset; +} diff --git a/kexec/arch/hppa/kexec-hppa.h b/kexec/arch/hppa/kexec-hppa.h new file mode 100644 index 0000000..485e5b6 --- /dev/null +++ b/kexec/arch/hppa/kexec-hppa.h @@ -0,0 +1,9 @@ +#ifndef KEXEC_HPPA_H +#define KEXEC_HPPA_H + +int elf_hppa_probe(const char *buf, off_t len); +int elf_hppa_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_hppa_usage(void); + +#endif /* KEXEC_HPPA_H */ diff --git a/kexec/arch/i386/Makefile b/kexec/arch/i386/Makefile new file mode 100644 index 0000000..f486103 --- /dev/null +++ b/kexec/arch/i386/Makefile @@ -0,0 +1,20 @@ +# +# kexec i386 (linux booting linux) +# +i386_KEXEC_SRCS = kexec/arch/i386/kexec-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-x86-common.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-elf-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-elf-rel-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-bzImage.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-multiboot-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-mb2-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-beoboot-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-nbi.c +i386_KEXEC_SRCS += kexec/arch/i386/x86-linux-setup.c +i386_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c + +dist += kexec/arch/i386/Makefile $(i386_KEXEC_SRCS) \ + kexec/arch/i386/crashdump-x86.h \ + kexec/arch/i386/kexec-x86.h \ + kexec/arch/i386/x86-linux-setup.h \ + kexec/arch/i386/include/arch/options.h diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c new file mode 100644 index 0000000..df1f24c --- /dev/null +++ b/kexec/arch/i386/crashdump-x86.c @@ -0,0 +1,1049 @@ +/* + * kexec: Linux boots Linux + * + * Created by: Vivek Goyal (vgoyal@in.ibm.com) + * old x86_64 version Created by: Murali M Chakravarthy (muralim@in.ibm.com) + * Copyright (C) IBM Corporation, 2005. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _XOPEN_SOURCE 600 +#define _BSD_SOURCE +#define _DEFAULT_SOURCE + +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <dirent.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../firmware_memmap.h" +#include "../../crashdump.h" +#include "kexec-x86.h" +#include "crashdump-x86.h" +#include "../../kexec-xen.h" +#include "x86-linux-setup.h" +#include <x86/x86-linux.h> + +extern struct arch_options_t arch_options; + +static int get_kernel_page_offset(struct kexec_info *UNUSED(info), + struct crash_elf_info *elf_info) +{ + + if (elf_info->machine == EM_X86_64) { + /* get_kernel_vaddr_and_size will override this */ + elf_info->page_offset = X86_64_PAGE_OFFSET; + } + else if (elf_info->machine == EM_386) { + elf_info->page_offset = X86_PAGE_OFFSET; + } + + return 0; +} + +#define X86_64_KERN_VADDR_ALIGN 0x100000 /* 1MB */ + +/* Read kernel physical load addr from the file returned by proc_iomem() + * (Kernel Code) and store in kexec_info */ +static int get_kernel_paddr(struct kexec_info *UNUSED(info), + struct crash_elf_info *elf_info) +{ + uint64_t start; + + if (elf_info->machine != EM_X86_64) + return 0; + + if (xen_present()) /* Kernel not entity mapped under Xen */ + return 0; + + if (parse_iomem_single("Kernel code\n", &start, NULL) == 0) { + elf_info->kern_paddr_start = start; + dbgprintf("kernel load physical addr start = 0x%016Lx\n", + (unsigned long long)start); + return 0; + } + + fprintf(stderr, "Cannot determine kernel physical load addr\n"); + return -1; +} + +/* Retrieve info regarding virtual address kernel has been compiled for and + * size of the kernel from /proc/kcore. Current /proc/kcore parsing from + * from kexec-tools fails because of malformed elf notes. A kernel patch has + * been submitted. For the folks using older kernels, this function + * hard codes the values to remain backward compatible. Once things stablize + * we should get rid of backward compatible code. */ + +static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info), + struct crash_elf_info *elf_info) +{ + int result; + const char kcore[] = "/proc/kcore"; + char *buf; + struct mem_ehdr ehdr; + struct mem_phdr *phdr, *end_phdr; + int align; + off_t size; + uint32_t elf_flags = 0; + uint64_t stext_sym; + const unsigned long long pud_mask = ~((1 << 30) - 1); + unsigned long long vaddr, lowest_vaddr = 0; + + if (elf_info->machine != EM_X86_64) + return 0; + + if (xen_present()) /* Kernel not entity mapped under Xen */ + return 0; + + align = getpagesize(); + buf = slurp_file_len(kcore, KCORE_ELF_HEADERS_SIZE, &size); + if (!buf) { + fprintf(stderr, "Cannot read %s: %s\n", kcore, strerror(errno)); + return -1; + } + + /* Don't perform checks to make sure stated phdrs and shdrs are + * actually present in the core file. It is not practical + * to read the GB size file into a user space buffer, Given the + * fact that we don't use any info from that. + */ + elf_flags |= ELF_SKIP_FILESZ_CHECK; + result = build_elf_core_info(buf, size, &ehdr, elf_flags); + if (result < 0) { + /* Perhaps KCORE_ELF_HEADERS_SIZE is too small? */ + fprintf(stderr, "ELF core (kcore) parse failed\n"); + return -1; + } + + end_phdr = &ehdr.e_phdr[ehdr.e_phnum]; + + /* Search for the real PAGE_OFFSET when KASLR memory randomization + * is enabled */ + for(phdr = ehdr.e_phdr; phdr != end_phdr; phdr++) { + if (phdr->p_type == PT_LOAD) { + vaddr = phdr->p_vaddr & pud_mask; + if (lowest_vaddr == 0 || lowest_vaddr > vaddr) + lowest_vaddr = vaddr; + } + } + if (lowest_vaddr != 0) + elf_info->page_offset = lowest_vaddr; + + /* Traverse through the Elf headers and find the region where + * _stext symbol is located in. That's where kernel is mapped */ + stext_sym = get_kernel_sym("_stext"); + for(phdr = ehdr.e_phdr; stext_sym && phdr != end_phdr; phdr++) { + if (phdr->p_type == PT_LOAD) { + unsigned long long saddr = phdr->p_vaddr; + unsigned long long eaddr = phdr->p_vaddr + phdr->p_memsz; + unsigned long long size; + + /* Look for kernel text mapping header. */ + if (saddr <= stext_sym && eaddr > stext_sym) { + saddr = _ALIGN_DOWN(saddr, X86_64_KERN_VADDR_ALIGN); + elf_info->kern_vaddr_start = saddr; + size = eaddr - saddr; + /* Align size to page size boundary. */ + size = _ALIGN(size, align); + elf_info->kern_size = size; + dbgprintf("kernel vaddr = 0x%llx size = 0x%llx\n", + saddr, size); + return 0; + } + } + } + + /* If failed to retrieve kernel text mapping through + * /proc/kallsyms, Traverse through the Elf headers again and + * find the region where kernel is mapped using hard-coded + * kernel mapping boundries */ + for(phdr = ehdr.e_phdr; phdr != end_phdr; phdr++) { + if (phdr->p_type == PT_LOAD) { + unsigned long long saddr = phdr->p_vaddr; + unsigned long long eaddr = phdr->p_vaddr + phdr->p_memsz; + unsigned long long size; + + /* Look for kernel text mapping header. */ + if ((saddr >= X86_64__START_KERNEL_map) && + (eaddr <= X86_64__START_KERNEL_map + X86_64_KERNEL_TEXT_SIZE)) { + saddr = _ALIGN_DOWN(saddr, X86_64_KERN_VADDR_ALIGN); + elf_info->kern_vaddr_start = saddr; + size = eaddr - saddr; + /* Align size to page size boundary. */ + size = _ALIGN(size, align); + elf_info->kern_size = size; + dbgprintf("kernel vaddr = 0x%llx size = 0x%llx\n", + saddr, size); + return 0; + } + } + } + + fprintf(stderr, "Can't find kernel text map area from kcore\n"); + return -1; +} + +/* Forward Declaration. */ +static void segregate_lowmem_region(int *nr_ranges, unsigned long lowmem_limit); +static int exclude_region(int *nr_ranges, uint64_t start, uint64_t end); + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region */ +static struct memory_range crash_memory_range[CRASH_MAX_MEMORY_RANGES]; + +/* Memory region reserved for storing panic kernel and other data. */ +#define CRASH_RESERVED_MEM_NR 8 +static struct memory_range crash_reserved_mem[CRASH_RESERVED_MEM_NR]; +static int crash_reserved_mem_nr; + +/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to + * create Elf headers. Keeping it separate from get_memory_ranges() as + * requirements are different in the case of normal kexec and crashdumps. + * + * Normal kexec needs to look at all of available physical memory irrespective + * of the fact how much of it is being used by currently running kernel. + * Crashdumps need to have access to memory regions actually being used by + * running kernel. Expecting a different file/data structure than /proc/iomem + * to look into down the line. May be something like /proc/kernelmem or may + * be zone data structures exported from kernel. + */ +static int get_crash_memory_ranges(struct memory_range **range, int *ranges, + int kexec_flags, unsigned long lowmem_limit) +{ + const char *iomem = proc_iomem(); + int memory_ranges = 0, gart = 0, i; + char line[MAX_LINE]; + FILE *fp; + unsigned long long start, end; + uint64_t gart_start = 0, gart_end = 0; + + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + + while(fgets(line, sizeof(line), fp) != 0) { + char *str; + int type, consumed, count; + + if (memory_ranges >= CRASH_MAX_MEMORY_RANGES) + break; + count = sscanf(line, "%llx-%llx : %n", + &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + dbgprintf("%016llx-%016llx : %s", + start, end, str); + /* + * We want to dump any System RAM -- memory regions currently + * used by the kernel. In the usual case, this is "System RAM" + * on the top level. However, we can also have "System RAM + * (virtio_mem)" below virtio devices or "System RAM (kmem)" + * below "Persistent Memory". + */ + if (strstr(str, "System RAM")) { + type = RANGE_RAM; + } else if (memcmp(str, "ACPI Tables\n", 12) == 0) { + /* + * ACPI Tables area need to be passed to new + * kernel with appropriate memmap= option. This + * is needed so that x86_64 kernel creates linear + * mapping for this region which is required for + * initializing acpi tables in second kernel. + */ + type = RANGE_ACPI; + } else if(memcmp(str,"ACPI Non-volatile Storage\n",26) == 0 ) { + type = RANGE_ACPI_NVS; + } else if(memcmp(str,"Persistent Memory (legacy)\n",27) == 0 ) { + type = RANGE_PRAM; + } else if(memcmp(str,"Persistent Memory\n",18) == 0 ) { + type = RANGE_PMEM; + } else if(memcmp(str,"reserved\n",9) == 0 ) { + type = RANGE_RESERVED; + } else if (memcmp(str, "Reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } else if (memcmp(str, "GART\n", 5) == 0) { + gart_start = start; + gart_end = end; + gart = 1; + continue; + } else { + continue; + } + + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = type; + + segregate_lowmem_region(&memory_ranges, lowmem_limit); + + memory_ranges++; + } + fclose(fp); + if (kexec_flags & KEXEC_PRESERVE_CONTEXT) { + for (i = 0; i < memory_ranges; i++) { + if (crash_memory_range[i].end > 0x0009ffff) { + crash_reserved_mem[0].start = \ + crash_memory_range[i].start; + break; + } + } + if (crash_reserved_mem[0].start >= mem_max) { + fprintf(stderr, "Too small mem_max: 0x%llx.\n", + mem_max); + return -1; + } + crash_reserved_mem[0].end = mem_max; + crash_reserved_mem[0].type = RANGE_RAM; + crash_reserved_mem_nr = 1; + } + + for (i = 0; i < crash_reserved_mem_nr; i++) + if (exclude_region(&memory_ranges, crash_reserved_mem[i].start, + crash_reserved_mem[i].end) < 0) + return -1; + + if (gart) { + /* exclude GART region if the system has one */ + if (exclude_region(&memory_ranges, gart_start, gart_end) < 0) + return -1; + } + *range = crash_memory_range; + *ranges = memory_ranges; + + return 0; +} + +#ifdef HAVE_LIBXENCTRL +static int get_crash_memory_ranges_xen(struct memory_range **range, + int *ranges, unsigned long lowmem_limit) +{ + struct e820entry *e820entries; + int j, rc, ret = -1; + unsigned int i; + xc_interface *xc; + + xc = xc_interface_open(NULL, NULL, 0); + + if (!xc) { + fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); + return -1; + } + + e820entries = xmalloc(sizeof(*e820entries) * CRASH_MAX_MEMORY_RANGES); + + rc = xc_get_machine_memory_map(xc, e820entries, CRASH_MAX_MEMORY_RANGES); + + if (rc < 0) { + fprintf(stderr, "%s: xc_get_machine_memory_map: %s\n", __func__, strerror(-rc)); + goto err; + } + + for (i = 0, j = 0; i < rc && j < CRASH_MAX_MEMORY_RANGES; ++i, ++j) { + crash_memory_range[j].start = e820entries[i].addr; + crash_memory_range[j].end = e820entries[i].addr + e820entries[i].size - 1; + crash_memory_range[j].type = xen_e820_to_kexec_type(e820entries[i].type); + segregate_lowmem_region(&j, lowmem_limit); + } + + *range = crash_memory_range; + *ranges = j; + + qsort(*range, *ranges, sizeof(struct memory_range), compare_ranges); + + for (i = 0; i < crash_reserved_mem_nr; i++) + if (exclude_region(ranges, crash_reserved_mem[i].start, + crash_reserved_mem[i].end) < 0) + goto err; + + ret = 0; + +err: + xc_interface_close(xc); + free(e820entries); + return ret; +} +#else +static int get_crash_memory_ranges_xen(struct memory_range **range, + int *ranges, unsigned long lowmem_limit) +{ + return 0; +} +#endif /* HAVE_LIBXENCTRL */ + +static void segregate_lowmem_region(int *nr_ranges, unsigned long lowmem_limit) +{ + unsigned long long end, start; + unsigned type; + + start = crash_memory_range[*nr_ranges].start; + end = crash_memory_range[*nr_ranges].end; + type = crash_memory_range[*nr_ranges].type; + + if (!(lowmem_limit && lowmem_limit > start && lowmem_limit < end)) + return; + + crash_memory_range[*nr_ranges].end = lowmem_limit - 1; + + if (*nr_ranges >= CRASH_MAX_MEMORY_RANGES - 1) + return; + + ++*nr_ranges; + + crash_memory_range[*nr_ranges].start = lowmem_limit; + crash_memory_range[*nr_ranges].end = end; + crash_memory_range[*nr_ranges].type = type; +} + +/* Removes crash reserve region from list of memory chunks for whom elf program + * headers have to be created. Assuming crash reserve region to be a single + * continuous area fully contained inside one of the memory chunks */ +static int exclude_region(int *nr_ranges, uint64_t start, uint64_t end) +{ + int i, j, tidx = -1; + struct memory_range temp_region = {0, 0, 0}; + + + for (i = 0; i < (*nr_ranges); i++) { + unsigned long long mstart, mend; + mstart = crash_memory_range[i].start; + mend = crash_memory_range[i].end; + if (start < mend && end > mstart) { + if (start != mstart && end != mend) { + /* Split memory region */ + crash_memory_range[i].end = start - 1; + temp_region.start = end + 1; + temp_region.end = mend; + temp_region.type = RANGE_RAM; + tidx = i+1; + } else if (start != mstart) + crash_memory_range[i].end = start - 1; + else + crash_memory_range[i].start = end + 1; + } + } + /* Insert split memory region, if any. */ + if (tidx >= 0) { + if (*nr_ranges == CRASH_MAX_MEMORY_RANGES) { + /* No space to insert another element. */ + fprintf(stderr, "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + return -1; + } + for (j = (*nr_ranges - 1); j >= tidx; j--) + crash_memory_range[j+1] = crash_memory_range[j]; + crash_memory_range[tidx] = temp_region; + (*nr_ranges)++; + } + return 0; +} + +/* Adds a segment from list of memory regions which new kernel can use to + * boot. Segment start and end should be aligned to 1K boundary. */ +static int add_memmap(struct memory_range *memmap_p, int *nr_memmap, + unsigned long long addr, size_t size, int type) +{ + int i, j, nr_entries = 0, tidx = 0, align = 1024; + unsigned long long mstart, mend; + + /* Shrink to 1KiB alignment if needed. */ + if (type == RANGE_RAM && ((addr%align) || (size%align))) { + unsigned long long end = addr + size; + + printf("%s: RAM chunk %#llx - %#llx unaligned\n", __func__, addr, end); + addr = _ALIGN_UP(addr, align); + end = _ALIGN_DOWN(end, align); + if (addr >= end) + return -1; + size = end - addr; + printf("%s: RAM chunk shrunk to %#llx - %#llx\n", __func__, addr, end); + } + + /* Make sure at least one entry in list is free. */ + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (!mstart && !mend) + break; + else + nr_entries++; + } + if (nr_entries == CRASH_MAX_MEMMAP_NR) + return -1; + + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (mstart == 0 && mend == 0) + break; + if (mstart <= (addr+size-1) && mend >=addr) + /* Overlapping region. */ + return -1; + else if (addr > mend) + tidx = i+1; + } + /* Insert the memory region. */ + for (j = nr_entries-1; j >= tidx; j--) + memmap_p[j+1] = memmap_p[j]; + memmap_p[tidx].start = addr; + memmap_p[tidx].end = addr + size - 1; + memmap_p[tidx].type = type; + *nr_memmap = nr_entries + 1; + + dbgprint_mem_range("Memmap after adding segment", memmap_p, *nr_memmap); + + return 0; +} + +/* Removes a segment from list of memory regions which new kernel can use to + * boot. Segment start and end should be aligned to 1K boundary. */ +static int delete_memmap(struct memory_range *memmap_p, int *nr_memmap, + unsigned long long addr, size_t size) +{ + int i, j, nr_entries = 0, tidx = -1, operation = 0, align = 1024; + unsigned long long mstart, mend; + struct memory_range temp_region; + + /* Do alignment check. */ + if ((addr%align) || (size%align)) + return -1; + + /* Make sure at least one entry in list is free. */ + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (!mstart && !mend) + break; + else + nr_entries++; + } + if (nr_entries == CRASH_MAX_MEMMAP_NR) + /* List if full */ + return -1; + + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (mstart == 0 && mend == 0) + /* Did not find the segment in the list. */ + return -1; + if (mstart <= addr && mend >= (addr + size - 1)) { + if (mstart == addr && mend == (addr + size - 1)) { + /* Exact match. Delete region */ + operation = -1; + tidx = i; + break; + } + if (mstart != addr && mend != (addr + size - 1)) { + /* Split in two */ + memmap_p[i].end = addr - 1; + temp_region.start = addr + size; + temp_region.end = mend; + temp_region.type = memmap_p[i].type; + operation = 1; + tidx = i; + break; + } + + /* No addition/deletion required. Adjust the existing.*/ + if (mstart != addr) { + memmap_p[i].end = addr - 1; + break; + } else { + memmap_p[i].start = addr + size; + break; + } + } + } + if ((operation == 1) && tidx >=0) { + /* Insert the split memory region. */ + for (j = nr_entries-1; j > tidx; j--) + memmap_p[j+1] = memmap_p[j]; + memmap_p[tidx+1] = temp_region; + *nr_memmap = nr_entries + 1; + } + if ((operation == -1) && tidx >=0) { + /* Delete the exact match memory region. */ + for (j = i+1; j < CRASH_MAX_MEMMAP_NR; j++) + memmap_p[j-1] = memmap_p[j]; + memmap_p[j-1].start = memmap_p[j-1].end = 0; + *nr_memmap = nr_entries - 1; + } + + dbgprint_mem_range("Memmap after deleting segment", memmap_p, *nr_memmap); + + return 0; +} + +static void cmdline_add_memmap_internal(char *cmdline, unsigned long startk, + unsigned long endk, int type) +{ + int cmdlen, len; + char str_mmap[256], str_tmp[20]; + + strcpy (str_mmap, " memmap="); + ultoa((endk-startk), str_tmp); + strcat (str_mmap, str_tmp); + + if (type == RANGE_RAM) + strcat (str_mmap, "K@"); + else if (type == RANGE_RESERVED) + strcat (str_mmap, "K$"); + else if (type == RANGE_ACPI || type == RANGE_ACPI_NVS) + strcat (str_mmap, "K#"); + else if (type == RANGE_PRAM) + strcat (str_mmap, "K!"); + + ultoa(startk, str_tmp); + strcat (str_mmap, str_tmp); + strcat (str_mmap, "K"); + len = strlen(str_mmap); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str_mmap); +} + +/* Adds the appropriate memmap= options to command line, indicating the + * memory regions the new kernel can use to boot into. */ +static int cmdline_add_memmap(char *cmdline, struct memory_range *memmap_p) +{ + int i, cmdlen, len; + unsigned long min_sizek = 100; + char str_mmap[256]; + + /* Exact map */ + strcpy(str_mmap, " memmap=exactmap"); + len = strlen(str_mmap); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str_mmap); + + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + unsigned long startk, endk, type; + + startk = memmap_p[i].start/1024; + endk = (memmap_p[i].end + 1)/1024; + type = memmap_p[i].type; + + /* Only adding memory regions of RAM and ACPI and Persistent Mem */ + if (type != RANGE_RAM && + type != RANGE_ACPI && + type != RANGE_ACPI_NVS && + type != RANGE_PRAM) + continue; + + if (type == RANGE_ACPI || type == RANGE_ACPI_NVS) + endk = _ALIGN_UP(memmap_p[i].end + 1, 1024)/1024; + + if (!startk && !endk) + /* All regions traversed. */ + break; + + /* A RAM region is not worth adding if region size < 100K. + * It eats up precious command line length. */ + if (type == RANGE_RAM && (endk - startk) < min_sizek) + continue; + /* And do not add e820 reserved region either */ + cmdline_add_memmap_internal(cmdline, startk, endk, type); + } + + dbgprintf("Command line after adding memmap\n"); + dbgprintf("%s\n", cmdline); + + return 0; +} + +/* Adds the elfcorehdr= command line parameter to command line. */ +static int cmdline_add_elfcorehdr(char *cmdline, unsigned long addr) +{ + int cmdlen, len, align = 1024; + char str[30], *ptr; + + /* Passing in elfcorehdr=xxxK format. Saves space required in cmdline. + * Ensure 1K alignment*/ + if (addr%align) + return -1; + addr = addr/align; + ptr = str; + strcpy(str, " elfcorehdr="); + ptr += strlen(str); + ultoa(addr, ptr); + strcat(str, "K"); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + dbgprintf("Command line after adding elfcorehdr\n"); + dbgprintf("%s\n", cmdline); + + return 0; +} + + +/* + * This routine is specific to i386 architecture to maintain the + * backward compatibility, other architectures can use the per + * cpu version get_crash_notes_per_cpu() directly. + */ +static int get_crash_notes(int cpu, uint64_t *addr, uint64_t *len) +{ + const char *crash_notes = "/sys/kernel/crash_notes"; + char line[MAX_LINE]; + FILE *fp; + unsigned long vaddr; + int count; + + fp = fopen(crash_notes, "r"); + if (fp) { + if (fgets(line, sizeof(line), fp) != 0) { + count = sscanf(line, "%lx", &vaddr); + if (count != 1) + die("Cannot parse %s: %s\n", crash_notes, + strerror(errno)); + } + + *addr = x86__pa(vaddr + (cpu * MAX_NOTE_BYTES)); + *len = MAX_NOTE_BYTES; + + dbgprintf("crash_notes addr = %llx\n", + (unsigned long long)*addr); + + fclose(fp); + return 0; + } else + return get_crash_notes_per_cpu(cpu, addr, len); +} + +static enum coretype get_core_type(struct crash_elf_info *elf_info, + struct memory_range *range, int ranges) +{ + if ((elf_info->machine) == EM_X86_64) + return CORE_TYPE_ELF64; + else { + /* fall back to default */ + if (ranges == 0) + return CORE_TYPE_ELF64; + + if (range[ranges - 1].end > 0xFFFFFFFFUL) + return CORE_TYPE_ELF64; + else + return CORE_TYPE_ELF32; + } +} + +static int sysfs_efi_runtime_map_exist(void) +{ + DIR *dir; + + dir = opendir("/sys/firmware/efi/runtime-map"); + if (!dir) + return 0; + + closedir(dir); + return 1; +} + +/* Appends 'acpi_rsdp=' commandline for efi boot crash dump */ +static void cmdline_add_efi(char *cmdline) +{ + uint64_t acpi_rsdp; + char acpi_rsdp_buf[MAX_LINE]; + + acpi_rsdp = get_acpi_rsdp(); + + if (!acpi_rsdp) + return; + + sprintf(acpi_rsdp_buf, " acpi_rsdp=0x%lx", acpi_rsdp); + if (strlen(cmdline) + strlen(acpi_rsdp_buf) > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + + strcat(cmdline, acpi_rsdp_buf); +} + +static void get_backup_area(struct kexec_info *info, + struct memory_range *range, int ranges) +{ + int i; + + /* Look for first 640 KiB RAM region. */ + for (i = 0; i < ranges; ++i) { + if (range[i].type != RANGE_RAM || range[i].end > 0xa0000) + continue; + + info->backup_src_start = range[i].start; + info->backup_src_size = range[i].end - range[i].start + 1; + + dbgprintf("%s: %016llx-%016llx : System RAM\n", __func__, + range[i].start, range[i].end); + + return; + } + + /* First 640 KiB RAM region not found. Assume defaults. */ + info->backup_src_start = BACKUP_SRC_START; + info->backup_src_size = BACKUP_SRC_END - BACKUP_SRC_START + 1; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for backup region, another segment for storing elf headers + * for crash memory image. + */ +int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, + unsigned long max_addr, unsigned long min_base) +{ + void *tmp; + unsigned long sz, bufsz, memsz, elfcorehdr; + int nr_ranges = 0, nr_memmap = 0, align = 1024, i; + struct memory_range *mem_range, *memmap_p; + struct crash_elf_info elf_info; + unsigned kexec_arch; + + memset(&elf_info, 0x0, sizeof(elf_info)); + + /* Constant parts of the elf_info */ + memset(&elf_info, 0, sizeof(elf_info)); + elf_info.data = ELFDATA2LSB; + + /* Get the architecture of the running kernel */ + kexec_arch = info->kexec_flags & KEXEC_ARCH_MASK; + if (kexec_arch == KEXEC_ARCH_DEFAULT) + kexec_arch = KEXEC_ARCH_NATIVE; + + /* Get the elf architecture of the running kernel */ + switch(kexec_arch) { + case KEXEC_ARCH_X86_64: + elf_info.machine = EM_X86_64; + break; + case KEXEC_ARCH_386: + elf_info.machine = EM_386; + elf_info.lowmem_limit = X86_MAXMEM; + elf_info.get_note_info = get_crash_notes; + break; + default: + fprintf(stderr, "unsupported crashdump architecture: %04x\n", + kexec_arch); + return -1; + } + + if (xen_present()) { + if (get_crash_memory_ranges_xen(&mem_range, &nr_ranges, + elf_info.lowmem_limit) < 0) + return -1; + } else + if (get_crash_memory_ranges(&mem_range, &nr_ranges, + info->kexec_flags, + elf_info.lowmem_limit) < 0) + return -1; + + get_backup_area(info, mem_range, nr_ranges); + + dbgprint_mem_range("CRASH MEMORY RANGES", mem_range, nr_ranges); + + /* + * if the core type has not been set on command line, set it here + * automatically + */ + if (arch_options.core_header_type == CORE_TYPE_UNDEF) { + arch_options.core_header_type = + get_core_type(&elf_info, mem_range, nr_ranges); + } + /* Get the elf class... */ + elf_info.class = ELFCLASS32; + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + elf_info.class = ELFCLASS64; + } + + if (get_kernel_page_offset(info, &elf_info)) + return -1; + + if (get_kernel_paddr(info, &elf_info)) + return -1; + + if (get_kernel_vaddr_and_size(info, &elf_info)) + return -1; + + /* Memory regions which panic kernel can safely use to boot into */ + sz = (sizeof(struct memory_range) * CRASH_MAX_MEMMAP_NR); + memmap_p = xmalloc(sz); + memset(memmap_p, 0, sz); + add_memmap(memmap_p, &nr_memmap, info->backup_src_start, info->backup_src_size, RANGE_RAM); + for (i = 0; i < crash_reserved_mem_nr; i++) { + sz = crash_reserved_mem[i].end - crash_reserved_mem[i].start +1; + if (add_memmap(memmap_p, &nr_memmap, crash_reserved_mem[i].start, + sz, RANGE_RAM) < 0) { + free(memmap_p); + return ENOCRASHKERNEL; + } + } + + /* Create a backup region segment to store backup data*/ + if (!(info->kexec_flags & KEXEC_PRESERVE_CONTEXT)) { + sz = _ALIGN(info->backup_src_size, align); + tmp = xmalloc(sz); + memset(tmp, 0, sz); + info->backup_start = add_buffer(info, tmp, sz, sz, align, + 0, max_addr, -1); + dbgprintf("Created backup segment at 0x%lx\n", + info->backup_start); + if (delete_memmap(memmap_p, &nr_memmap, info->backup_start, sz) < 0) { + free(tmp); + free(memmap_p); + return EFAILED; + } + } + + /* Create elf header segment and store crash image data. */ + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + if (crash_create_elf64_headers(info, &elf_info, mem_range, + nr_ranges, &tmp, &bufsz, + ELF_CORE_HEADER_ALIGN) < 0) { + free(memmap_p); + return EFAILED; + } + } + else { + if (crash_create_elf32_headers(info, &elf_info, mem_range, + nr_ranges, &tmp, &bufsz, + ELF_CORE_HEADER_ALIGN) < 0) { + free(memmap_p); + return EFAILED; + } + } + /* the size of the elf headers allocated is returned in 'bufsz' */ + + /* Hack: With some ld versions (GNU ld version 2.14.90.0.4 20030523), + * vmlinux program headers show a gap of two pages between bss segment + * and data segment but effectively kernel considers it as bss segment + * and overwrites the any data placed there. Hence bloat the memsz of + * elf core header segment to 16K to avoid being placed in such gaps. + * This is a makeshift solution until it is fixed in kernel. + */ + if (bufsz < (16*1024)) { + /* bufsize is big enough for all the PT_NOTE's and PT_LOAD's */ + memsz = 16*1024; + /* memsz will be the size of the memory hole we look for */ + } else { + memsz = bufsz; + } + elfcorehdr = add_buffer(info, tmp, bufsz, memsz, align, min_base, + max_addr, -1); + dbgprintf("Created elf header segment at 0x%lx\n", elfcorehdr); + if (delete_memmap(memmap_p, &nr_memmap, elfcorehdr, memsz) < 0) { + free(memmap_p); + return -1; + } + if (!bzImage_support_efi_boot || arch_options.noefi || + !sysfs_efi_runtime_map_exist()) + cmdline_add_efi(mod_cmdline); + cmdline_add_elfcorehdr(mod_cmdline, elfcorehdr); + + /* Inform second kernel about the presence of ACPI tables. */ + for (i = 0; i < nr_ranges; i++) { + unsigned long start, end, size, type; + if ( !( mem_range[i].type == RANGE_ACPI + || mem_range[i].type == RANGE_ACPI_NVS + || mem_range[i].type == RANGE_RESERVED + || mem_range[i].type == RANGE_PMEM + || mem_range[i].type == RANGE_PRAM)) + continue; + start = mem_range[i].start; + end = mem_range[i].end; + type = mem_range[i].type; + size = end - start + 1; + add_memmap(memmap_p, &nr_memmap, start, size, type); + } + + if (arch_options.pass_memmap_cmdline) + cmdline_add_memmap(mod_cmdline, memmap_p); + + /* Store 2nd kernel boot memory ranges for later reference in + * x86-setup-linux.c: setup_linux_system_parameters() */ + info->crash_range = memmap_p; + info->nr_crash_ranges = nr_memmap; + + return 0; +} + +/* On x86, the kernel may make a low reservation in addition to the + * normal reservation. However, the kernel refuses to load the panic + * kernel to low memory, so always choose the highest range. + */ +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + if (!crash_reserved_mem_nr) + return -1; + + *start = crash_reserved_mem[crash_reserved_mem_nr - 1].start; + *end = crash_reserved_mem[crash_reserved_mem_nr - 1].end; + + return 0; +} + +static int crashkernel_mem_callback(void *UNUSED(data), int nr, + char *UNUSED(str), + unsigned long long base, + unsigned long long length) +{ + if (nr >= CRASH_RESERVED_MEM_NR) + return 1; + + crash_reserved_mem[nr].start = base; + crash_reserved_mem[nr].end = base + length - 1; + crash_reserved_mem[nr].type = RANGE_RAM; + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + int ret; + + if (xen_present()) { + uint64_t start, end; + + ret = xen_get_crashkernel_region(&start, &end); + if (ret < 0) + return 0; + + crash_reserved_mem[0].start = start; + crash_reserved_mem[0].end = end; + crash_reserved_mem[0].type = RANGE_RAM; + crash_reserved_mem_nr = 1; + } else { + ret = kexec_iomem_for_each_line("Crash kernel\n", + crashkernel_mem_callback, NULL); + crash_reserved_mem_nr = ret; + } + + return !!crash_reserved_mem_nr; +} diff --git a/kexec/arch/i386/crashdump-x86.h b/kexec/arch/i386/crashdump-x86.h new file mode 100644 index 0000000..479a549 --- /dev/null +++ b/kexec/arch/i386/crashdump-x86.h @@ -0,0 +1,33 @@ +#ifndef CRASHDUMP_X86_H +#define CRASHDUMP_X86_H + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + unsigned long max_addr, unsigned long min_base); + +#define X86_PAGE_OFFSET 0xc0000000 +#define x86__pa(x) ((unsigned long)(x)-X86_PAGE_OFFSET) + +#define X86__VMALLOC_RESERVE (128 << 20) +#define X86_MAXMEM (-X86_PAGE_OFFSET-X86__VMALLOC_RESERVE) + +#define X86_64__START_KERNEL_map 0xffffffff80000000ULL +#define X86_64_PAGE_OFFSET_PRE_2_6_27 0xffff810000000000ULL +#define X86_64_PAGE_OFFSET_PRE_4_20_0 0xffff880000000000ULL +#define X86_64_PAGE_OFFSET 0xffff888000000000ULL + +#define X86_64_MAXMEM 0x3fffffffffffUL + +/* Kernel text size */ +#define X86_64_KERNEL_TEXT_SIZE (512UL*1024*1024) + +#define CRASH_MAX_MEMMAP_NR 1024 + +#define CRASH_MAX_MEMORY_RANGES 32768 + +/* Backup Region, First 640K of System RAM. */ +#define BACKUP_SRC_START 0x00000000 +#define BACKUP_SRC_END 0x0009ffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) + +#endif /* CRASHDUMP_X86_H */ diff --git a/kexec/arch/i386/include/arch/options.h b/kexec/arch/i386/include/arch/options.h new file mode 100644 index 0000000..89e0a95 --- /dev/null +++ b/kexec/arch/i386/include/arch/options.h @@ -0,0 +1,86 @@ +#ifndef KEXEC_ARCH_I386_OPTIONS_H +#define KEXEC_ARCH_I386_OPTIONS_H + +/* + ************************************************************************* + * NOTE NOTE NOTE + * This file is included for i386 builds *and* x86_64 builds (which build + * both x86_64 and i386 loaders). + * It contains the combined set of options used by i386 and x86_64. + ************************************************************************* + */ + +#define OPT_RESET_VGA (OPT_MAX+0) +#define OPT_SERIAL (OPT_MAX+1) +#define OPT_SERIAL_BAUD (OPT_MAX+2) +#define OPT_CONSOLE_VGA (OPT_MAX+3) +#define OPT_CONSOLE_SERIAL (OPT_MAX+4) +#define OPT_ELF32_CORE (OPT_MAX+5) +#define OPT_ELF64_CORE (OPT_MAX+6) +#define OPT_ARCH_MAX (OPT_MAX+7) + +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+1) +#define OPT_RAMDISK (OPT_ARCH_MAX+2) +#define OPT_ARGS_ELF (OPT_ARCH_MAX+3) +#define OPT_ARGS_LINUX (OPT_ARCH_MAX+4) +#define OPT_ARGS_NONE (OPT_ARCH_MAX+5) +#define OPT_CL (OPT_ARCH_MAX+6) +#define OPT_MOD (OPT_ARCH_MAX+7) +#define OPT_VGA (OPT_ARCH_MAX+8) +#define OPT_REAL_MODE (OPT_ARCH_MAX+9) +#define OPT_ENTRY_32BIT (OPT_ARCH_MAX+10) +#define OPT_PASS_MEMMAP_CMDLINE (OPT_ARCH_MAX+11) +#define OPT_NOEFI (OPT_ARCH_MAX+12) +#define OPT_REUSE_VIDEO_TYPE (OPT_ARCH_MAX+13) +#define OPT_DTB (OPT_ARCH_MAX+14) + +/* Options relevant to the architecture (excluding loader-specific ones): */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "reset-vga", 0, 0, OPT_RESET_VGA }, \ + { "serial", 1, 0, OPT_SERIAL }, \ + { "serial-baud", 1, 0, OPT_SERIAL_BAUD }, \ + { "console-vga", 0, 0, OPT_CONSOLE_VGA }, \ + { "console-serial", 0, 0, OPT_CONSOLE_SERIAL }, \ + { "elf32-core-headers", 0, 0, OPT_ELF32_CORE }, \ + { "elf64-core-headers", 0, 0, OPT_ELF64_CORE }, \ + { "pass-memmap-cmdline", 0, 0, OPT_PASS_MEMMAP_CMDLINE }, \ + { "noefi", 0, 0, OPT_NOEFI}, \ + { "reuse-video-type", 0, 0, OPT_REUSE_VIDEO_TYPE }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "append", 1, NULL, OPT_APPEND }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \ + { "initrd", 1, NULL, OPT_RAMDISK }, \ + { "ramdisk", 1, NULL, OPT_RAMDISK }, \ + { "args-elf", 0, NULL, OPT_ARGS_ELF }, \ + { "args-linux", 0, NULL, OPT_ARGS_LINUX }, \ + { "args-none", 0, NULL, OPT_ARGS_NONE }, \ + { "module", 1, 0, OPT_MOD }, \ + { "real-mode", 0, NULL, OPT_REAL_MODE }, \ + { "entry-32bit", 0, NULL, OPT_ENTRY_32BIT }, \ + { "dtb", 1, NULL, OPT_DTB }, + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_I386_OPTIONS_H */ + diff --git a/kexec/arch/i386/kexec-beoboot-x86.c b/kexec/arch/i386/kexec-beoboot-x86.c new file mode 100644 index 0000000..d949ab8 --- /dev/null +++ b/kexec/arch/i386/kexec-beoboot-x86.c @@ -0,0 +1,132 @@ +/*------------------------------------------------------------ -*- C -*- + * Eric Biederman <ebiederman@xmission.com> + * Erik Arjan Hendriks <hendriks@lanl.gov> + * + * 14 December 2004 + * This file is a derivative of the beoboot image loader, modified + * to work with kexec. + * + * This version is derivative from the orignal mkbootimg.c which is + * Copyright (C) 2000 Scyld Computing Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + + * + *--------------------------------------------------------------------*/ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <x86/x86-linux.h> +#include <boot/beoboot.h> +#include "../../kexec.h" +#include "kexec-x86.h" +#include <arch/options.h> + +int beoboot_probe(const char *buf, off_t len) +{ + struct beoboot_header bb_header; + const char *cmdline, *kernel; + int result; + if ((uintmax_t)len < (uintmax_t)sizeof(bb_header)) { + return -1; + } + memcpy(&bb_header, buf, sizeof(bb_header)); + if (memcmp(bb_header.magic, BEOBOOT_MAGIC, 4) != 0) { + return -1; + } + if (bb_header.arch != BEOBOOT_ARCH) { + return -1; + } + /* Make certain a bzImage is packed into there. + */ + cmdline = buf + sizeof(bb_header); + kernel = cmdline + bb_header.cmdline_size; + result = bzImage_probe(kernel, bb_header.kernel_size); + + return result; +} + +void beoboot_usage(void) +{ + printf( " --real-mode Use the kernels real mode entry point.\n" + ); + + /* No parameters are parsed */ +} + +#define SETUP_BASE 0x90000 +#define KERN32_BASE 0x100000 /* 1MB */ +#define INITRD_BASE 0x1000000 /* 16MB */ + +int beoboot_load(int argc, char **argv, const char *buf, off_t UNUSED(len), + struct kexec_info *info) +{ + struct beoboot_header bb_header; + const char *command_line, *kernel, *initrd; + + int real_mode_entry; + int opt; + int result; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "real-mode", 0, 0, OPT_REAL_MODE }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + /* + * Parse the command line arguments + */ + real_mode_entry = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_REAL_MODE: + real_mode_entry = 1; + break; + } + } + + + /* + * Parse the file + */ + memcpy(&bb_header, buf, sizeof(bb_header)); + command_line = buf + sizeof(bb_header); + kernel = command_line + bb_header.cmdline_size; + initrd = NULL; + if (bb_header.flags & BEOBOOT_INITRD_PRESENT) { + initrd = kernel + bb_header.kernel_size; + } + + result = do_bzImage_load(info, + kernel, bb_header.kernel_size, + command_line, bb_header.cmdline_size, + initrd, bb_header.initrd_size, + 0, 0, real_mode_entry); + + return result; +} + diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c new file mode 100644 index 0000000..1b8f20c --- /dev/null +++ b/kexec/arch/i386/kexec-bzImage.c @@ -0,0 +1,471 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2010 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-x86.h" +#include "x86-linux-setup.h" +#include "crashdump-x86.h" +#include <arch/options.h> + +static const int probe_debug = 0; +int bzImage_support_efi_boot = 0; + +int bzImage_probe(const char *buf, off_t len) +{ + const struct x86_linux_header *header; + if ((uintmax_t)len < (uintmax_t)(2 * 512)) { + if (probe_debug) { + fprintf(stderr, "File is too short to be a bzImage!\n"); + } + return -1; + } + header = (const struct x86_linux_header *)buf; + if (memcmp(header->header_magic, "HdrS", 4) != 0) { + if (probe_debug) { + fprintf(stderr, "Not a bzImage\n"); + } + return -1; + } + if (header->boot_sector_magic != 0xAA55) { + if (probe_debug) { + fprintf(stderr, "No x86 boot sector present\n"); + } + /* No x86 boot sector present */ + return -1; + } + if (header->protocol_version < 0x0200) { + if (probe_debug) { + fprintf(stderr, "Must be at least protocol version 2.00\n"); + } + /* Must be at least protocol version 2.00 */ + return -1; + } + if ((header->loadflags & 1) == 0) { + if (probe_debug) { + fprintf(stderr, "zImage not a bzImage\n"); + } + /* Not a bzImage */ + return -1; + } + /* I've got a bzImage */ + if (probe_debug) { + fprintf(stderr, "It's a bzImage\n"); + } + return 0; +} + + +void bzImage_usage(void) +{ + printf( " --real-mode Use the kernels real mode entry point.\n" + " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --reuse-cmdline Use kernel command line from running system.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " --dtb=FILE Use FILE as devicetree.\n" + ); + +} + +int do_bzImage_load(struct kexec_info *info, + const char *kernel, off_t kernel_len, + const char *command_line, off_t command_line_len, + const char *initrd, off_t initrd_len, + const char *dtb, off_t dtb_len, + int real_mode_entry) +{ + struct x86_linux_header setup_header; + struct x86_linux_param_header *real_mode; + int setup_sects; + size_t size; + int kern16_size; + unsigned long setup_base, setup_size; + struct entry32_regs regs32; + struct entry16_regs regs16; + unsigned int relocatable_kernel = 0; + unsigned long kernel32_load_addr; + char *modified_cmdline; + unsigned long cmdline_end; + unsigned long kern16_size_needed; + unsigned long heap_size = 0; + + /* + * Find out about the file I am about to load. + */ + if ((uintmax_t)kernel_len < (uintmax_t)(2 * 512)) { + return -1; + } + memcpy(&setup_header, kernel, sizeof(setup_header)); + setup_sects = setup_header.setup_sects; + if (setup_sects == 0) { + setup_sects = 4; + } + + kern16_size = (setup_sects +1) *512; + if (kernel_len < kern16_size) { + fprintf(stderr, "BzImage truncated?\n"); + return -1; + } + + if (setup_header.protocol_version >= 0x0206) { + if ((uintmax_t)command_line_len > + (uintmax_t)setup_header.cmdline_size) { + dbgprintf("Kernel command line too long for kernel!\n"); + return -1; + } + } else { + if (command_line_len > 255) { + dbgprintf("WARNING: This kernel may only support 255 byte command lines\n"); + } + } + + if (setup_header.protocol_version >= 0x0205) { + relocatable_kernel = setup_header.relocatable_kernel; + dbgprintf("bzImage is relocatable\n"); + } + + /* Can't use bzImage for crash dump purposes with real mode entry */ + if((info->kexec_flags & KEXEC_ON_CRASH) && real_mode_entry) { + fprintf(stderr, "Can't use bzImage for crash dump purposes" + " with real mode entry\n"); + return -1; + } + + if((info->kexec_flags & KEXEC_ON_CRASH) && !relocatable_kernel) { + fprintf(stderr, "BzImage is not relocatable. Can't be used" + " as capture kernel.\n"); + return -1; + } + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (command_line) { + strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + + /* If panic kernel is being loaded, additional segments need + * to be created. load_crashdump_segments will take care of + * loading the segments as high in memory as possible, hence + * in turn as away as possible from kernel to avoid being + * stomped by the kernel. + */ + if (load_crashdump_segments(info, modified_cmdline, -1, 0) < 0) + return -1; + + /* Use new command line buffer */ + command_line = modified_cmdline; + command_line_len = strlen(command_line) +1; + } + + /* Load the trampoline. This must load at a higher address + * than the argument/parameter segment or the kernel will stomp + * it's gdt. + * + * x86_64 purgatory code has got relocations type R_X86_64_32S + * that means purgatory got to be loaded within first 2G otherwise + * overflow takes place while applying relocations. + */ + if (!real_mode_entry && relocatable_kernel) + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0x3000, 0x7fffffff, -1, 0); + else + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0x3000, 640*1024, -1, 0); + dbgprintf("Loaded purgatory at addr 0x%lx\n", info->rhdr.rel_addr); + + /* The argument/parameter segment */ + if (real_mode_entry) { + /* need to include size for bss and heap etc */ + if (setup_header.protocol_version >= 0x0201) + kern16_size_needed = setup_header.heap_end_ptr; + else + kern16_size_needed = kern16_size + 8192; /* bss */ + if (kern16_size_needed < kern16_size) + kern16_size_needed = kern16_size; + if (kern16_size_needed > 0xfffc) + die("kern16_size_needed is more then 64k\n"); + heap_size = 0xfffc - kern16_size_needed; /* less 64k */ + heap_size = _ALIGN_DOWN(heap_size, 0x200); + kern16_size_needed += heap_size; + } else { + kern16_size_needed = kern16_size; + /* need to bigger than size of struct bootparams */ + if (kern16_size_needed < 4096) + kern16_size_needed = 4096; + } + setup_size = kern16_size_needed + command_line_len + + PURGATORY_CMDLINE_SIZE; + real_mode = xmalloc(setup_size); + memset(real_mode, 0, setup_size); + if (!real_mode_entry) { + unsigned long setup_header_size = kernel[0x201] + 0x202 - 0x1f1; + + /* only copy setup_header */ + if (setup_header_size > 0x7f) + setup_header_size = 0x7f; + memcpy((unsigned char *)real_mode + 0x1f1, kernel + 0x1f1, + setup_header_size); + } else { + /* copy setup code and setup_header */ + memcpy(real_mode, kernel, kern16_size); + } + + if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) { + /* If using bzImage for capture kernel, then we will not be + * executing real mode code. setup segment can be loaded + * anywhere as we will be just reading command line. + */ + setup_base = add_buffer(info, real_mode, setup_size, setup_size, + 16, 0x3000, -1, -1); + } + else if (real_mode->protocol_version >= 0x0200) { + /* Careful setup_base must be greater than 8K */ + setup_base = add_buffer(info, real_mode, setup_size, setup_size, + 16, 0x3000, 640*1024, 1); + } else { + add_segment(info, real_mode, setup_size, SETUP_BASE, setup_size); + setup_base = SETUP_BASE; + } + dbgprintf("Loaded real-mode code and command line at 0x%lx\n", + setup_base); + + /* Verify purgatory loads higher than the parameters */ + if (info->rhdr.rel_addr < setup_base) { + die("Could not put setup code above the kernel parameters\n"); + } + + /* The main kernel segment */ + size = kernel_len - kern16_size; + + if (real_mode->protocol_version >=0x0205 && relocatable_kernel) { + /* Relocatable bzImage */ + unsigned long kern_align = real_mode->kernel_alignment; + unsigned long kernel32_max_addr = DEFAULT_BZIMAGE_ADDR_MAX; + + if (kernel32_max_addr > real_mode->initrd_addr_max) + kernel32_max_addr = real_mode->initrd_addr_max; + + kernel32_load_addr = add_buffer(info, kernel + kern16_size, + size, size, kern_align, + 0x100000, kernel32_max_addr, + 1); + } + else { + kernel32_load_addr = KERN32_BASE; + add_segment(info, kernel + kern16_size, size, + kernel32_load_addr, size); + } + + dbgprintf("Loaded 32bit kernel at 0x%lx\n", kernel32_load_addr); + + /* Tell the kernel what is going on */ + setup_linux_bootloader_parameters(info, real_mode, setup_base, + kern16_size_needed, command_line, command_line_len, + initrd, initrd_len); + + if (real_mode_entry && real_mode->protocol_version >= 0x0201) { + real_mode->loader_flags |= 0x80; /* CAN_USE_HEAP */ + real_mode->heap_end_ptr += heap_size - 0x200; /*stack*/ + } + + /* Get the initial register values */ + if (real_mode_entry) + elf_rel_get_symbol(&info->rhdr, "entry16_regs", + ®s16, sizeof(regs16)); + + /* + * Initialize the 32bit start information. + */ + regs32.eax = 0; /* unused */ + regs32.ebx = 0; /* 0 == boot not AP processor start */ + regs32.ecx = 0; /* unused */ + regs32.edx = 0; /* unused */ + regs32.esi = setup_base; /* kernel parameters */ + regs32.edi = 0; /* unused */ + regs32.esp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* stack, unused */ + regs32.ebp = 0; /* unused */ + regs32.eip = kernel32_load_addr; /* kernel entry point */ + + /* + * Initialize the 16bit start information. + */ + if (real_mode_entry) { + regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base >> 4; + regs16.cs = regs16.ds + 0x20; + regs16.ip = 0; + /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */ + regs16.ss = (elf_rel_get_addr(&info->rhdr, "stack_end") - 64*1024) >> 4; + /* XXX: Documentation/i386/boot.txt says 'sp' must equal heap_end */ + regs16.esp = 0xFFFC; + + printf("Starting the kernel in real mode\n"); + regs32.eip = elf_rel_get_addr(&info->rhdr, "entry16"); + real_mode->kernel_start = kernel32_load_addr; + } + if (real_mode_entry && kexec_debug) { + unsigned long entry16_debug, pre32, first32; + uint32_t old_first32; + /* Find the location of the symbols */ + entry16_debug = elf_rel_get_addr(&info->rhdr, "entry16_debug"); + pre32 = elf_rel_get_addr(&info->rhdr, "entry16_debug_pre32"); + first32 = elf_rel_get_addr(&info->rhdr, "entry16_debug_first32"); + + /* Hook all of the linux kernel hooks */ + real_mode->rmode_switch_cs = entry16_debug >> 4; + real_mode->rmode_switch_ip = pre32 - entry16_debug; + old_first32 = real_mode->kernel_start; + real_mode->kernel_start = first32; + elf_rel_set_symbol(&info->rhdr, "entry16_debug_old_first32", + &old_first32, sizeof(old_first32)); + + regs32.eip = entry16_debug; + } + if (real_mode_entry) { + elf_rel_set_symbol(&info->rhdr, "entry16_regs", + ®s16, sizeof(regs16)); + elf_rel_set_symbol(&info->rhdr, "entry16_debug_regs", + ®s16, sizeof(regs16)); + } + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s32, sizeof(regs32)); + cmdline_end = setup_base + kern16_size_needed + command_line_len - 1; + elf_rel_set_symbol(&info->rhdr, "cmdline_end", &cmdline_end, + sizeof(unsigned long)); + + /* Fill in the information BIOS calls would normally provide. */ + if (!real_mode_entry) { + setup_linux_system_parameters(info, real_mode); + } + + if (dtb) { + setup_linux_dtb(info, real_mode, dtb, dtb_len); + } + + return 0; +} + +int bzImage_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + char *command_line = NULL; + char *tmp_cmdline = NULL; + const char *ramdisk, *append = NULL, *dtb; + char *ramdisk_buf; + off_t ramdisk_length; + int command_line_len; + int real_mode_entry; + int opt; + int result; + char *dtb_buf; + off_t dtb_length; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_APPEND }, + { "append", 1, 0, OPT_APPEND }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "initrd", 1, 0, OPT_RAMDISK }, + { "ramdisk", 1, 0, OPT_RAMDISK }, + { "real-mode", 0, 0, OPT_REAL_MODE }, + { "dtb", 1, 0, OPT_DTB }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR "d"; + + real_mode_entry = 0; + ramdisk = 0; + ramdisk_length = 0; + dtb = 0; + dtb_length = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_REAL_MODE: + real_mode_entry = 1; + break; + case OPT_DTB: + dtb = optarg; + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); + if (tmp_cmdline) { + free(tmp_cmdline); + } + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) +1; + } else { + command_line = strdup("\0"); + command_line_len = 1; + } + ramdisk_buf = 0; + if (ramdisk) { + ramdisk_buf = slurp_file(ramdisk, &ramdisk_length); + } + dtb_buf = 0; + if (dtb) { + dtb_buf = slurp_file(dtb, &dtb_length); + } + result = do_bzImage_load(info, + buf, len, + command_line, command_line_len, + ramdisk_buf, ramdisk_length, + dtb_buf, dtb_length, + real_mode_entry); + + free(command_line); + return result; +} diff --git a/kexec/arch/i386/kexec-elf-rel-x86.c b/kexec/arch/i386/kexec-elf-rel-x86.c new file mode 100644 index 0000000..55a214e --- /dev/null +++ b/kexec/arch/i386/kexec-elf-rel-x86.c @@ -0,0 +1,36 @@ +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2LSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS32) { + return 0; + } + if ((ehdr->e_machine != EM_386) && (ehdr->e_machine != EM_486)) + { + return 0; + } + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + switch(r_type) { + case R_386_32: + *((uint32_t *)location) += value; + break; + case R_386_PC32: + *((uint32_t *)location) += value - address; + break; + default: + die("Unknown rel relocation: %lu\n", r_type); + break; + } +} diff --git a/kexec/arch/i386/kexec-elf-x86.c b/kexec/arch/i386/kexec-elf-x86.c new file mode 100644 index 0000000..8eba242 --- /dev/null +++ b/kexec/arch/i386/kexec-elf-x86.c @@ -0,0 +1,331 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include "x86-linux-setup.h" +#include "kexec-x86.h" +#include "crashdump-x86.h" +#include <arch/options.h> + +static const int probe_debug = 0; + +int elf_x86_any_probe(const char *buf, off_t len, enum coretype arch) +{ + + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + if (probe_debug) { + fprintf(stderr, "Not an ELF executable\n"); + } + goto out; + } + + /* Verify the architecuture specific bits */ + switch (arch) { + case CORE_TYPE_ELF32: + if ((ehdr.e_machine != EM_386) && (ehdr.e_machine != EM_486)) { + if (probe_debug) + fprintf(stderr, "Not i386 ELF executable\n"); + result = -1; + goto out; + } + break; + + case CORE_TYPE_ELF64: + if (ehdr.e_machine != EM_X86_64) { + if (probe_debug) + fprintf(stderr, "Not x86_64 ELF executable\n"); + result = -1; + goto out; + } + break; + + case CORE_TYPE_UNDEF: + default: + if ( + (ehdr.e_machine != EM_386) && + (ehdr.e_machine != EM_486) && + (ehdr.e_machine != EM_X86_64) + ) { + if (probe_debug) + fprintf(stderr, "Not i386 or x86_64 ELF executable\n"); + result = -1; + goto out; + } + break; + } + + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +int elf_x86_probe(const char *buf, off_t len) { + return elf_x86_any_probe(buf, len, CORE_TYPE_ELF32); +} + +void elf_x86_usage(void) +{ + printf( " --command-line=STRING Set the kernel command line to STRING\n" + " --append=STRING Set the kernel command line to STRING\n" + " --reuse-cmdline Use kernel command line from running system.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " --args-linux Pass linux kernel style options\n" + " --args-elf Pass elf boot notes\n" + ); + + +} + +int elf_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *command_line = NULL, *modified_cmdline = NULL; + const char *append = NULL; + char *tmp_cmdline = NULL; + const char *error_msg = NULL; + int result; + int command_line_len; + const char *ramdisk; + unsigned long entry, max_addr; + int arg_style; +#define ARG_STYLE_ELF 0 +#define ARG_STYLE_LINUX 1 +#define ARG_STYLE_NONE 2 + int opt; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "args-elf", 0, NULL, OPT_ARGS_ELF }, + { "args-linux", 0, NULL, OPT_ARGS_LINUX }, + { "args-none", 0, NULL, OPT_ARGS_NONE }, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* + * Parse the command line arguments + */ + arg_style = ARG_STYLE_ELF; + ramdisk = 0; + result = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_ARGS_ELF: + arg_style = ARG_STYLE_ELF; + break; + case OPT_ARGS_LINUX: + arg_style = ARG_STYLE_LINUX; + break; + case OPT_ARGS_NONE: +#ifdef __i386__ + arg_style = ARG_STYLE_NONE; +#else + die("--args-none only works on arch i386\n"); +#endif + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); + if (tmp_cmdline) { + free(tmp_cmdline); + } + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) +1; + } else { + command_line = strdup("\0"); + command_line_len = 1; + } + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & (KEXEC_ON_CRASH|KEXEC_PRESERVE_CONTEXT)) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (command_line) { + strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + } + + /* Load the ELF executable */ + elf_exec_build_load(info, &ehdr, buf, len, 0); + + entry = ehdr.e_entry; + max_addr = elf_max_addr(&ehdr); + + /* Do we want arguments? */ + if (arg_style != ARG_STYLE_NONE) { + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0, ULONG_MAX, 1, 0); + } + if (arg_style == ARG_STYLE_NONE) { + info->entry = (void *)entry; + + } + else if (arg_style == ARG_STYLE_ELF) { + unsigned long note_base; + struct entry32_regs regs; + uint32_t arg1, arg2; + + /* Setup the ELF boot notes */ + note_base = elf_boot_notes(info, max_addr, + command_line, command_line_len); + + /* Initialize the stack arguments */ + arg2 = 0; /* No return address */ + arg1 = note_base; + elf_rel_set_symbol(&info->rhdr, "stack_arg32_1", &arg1, sizeof(arg1)); + elf_rel_set_symbol(&info->rhdr, "stack_arg32_2", &arg2, sizeof(arg2)); + + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.eip = entry; /* The entry point */ + regs.esp = elf_rel_get_addr(&info->rhdr, "stack_arg32_2"); + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + + if (ramdisk) { + error_msg = "Ramdisks not supported with generic elf arguments"; + goto out; + } + } + else if (arg_style == ARG_STYLE_LINUX) { + struct x86_linux_faked_param_header *hdr; + unsigned long param_base; + const char *ramdisk_buf; + off_t ramdisk_length; + struct entry32_regs regs; + int rc = 0; + + /* Get the linux parameter header */ + hdr = xmalloc(sizeof(*hdr)); + + /* Hack: With some ld versions, vmlinux program headers show + * a gap of two pages between bss segment and data segment + * but effectively kernel considers it as bss segment and + * overwrites the any data placed there. Hence bloat the + * memsz of parameter segment to 16K to avoid being placed + * in such gaps. + * This is a makeshift solution until it is fixed in kernel + */ + param_base = add_buffer(info, hdr, sizeof(*hdr), 16*1024, + 16, 0, max_addr, 1); + + /* Initialize the parameter header */ + memset(hdr, 0, sizeof(*hdr)); + init_linux_parameters(&hdr->hdr); + + /* Add a ramdisk to the current image */ + ramdisk_buf = NULL; + ramdisk_length = 0; + if (ramdisk) { + ramdisk_buf = slurp_file(ramdisk, &ramdisk_length); + } + + /* If panic kernel is being loaded, additional segments need + * to be created. */ + if (info->kexec_flags & (KEXEC_ON_CRASH|KEXEC_PRESERVE_CONTEXT)) { + rc = load_crashdump_segments(info, modified_cmdline, + max_addr, 0); + if (rc < 0) { + result = -1; + goto out; + } + /* Use new command line. */ + free(command_line); + command_line = modified_cmdline; + command_line_len = strlen(modified_cmdline) + 1; + modified_cmdline = NULL; + } + + /* Tell the kernel what is going on */ + setup_linux_bootloader_parameters(info, &hdr->hdr, param_base, + offsetof(struct x86_linux_faked_param_header, command_line), + command_line, command_line_len, + ramdisk_buf, ramdisk_length); + + /* Fill in the information bios calls would usually provide */ + setup_linux_system_parameters(info, &hdr->hdr); + + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.ebx = 0; /* Bootstrap processor */ + regs.esi = param_base; /* Pointer to the parameters */ + regs.eip = entry; /* The entry point */ + regs.esp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */ + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + } + else { + error_msg = "Unknown argument style\n"; + } + +out: + free(command_line); + free(modified_cmdline); + if (error_msg) + die("%s", error_msg); + return result; +} diff --git a/kexec/arch/i386/kexec-mb2-x86.c b/kexec/arch/i386/kexec-mb2-x86.c new file mode 100644 index 0000000..0d2e93b --- /dev/null +++ b/kexec/arch/i386/kexec-mb2-x86.c @@ -0,0 +1,616 @@ +/* + * kexec-mb2-x86.c + * + * multiboot2 support for kexec to boot xen. + * + * Copyright (C) 2019 Varad Gautam (vrd at amazon.de), Amazon.com, Inc. or its affiliates. + * + * Parts based on GNU GRUB, Copyright (C) 2000 Free Software Foundation, Inc + * Parts taken from kexec-multiboot-x86.c, Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + * + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-x86.h" +#include "../../kexec-syscall.h" +#include "../../kexec-xen.h" +#include <arch/options.h> + +/* From GNU GRUB */ +#include <x86/multiboot2.h> +#include <x86/mb_info.h> + +/* Framebuffer */ +#include <sys/ioctl.h> +#include <linux/fb.h> + +extern struct arch_options_t arch_options; + +/* Static storage */ +static char headerbuf[MULTIBOOT_SEARCH]; +static struct multiboot_header *mbh = NULL; +struct multiboot2_header_info { + struct multiboot_header_tag_information_request *request_tag; + struct multiboot_header_tag_address *addr_tag; + struct multiboot_header_tag_entry_address *entry_addr_tag; + struct multiboot_header_tag_console_flags *console_tag; + struct multiboot_header_tag_framebuffer *fb_tag; + struct multiboot_header_tag_module_align *mod_align_tag; + struct multiboot_header_tag_relocatable *rel_tag; +} mhi; + +#define ALIGN_UP(addr, align) \ + ((addr + (typeof (addr)) align - 1) & ~((typeof (addr)) align - 1)) + +int multiboot2_x86_probe(const char *buf, off_t buf_len) +/* Is it a good idea to try booting this file? */ +{ + int i, len; + + /* First of all, check that this is an ELF file for either x86 or x86-64 */ + i = elf_x86_any_probe(buf, buf_len, CORE_TYPE_UNDEF); + if (i < 0) + return i; + + /* Now look for a multiboot header. */ + len = MULTIBOOT_SEARCH; + if (len > buf_len) + len = buf_len; + + memcpy(headerbuf, buf, len); + if (len < sizeof(struct multiboot_header)) { + /* Short file */ + return -1; + } + for (mbh = (struct multiboot_header *) headerbuf; + ((char *) mbh <= (char *) headerbuf + len - sizeof(struct multiboot_header)); + mbh = (struct multiboot_header *) ((char *) mbh + MULTIBOOT_HEADER_ALIGN)) { + if (mbh->magic == MULTIBOOT2_HEADER_MAGIC + && !((mbh->magic+mbh->architecture+mbh->header_length+mbh->checksum) & 0xffffffff)) { + /* Found multiboot header. */ + return 0; + } + } + /* Not multiboot */ + return -1; +} + +void multiboot2_x86_usage(void) +/* Multiboot-specific options */ +{ + printf(" --command-line=STRING Set the kernel command line to STRING.\n"); + printf(" --reuse-cmdline Use kernel command line from running system.\n"); + printf(" --module=\"MOD arg1 arg2...\" Load module MOD with command-line \"arg1...\"\n"); + printf(" (can be used multiple times).\n"); +} + +static size_t +multiboot2_get_mbi_size(int ranges, int cmdline_size, int modcount, int modcmd_size) +{ + size_t mbi_size; + + mbi_size = (2 * sizeof (uint32_t) /* u32 total_size, u32 reserved */ + + ALIGN_UP (sizeof (struct multiboot_tag_basic_meminfo), MULTIBOOT_TAG_ALIGN) + + ALIGN_UP ((sizeof (struct multiboot_tag_mmap) + + ranges * sizeof (struct multiboot_mmap_entry)), MULTIBOOT_TAG_ALIGN) + + (sizeof (struct multiboot_tag_string) + + ALIGN_UP (cmdline_size, MULTIBOOT_TAG_ALIGN)) + + (sizeof (struct multiboot_tag_string) + + ALIGN_UP (strlen(BOOTLOADER " " BOOTLOADER_VERSION) + 1, MULTIBOOT_TAG_ALIGN)) + + (modcount * sizeof (struct multiboot_tag_module) + modcmd_size)) + + sizeof (struct multiboot_tag); /* end tag */ + + if (mhi.rel_tag) + mbi_size += ALIGN_UP (sizeof (struct multiboot_tag_load_base_addr), MULTIBOOT_TAG_ALIGN); + + if (mhi.fb_tag) + mbi_size += ALIGN_UP (sizeof (struct multiboot_tag_framebuffer), MULTIBOOT_TAG_ALIGN); + + return mbi_size; +} + +static void multiboot2_read_header_tags(void) +{ + struct multiboot_header_tag *tag; + + for (tag = (struct multiboot_header_tag *) (mbh + 1); + tag->type != MULTIBOOT_TAG_TYPE_END; + tag = (struct multiboot_header_tag *) ((char *) tag + ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN))) + { + switch (tag->type) + { + case MULTIBOOT_HEADER_TAG_INFORMATION_REQUEST: + { + mhi.request_tag = (struct multiboot_header_tag_information_request *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_RELOCATABLE: + { + mhi.rel_tag = (struct multiboot_header_tag_relocatable *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_ADDRESS: + { + mhi.addr_tag = (struct multiboot_header_tag_address *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS: + { + mhi.entry_addr_tag = (struct multiboot_header_tag_entry_address *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_CONSOLE_FLAGS: + { + mhi.console_tag = (struct multiboot_header_tag_console_flags *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_FRAMEBUFFER: + { + mhi.fb_tag = (struct multiboot_header_tag_framebuffer *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_MODULE_ALIGN: + { + mhi.mod_align_tag = (struct multiboot_header_tag_module_align *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS_EFI64: + case MULTIBOOT_HEADER_TAG_EFI_BS: + /* Ignoring EFI. */ + break; + default: + { + if (!(tag->flags & MULTIBOOT_HEADER_TAG_OPTIONAL)) + fprintf(stderr, "unsupported tag: 0x%x", tag->type); + break; + } + } + } +} + +struct multiboot_mmap_entry *multiboot_construct_memory_map(struct memory_range *range, + int ranges, + unsigned long long *mem_lower, + unsigned long long *mem_upper) +{ + struct multiboot_mmap_entry *entries; + int i; + + *mem_lower = *mem_upper = 0; + entries = xmalloc(ranges * sizeof(*entries)); + for (i = 0; i < ranges; i++) { + entries[i].addr = range[i].start; + entries[i].len = range[i].end - range[i].start + 1; + + if (range[i].type == RANGE_RAM) { + entries[i].type = MULTIBOOT_MEMORY_AVAILABLE; + /* + * Is this the "low" memory? Can't just test + * against zero, because Linux protects (and + * hides) the first few pages of physical + * memory. + */ + + if ((range[i].start <= 64*1024) + && (range[i].end > *mem_lower)) { + range[i].start = 0; + *mem_lower = range[i].end; + } + /* Is this the "high" memory? */ + if ((range[i].start <= 0x100000) + && (range[i].end > *mem_upper + 0x100000)) + *mem_upper = range[i].end - 0x100000; + } else if (range[i].type == RANGE_ACPI) + entries[i].type = MULTIBOOT_MEMORY_ACPI_RECLAIMABLE; + else if (range[i].type == RANGE_ACPI_NVS) + entries[i].type = MULTIBOOT_MEMORY_NVS; + else if (range[i].type == RANGE_RESERVED) + entries[i].type = MULTIBOOT_MEMORY_RESERVED; + } + return entries; +} + +static uint64_t multiboot2_make_mbi(struct kexec_info *info, char *cmdline, int cmdline_len, + unsigned long load_base_addr, void *mbi_buf, size_t mbi_bytes) +{ + uint64_t *ptrorig = mbi_buf; + struct multiboot_mmap_entry *mmap_entries; + unsigned long long mem_lower = 0, mem_upper = 0; + + *ptrorig = mbi_bytes; /* u32 total_size, u32 reserved */ + ptrorig++; + + mmap_entries = multiboot_construct_memory_map(info->memory_range, info->memory_ranges, &mem_lower, &mem_upper); + { + struct multiboot_tag_basic_meminfo *tag = (struct multiboot_tag_basic_meminfo *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_BASIC_MEMINFO; + tag->size = sizeof (struct multiboot_tag_basic_meminfo); + tag->mem_lower = mem_lower >> 10; + tag->mem_upper = mem_upper >> 10; + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + { + struct multiboot_tag_mmap *tag = (struct multiboot_tag_mmap *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_MMAP; + tag->size = sizeof(struct multiboot_tag_mmap) + sizeof(struct multiboot_mmap_entry) * info->memory_ranges; + tag->entry_size = sizeof(struct multiboot_mmap_entry); + tag->entry_version = 0; + memcpy(tag->entries, mmap_entries, tag->entry_size * info->memory_ranges); + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + if (mhi.rel_tag) { + struct multiboot_tag_load_base_addr *tag = (struct multiboot_tag_load_base_addr *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_LOAD_BASE_ADDR; + tag->size = sizeof (struct multiboot_tag_load_base_addr); + tag->load_base_addr = load_base_addr; + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + { + struct multiboot_tag_string *tag = (struct multiboot_tag_string *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_CMDLINE; + tag->size = sizeof (struct multiboot_tag_string) + cmdline_len; + memcpy(tag->string, cmdline, cmdline_len); + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + { + struct multiboot_tag_string *tag = (struct multiboot_tag_string *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME; + tag->size = sizeof(struct multiboot_tag_string) + strlen(BOOTLOADER " " BOOTLOADER_VERSION) + 1; + sprintf(tag->string, "%s", BOOTLOADER " " BOOTLOADER_VERSION); + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + if (mhi.fb_tag) { + struct multiboot_tag_framebuffer *tag = (struct multiboot_tag_framebuffer *) ptrorig; + struct fb_fix_screeninfo info; + struct fb_var_screeninfo mode; + int fd; + + tag->common.type = MULTIBOOT_TAG_TYPE_FRAMEBUFFER; + tag->common.size = sizeof(struct multiboot_tag_framebuffer); + /* check if purgatory will reset to standard ega text mode */ + if (arch_options.reset_vga || arch_options.console_vga) { + tag->common.framebuffer_type = MB_FRAMEBUFFER_TYPE_EGA_TEXT; + tag->common.framebuffer_addr = 0xb8000; + tag->common.framebuffer_pitch = 80*2; + tag->common.framebuffer_width = 80; + tag->common.framebuffer_height = 25; + tag->common.framebuffer_bpp = 16; + + ptrorig += ALIGN_UP (tag->common.size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + goto out; + } + + /* use current graphics framebuffer settings */ + fd = open("/dev/fb0", O_RDONLY); + if (fd < 0) { + fprintf(stderr, "can't open /dev/fb0: %s\n", strerror(errno)); + goto out; + } + if (ioctl(fd, FBIOGET_FSCREENINFO, &info) < 0){ + fprintf(stderr, "can't get screeninfo: %s\n", strerror(errno)); + close(fd); + goto out; + } + if (ioctl(fd, FBIOGET_VSCREENINFO, &mode) < 0){ + fprintf(stderr, "can't get modeinfo: %s\n", strerror(errno)); + close(fd); + goto out; + } + close(fd); + + if (info.smem_start == 0 || info.smem_len == 0) { + fprintf(stderr, "can't get linerar framebuffer address\n"); + goto out; + } + + if (info.type != FB_TYPE_PACKED_PIXELS) { + fprintf(stderr, "unsupported framebuffer type\n"); + goto out; + } + + if (info.visual != FB_VISUAL_TRUECOLOR) { + fprintf(stderr, "unsupported framebuffer visual\n"); + goto out; + } + + tag->common.framebuffer_type = MB_FRAMEBUFFER_TYPE_RGB; + tag->common.framebuffer_addr = info.smem_start; + tag->common.framebuffer_pitch = info.line_length; + tag->common.framebuffer_width = mode.xres; + tag->common.framebuffer_height = mode.yres; + tag->common.framebuffer_bpp = mode.bits_per_pixel; + + tag->framebuffer_red_field_position = mode.red.offset; + tag->framebuffer_red_mask_size = mode.red.length; + tag->framebuffer_green_field_position = mode.green.offset; + tag->framebuffer_green_mask_size = mode.green.length; + tag->framebuffer_blue_field_position = mode.blue.offset; + tag->framebuffer_blue_mask_size = mode.blue.length; + + ptrorig += ALIGN_UP (tag->common.size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + +out: + return (uint64_t) (uintptr_t) ptrorig; +} + +static uint64_t multiboot2_mbi_add_module(void *mbi_buf, uint64_t mbi_ptr, uint32_t mod_start, + uint32_t mod_end, char *mod_clp) +{ + struct multiboot_tag_module *tag = (struct multiboot_tag_module *) (uintptr_t) mbi_ptr; + + tag->type = MULTIBOOT_TAG_TYPE_MODULE; + tag->size = sizeof(struct multiboot_tag_module) + strlen((char *)(long) mod_clp) + 1; + tag->mod_start = mod_start; + tag->mod_end = mod_end; + + memcpy(tag->cmdline, (char *)(long) mod_clp, strlen((char *)(long) mod_clp) + 1); + mbi_ptr += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN); + + return mbi_ptr; +} + +static uint64_t multiboot2_mbi_end(void *mbi_buf, uint64_t mbi_ptr) +{ + struct multiboot_tag *tag = (struct multiboot_tag *) (uintptr_t) mbi_ptr; + + tag->type = MULTIBOOT_TAG_TYPE_END; + tag->size = sizeof (struct multiboot_tag); + mbi_ptr += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN); + + return mbi_ptr; +} + +static inline int multiboot2_rel_valid(struct multiboot_header_tag_relocatable *rel_tag, + uint64_t rel_start, uint64_t rel_end) +{ + if (rel_start >= rel_tag->min_addr && rel_end <= rel_tag->max_addr) + return 1; + + return 0; +} + +int multiboot2_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + void *mbi_buf; + size_t mbi_bytes; + unsigned long addr; + struct entry32_regs regs; + char *command_line = NULL, *tmp_cmdline = NULL; + int command_line_len; + char *imagename, *cp, *append = NULL;; + int opt; + int modules, mod_command_line_space; + uint64_t mbi_ptr; + char *mod_clp_base; + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_CL }, + { "append", 1, 0, OPT_CL }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "module", 1, 0, OPT_MOD }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + uint64_t rel_min, rel_max; + + /* Probe for the MB header if it's not already found */ + if (mbh == NULL && multiboot_x86_probe(buf, len) != 1) + { + fprintf(stderr, "Cannot find a loadable multiboot2 header.\n"); + return -1; + } + + /* Parse the header tags. */ + multiboot2_read_header_tags(); + + /* Parse the command line */ + command_line_len = 0; + modules = 0; + mod_command_line_space = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) + { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_CL: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_MOD: + modules++; + mod_command_line_space += strlen(optarg) + 1; + break; + } + } + imagename = argv[optind]; + + /* Final command line = imagename + <OPT_REUSE_CMDLINE> + <OPT_CL> */ + tmp_cmdline = concat_cmdline(command_line, append); + if (command_line) { + free(command_line); + } + command_line = concat_cmdline(imagename, tmp_cmdline); + if (tmp_cmdline) { + free(tmp_cmdline); + } + + if (xen_present() && info->kexec_flags & KEXEC_LIVE_UPDATE ) { + if (!mhi.rel_tag) { + fprintf(stderr, "Multiboot2 image must be relocatable" + "for KEXEC_LIVE_UPDATE.\n"); + return -1; + } + cmdline_add_liveupdate(&command_line); + } + + command_line_len = strlen(command_line) + 1; + + /* Load the ELF executable */ + if (mhi.rel_tag) { + rel_min = mhi.rel_tag->min_addr; + rel_max = mhi.rel_tag->max_addr; + + if (info->kexec_flags & KEXEC_LIVE_UPDATE && xen_present()) { + /* TODO also check if elf is xen */ + /* On a live update, load target xen over the current xen image. */ + uint64_t xen_start, xen_end; + + xen_get_kexec_range(KEXEC_RANGE_MA_XEN, &xen_start, &xen_end); + if (multiboot2_rel_valid(mhi.rel_tag, xen_start, xen_end)) { + rel_min = xen_start; + } else { + fprintf(stderr, "Cannot place Elf into " + "KEXEC_RANGE_MA_XEN for KEXEC_LIVE_UPDATE.\n"); + return -1; + } + } + + elf_exec_build_load_relocatable(info, &ehdr, buf, len, 0, + rel_min, rel_max, mhi.rel_tag->align); + } else + elf_exec_build_load(info, &ehdr, buf, len, 0); + + if (info->kexec_flags & KEXEC_LIVE_UPDATE && xen_present()) { + uint64_t lu_start, lu_end; + + xen_get_kexec_range(7 /* KEXEC_RANGE_MA_LIVEUPDATE */, &lu_start, &lu_end); + /* Fit everything else into lu_start-lu_end. First page after lu_start is + * reserved for LU breadcrumb. */ + rel_min = lu_start + 4096; + rel_max = lu_end; + } else { + rel_min = 0x500; + rel_max = ULONG_MAX; + } + + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + rel_min, rel_max, 1, 0); + + /* Construct information tags. */ + mbi_bytes = multiboot2_get_mbi_size(info->memory_ranges, command_line_len, modules, mod_command_line_space); + mbi_buf = xmalloc(mbi_bytes); + + mbi_ptr = multiboot2_make_mbi(info, command_line, command_line_len, info->rhdr.rel_addr, mbi_buf, mbi_bytes); + free(command_line); + + if (info->kexec_flags & KEXEC_LIVE_UPDATE && xen_present()) { + if (multiboot2_rel_valid(mhi.rel_tag, rel_min, rel_max)) { + /* Shrink the reloc range to fit into LU region for xen. */ + mhi.rel_tag->min_addr = rel_min; + mhi.rel_tag->max_addr = rel_max; + } else { + fprintf(stderr, "Multiboot2 image cannot be relocated into " + "KEXEC_RANGE_MA_LIVEUPDATE for KEXEC_LIVE_UPDATE.\n"); + return -1; + } + } + + /* Load modules */ + if (modules) { + char *mod_filename, *mod_command_line, *mod_clp, *buf; + off_t mod_size; + int i = 0; + + mod_clp_base = xmalloc(mod_command_line_space); + + /* Go back and parse the module command lines */ + mod_clp = mod_clp_base; + optind = opterr = 1; + while((opt = getopt_long(argc, argv, + short_options, options, 0)) != -1) { + if (opt != OPT_MOD) continue; + + /* Split module filename from command line */ + mod_command_line = mod_filename = optarg; + if ((cp = strchr(mod_filename, ' ')) != NULL) { + /* See as I discard the 'const' modifier */ + *cp = '\0'; + } + + /* Load the module */ + buf = slurp_decompress_file(mod_filename, &mod_size); + + if (cp != NULL) *cp = ' '; + + /* Pick the next aligned spot to load it in. Always page align. */ + addr = add_buffer(info, buf, mod_size, mod_size, getpagesize(), + rel_min, rel_max, 1); + + /* Add the module command line */ + sprintf(mod_clp, "%s", mod_command_line); + + mbi_ptr = multiboot2_mbi_add_module(mbi_buf, mbi_ptr, addr, addr + mod_size, mod_clp); + + mod_clp += strlen(mod_clp) + 1; + i++; + } + + free(mod_clp_base); + } + + mbi_ptr = multiboot2_mbi_end(mbi_buf, mbi_ptr); + + if (sort_segments(info) < 0) + return -1; + + addr = add_buffer(info, mbi_buf, mbi_bytes, mbi_bytes, 4, + rel_min, rel_max, 1); + + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.eax = MULTIBOOT2_BOOTLOADER_MAGIC; + regs.ebx = addr; + regs.eip = ehdr.e_entry; + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + + return 0; +} diff --git a/kexec/arch/i386/kexec-multiboot-x86.c b/kexec/arch/i386/kexec-multiboot-x86.c new file mode 100644 index 0000000..33c885a --- /dev/null +++ b/kexec/arch/i386/kexec-multiboot-x86.c @@ -0,0 +1,505 @@ +/* + * kexec-multiboot-x86.c + * + * (partial) multiboot support for kexec. Only supports ELF32 + * kernels, and a subset of the multiboot info page options + * (i.e. enough to boot the Xen hypervisor). + * + * TODO: + * - smarter allocation of new segments + * - proper support for the MULTIBOOT_VIDEO_MODE bit + * + * + * Copyright (C) 2003 Tim Deegan (tjd21 at cl.cam.ac.uk) + * + * Parts based on GNU GRUB, Copyright (C) 2000 Free Software Foundation, Inc + * Parts copied from kexec-elf32-x86.c, written by Eric Biederman + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + * + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-x86.h" +#include <arch/options.h> + +/* From GNU GRUB */ +#include <x86/mb_header.h> +#include <x86/mb_info.h> + +/* Framebuffer */ +#include <sys/ioctl.h> +#include <linux/fb.h> + +extern struct arch_options_t arch_options; + +/* Static storage */ +static char headerbuf[MULTIBOOT_SEARCH]; +static struct multiboot_header *mbh = NULL; +static off_t mbh_offset = 0; + +#define MIN(_x,_y) (((_x)<=(_y))?(_x):(_y)) + + +int multiboot_x86_probe(const char *buf, off_t buf_len) +/* Is it a good idea to try booting this file? */ +{ + int i, len; + /* Now look for a multiboot header in the first 8KB */ + len = MULTIBOOT_SEARCH; + if (len > buf_len) { + len = buf_len; + } + memcpy(headerbuf, buf, len); + if (len < 12) { + /* Short file */ + return -1; + } + for (mbh_offset = 0; mbh_offset <= (len - 12); mbh_offset += 4) + { + /* Search for a multiboot header */ + mbh = (struct multiboot_header *)(headerbuf + mbh_offset); + if (mbh->magic != MULTIBOOT_MAGIC + || ((mbh->magic+mbh->flags+mbh->checksum) & 0xffffffff)) + { + /* Not a multiboot header */ + continue; + } + if (mbh->flags & MULTIBOOT_AOUT_KLUDGE) { + if (mbh->load_addr & 0xfff) { + fprintf(stderr, "multiboot load address not 4k aligned\n"); + return -1; + } + if (mbh->load_addr > mbh->header_addr) { + fprintf(stderr, "multiboot header address > load address\n"); + return -1; + } + if (mbh->load_end_addr < mbh->load_addr) { + fprintf(stderr, "multiboot load end address < load address\n"); + return -1; + } + if (mbh->bss_end_addr < mbh->load_end_addr) { + fprintf(stderr, "multiboot bss end address < load end address\n"); + return -1; + } + if (mbh->load_end_addr - mbh->header_addr > buf_len - mbh_offset) { + fprintf(stderr, "multiboot file truncated\n"); + return -1; + } + if (mbh->entry_addr < mbh->load_addr || mbh->entry_addr >= mbh->load_end_addr) { + fprintf(stderr, "multiboot entry out of range\n"); + return -1; + } + } else { + if ((i=elf_x86_probe(buf, buf_len)) < 0) + return i; + } + if (mbh->flags & MULTIBOOT_UNSUPPORTED) { + /* Requires options we don't support */ + fprintf(stderr, + "Found a multiboot header, but it " + "requires multiboot options that I\n" + "don't understand. Sorry.\n"); + return -1; + } + /* Bootable */ + return 0; + } + /* Not multiboot */ + return -1; +} + + +void multiboot_x86_usage(void) +/* Multiboot-specific options */ +{ + printf(" --command-line=STRING Set the kernel command line to STRING.\n"); + printf(" --reuse-cmdline Use kernel command line from running system.\n"); + printf(" --module=\"MOD arg1 arg2...\" Load module MOD with command-line \"arg1...\"\n"); + printf(" (can be used multiple times).\n"); +} + + +static int framebuffer_info(struct multiboot_info *mbi) +{ + struct fb_fix_screeninfo info; + struct fb_var_screeninfo mode; + int fd; + + /* check if purgatory will reset to standard ega text mode */ + if (arch_options.reset_vga || arch_options.console_vga) { + mbi->framebuffer_type = MB_FRAMEBUFFER_TYPE_EGA_TEXT; + mbi->framebuffer_addr = 0xb8000; + mbi->framebuffer_pitch = 80*2; + mbi->framebuffer_width = 80; + mbi->framebuffer_height = 25; + mbi->framebuffer_bpp = 16; + + mbi->flags |= MB_INFO_FRAMEBUFFER_INFO; + return 0; + } + + /* use current graphics framebuffer settings */ + fd = open("/dev/fb0", O_RDONLY); + if (fd < 0) { + fprintf(stderr, "can't open /dev/fb0: %s\n", strerror(errno)); + return -1; + } + if (ioctl(fd, FBIOGET_FSCREENINFO, &info) < 0){ + fprintf(stderr, "can't get screeninfo: %s\n", strerror(errno)); + close(fd); + return -1; + } + if (ioctl(fd, FBIOGET_VSCREENINFO, &mode) < 0){ + fprintf(stderr, "can't get modeinfo: %s\n", strerror(errno)); + close(fd); + return -1; + } + close(fd); + + if (info.smem_start == 0 || info.smem_len == 0) { + fprintf(stderr, "can't get linerar framebuffer address\n"); + return -1; + } + + if (info.type != FB_TYPE_PACKED_PIXELS) { + fprintf(stderr, "unsupported framebuffer type\n"); + return -1; + } + + if (info.visual != FB_VISUAL_TRUECOLOR) { + fprintf(stderr, "unsupported framebuffer visual\n"); + return -1; + } + + mbi->framebuffer_type = MB_FRAMEBUFFER_TYPE_RGB; + mbi->framebuffer_addr = info.smem_start; + mbi->framebuffer_pitch = info.line_length; + mbi->framebuffer_width = mode.xres; + mbi->framebuffer_height = mode.yres; + mbi->framebuffer_bpp = mode.bits_per_pixel; + mbi->framebuffer_red_field_position = mode.red.offset; + mbi->framebuffer_red_mask_size = mode.red.length; + mbi->framebuffer_green_field_position = mode.green.offset; + mbi->framebuffer_green_mask_size = mode.green.length; + mbi->framebuffer_blue_field_position = mode.blue.offset; + mbi->framebuffer_blue_mask_size = mode.blue.length; + + mbi->flags |= MB_INFO_FRAMEBUFFER_INFO; + return 0; +} + +int multiboot_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +/* Marshal up a multiboot-style kernel */ +{ + struct multiboot_info *mbi; + void *mbi_buf; + struct mod_list *modp; + unsigned long freespace; + unsigned long long mem_lower = 0, mem_upper = 0; + struct mem_ehdr ehdr; + unsigned long mbi_base; + struct entry32_regs regs; + size_t mbi_bytes, mbi_offset; + char *command_line = NULL, *tmp_cmdline = NULL; + char *imagename, *cp, *append = NULL;; + struct memory_range *range; + int ranges; + struct AddrRangeDesc *mmap; + int command_line_len; + int i, result; + uint32_t u, entry; + int opt; + int modules, mod_command_line_space; + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_CL }, + { "append", 1, 0, OPT_CL }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "module", 1, 0, OPT_MOD }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + /* Probe for the MB header if it's not already found */ + if (mbh == NULL && multiboot_x86_probe(buf, len) != 1) { + fprintf(stderr, "Cannot find a loadable multiboot header.\n"); + return -1; + } + + /* Parse the command line */ + command_line_len = 0; + modules = 0; + mod_command_line_space = 0; + result = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_CL: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + command_line = get_command_line(); + break; + case OPT_MOD: + modules++; + mod_command_line_space += strlen(optarg) + 1; + break; + } + } + imagename = argv[optind]; + + /* Final command line = imagename + <OPT_REUSE_CMDLINE> + <OPT_CL> */ + tmp_cmdline = concat_cmdline(command_line, append); + if (command_line) { + free(command_line); + } + command_line = concat_cmdline(imagename, tmp_cmdline); + if (tmp_cmdline) { + free(tmp_cmdline); + } + command_line_len = strlen(command_line) + 1; + + if (mbh->flags & MULTIBOOT_AOUT_KLUDGE) { + add_segment(info, + buf + (mbh_offset - (mbh->header_addr - mbh->load_addr)), + mbh->load_end_addr - mbh->load_addr, + mbh->load_addr, + mbh->bss_end_addr - mbh->load_addr); + entry = mbh->entry_addr; + } else { + /* Load the ELF executable */ + elf_exec_build_load(info, &ehdr, buf, len, 0); + entry = ehdr.e_entry; + } + + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, 0, + ULONG_MAX, 1, 0); + + /* The first segment will contain the multiboot headers: + * ============= + * multiboot information (mbi) + * ------------- + * kernel command line + * ------------- + * bootloader name + * ------------- + * module information entries + * ------------- + * module command lines + * ============== + */ + mbi_bytes = _ALIGN(sizeof(*mbi) + command_line_len + + strlen (BOOTLOADER " " BOOTLOADER_VERSION) + 1, 4); + mbi_buf = xmalloc(mbi_bytes); + mbi = mbi_buf; + memset(mbi, 0, sizeof(*mbi)); + sprintf(((char *)mbi) + sizeof(*mbi), "%s", command_line); + sprintf(((char *)mbi) + sizeof(*mbi) + command_line_len, "%s", + BOOTLOADER " " BOOTLOADER_VERSION); + mbi->flags = MB_INFO_CMDLINE | MB_INFO_BOOT_LOADER_NAME; + /* We'll relocate these to absolute addresses later. For now, + * all addresses within the first segment are relative to the + * start of the MBI. */ + mbi->cmdline = sizeof(*mbi); + mbi->boot_loader_name = sizeof(*mbi) + command_line_len; + + /* Memory map */ + range = info->memory_range; + ranges = info->memory_ranges; + mmap = xmalloc(ranges * sizeof(*mmap)); + for (i=0; i<ranges; i++) { + unsigned long long length; + length = range[i].end - range[i].start + 1; + /* Translate bzImage mmap to multiboot-speak */ + mmap[i].size = sizeof(mmap[i]) - 4; + mmap[i].base_addr_low = range[i].start & 0xffffffff; + mmap[i].base_addr_high = range[i].start >> 32; + mmap[i].length_low = length & 0xffffffff; + mmap[i].length_high = length >> 32; + switch (range[i].type) { + case RANGE_RAM: + mmap[i].Type = 1; /* RAM */ + /* + * Is this the "low" memory? Can't just test + * against zero, because Linux protects (and + * hides) the first few pages of physical + * memory. + */ + + if ((range[i].start <= 64*1024) + && (range[i].end > mem_lower)) { + range[i].start = 0; + mem_lower = range[i].end; + } + /* Is this the "high" memory? */ + if ((range[i].start <= 0x100000) + && (range[i].end > mem_upper + 0x100000)) + mem_upper = range[i].end - 0x100000; + break; + case RANGE_ACPI: + mmap[i].Type = 3; + break; + case RANGE_ACPI_NVS: + mmap[i].Type = 4; + break; + case RANGE_RESERVED: + default: + mmap[i].Type = 2; /* Not RAM (reserved) */ + } + } + + if (mbh->flags & MULTIBOOT_MEMORY_INFO) { + /* Provide a copy of the memory map to the kernel */ + + mbi->flags |= MB_INFO_MEMORY | MB_INFO_MEM_MAP; + + freespace = add_buffer(info, + mmap, ranges * sizeof(*mmap), ranges * sizeof(*mmap), + 4, 0, 0xFFFFFFFFUL, 1); + + mbi->mmap_addr = freespace; + mbi->mmap_length = ranges * sizeof(*mmap); + + /* For kernels that care naught for fancy memory maps + * and just want the size of low and high memory */ + mbi->mem_lower = MIN(mem_lower>>10, 0xffffffff); + mbi->mem_upper = MIN(mem_upper>>10, 0xffffffff); + + /* done */ + } + + /* Video */ + if (mbh->flags & MULTIBOOT_VIDEO_MODE) { + if (framebuffer_info(mbi) < 0) + fprintf(stderr, "not providing framebuffer information.\n"); + } + + /* Load modules */ + if (modules) { + char *mod_filename, *mod_command_line, *mod_clp, *buf; + off_t mod_size; + + /* We'll relocate this to an absolute address later */ + mbi->mods_addr = mbi_bytes; + mbi->mods_count = 0; + mbi->flags |= MB_INFO_MODS; + + /* Add room for the module descriptors to the MBI buffer */ + mbi_bytes += (sizeof(*modp) * modules) + + mod_command_line_space; + mbi_buf = xrealloc(mbi_buf, mbi_bytes); + + /* mbi might have moved */ + mbi = mbi_buf; + /* module descriptors go in the newly added space */ + modp = ((void *)mbi) + mbi->mods_addr; + /* module command lines go after the descriptors */ + mod_clp = ((void *)modp) + (sizeof(*modp) * modules); + + /* Go back and parse the module command lines */ + optind = opterr = 1; + while((opt = getopt_long(argc, argv, + short_options, options, 0)) != -1) { + if (opt != OPT_MOD) continue; + + /* Split module filename from command line */ + mod_command_line = mod_filename = optarg; + if ((cp = strchr(mod_filename, ' ')) != NULL) { + /* See as I discard the 'const' modifier */ + *cp = '\0'; + } + + /* Load the module */ + buf = slurp_decompress_file(mod_filename, &mod_size); + + if (cp != NULL) *cp = ' '; + + /* Pick the next aligned spot to load it in */ + freespace = add_buffer(info, + buf, mod_size, mod_size, + getpagesize(), 0, 0xffffffffUL, 1); + + /* Add the module command line */ + sprintf(mod_clp, "%s", mod_command_line); + + modp->mod_start = freespace; + modp->mod_end = freespace + mod_size; + modp->cmdline = (void *)mod_clp - (void *)mbi; + modp->pad = 0; + + /* Done */ + mbi->mods_count++; + mod_clp += strlen(mod_clp) + 1; + modp++; + } + } + + /* Find a place for the MBI to live */ + if (sort_segments(info) < 0) { + result = -1; + goto out; + } + mbi_base = add_buffer(info, + mbi_buf, mbi_bytes, mbi_bytes, 4, 0, 0xFFFFFFFFUL, 1); + + /* Relocate offsets in the MBI to absolute addresses */ + mbi_offset = mbi_base; + modp = ((void *)mbi) + mbi->mods_addr; + for (u = 0; u < mbi->mods_count; u++) { + modp[u].cmdline += mbi_offset; + } + mbi->mods_addr += mbi_offset; + mbi->cmdline += mbi_offset; + mbi->boot_loader_name += mbi_offset; + + /* Specify the initial CPU state and copy the setup code */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.eax = 0x2BADB002; + regs.ebx = mbi_offset; + regs.eip = entry; + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + +out: + free(command_line); + return result; +} + +/* + * EOF (kexec-multiboot-x86.c) + */ diff --git a/kexec/arch/i386/kexec-nbi.c b/kexec/arch/i386/kexec-nbi.c new file mode 100644 index 0000000..8eb2154 --- /dev/null +++ b/kexec/arch/i386/kexec-nbi.c @@ -0,0 +1,249 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include "kexec-x86.h" +#include <arch/options.h> + +struct segheader +{ + uint8_t length; + uint8_t vendortag; + uint8_t reserved; + uint8_t flags; +#define NBI_SEG 0x3 +#define NBI_SEG_ABSOLUTE 0 +#define NBI_SEG_APPEND 1 +#define NBI_SEG_NEGATIVE 2 +#define NBI_SEG_PREPEND 3 +#define NBI_LAST_SEG (1 << 2) + uint32_t loadaddr; + uint32_t imglength; + uint32_t memlength; +}; + +struct imgheader +{ +#define NBI_MAGIC "\x36\x13\x03\x1b" + uint8_t magic[4]; +#define NBI_RETURNS (1 << 8) +#define NBI_ENTRY32 (1 << 31) + uint32_t length; /* and flags */ + struct { uint16_t bx, ds; } segoff; + union { + struct { uint16_t ip, cs; } segoff; + uint32_t linear; + } execaddr; +}; + + +static const int probe_debug = 0; + +int nbi_probe(const char *buf, off_t len) +{ + struct imgheader hdr; + struct segheader seg; + off_t seg_off; + /* If we don't have enough data give up */ + if (((uintmax_t)len < (uintmax_t)sizeof(hdr)) || (len < 512)) { + return -1; + } + memcpy(&hdr, buf, sizeof(hdr)); + if (memcmp(hdr.magic, NBI_MAGIC, sizeof(hdr.magic)) != 0) { + return -1; + } + /* Ensure we have a properly sized header */ + if (((hdr.length & 0xf)*4) != sizeof(hdr)) { + if (probe_debug) { + fprintf(stderr, "NBI: Bad vendor header size\n"); + } + return -1; + } + /* Ensure the vendor header is not too large. + * This can't actually happen but.... + */ + if ((((hdr.length & 0xf0) >> 4)*4) > (512 - sizeof(hdr))) { + if (probe_debug) { + fprintf(stderr, "NBI: vendor headr too large\n"); + } + return -1; + } + /* Reserved bits are set in the image... */ + if ((hdr.length & 0x7ffffe00)) { + if (probe_debug) { + fprintf(stderr, "NBI: Reserved header bits set\n"); + } + return -1; + } + /* If the image can return refuse to load it */ + if (hdr.length & (1 << 8)) { + if (probe_debug) { + printf("NBI: image wants to return\n"); + } + return -1; + } + /* Now verify the segments all fit within 512 bytes */ + seg_off = (((hdr.length & 0xf0) >> 4) + (hdr.length & 0x0f)) << 2; + do { + memcpy(&seg, buf + seg_off, sizeof(seg)); + if ((seg.length & 0xf) != 4) { + if (probe_debug) { + fprintf(stderr, "NBI: Invalid segment length\n"); + } + return -1; + } + seg_off += ((seg.length & 0xf) + ((seg.length >> 4) & 0xf)) << 2; + if (seg.flags & 0xf8) { + if (probe_debug) { + fprintf(stderr, "NBI: segment reserved flags set\n"); + } + return -1; + } + if ((seg.flags & NBI_SEG) == NBI_SEG_NEGATIVE) { + if (probe_debug) { + fprintf(stderr, "NBI: negative segment addresses not supported\n"); + } + return -1; + } + if (seg_off > 512) { + if (probe_debug) { + fprintf(stderr, "NBI: segment outside 512 header\n"); + } + return -1; + } + } while(!(seg.flags & NBI_LAST_SEG)); + return 0; +} + +void nbi_usage(void) +{ + printf( "\n" + ); +} + +int nbi_load(int argc, char **argv, const char *buf, off_t UNUSED(len), + struct kexec_info *info) +{ + struct imgheader hdr; + struct segheader seg; + off_t seg_off; + off_t file_off; + uint32_t last0, last1; + int opt; + + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* + * Parse the command line arguments + */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + } + } + /* Get a copy of the header */ + memcpy(&hdr, buf, sizeof(hdr)); + + /* Load the first 512 bytes */ + add_segment(info, buf + 0, 512, + (hdr.segoff.ds << 4) + hdr.segoff.bx, 512); + + /* Initialize variables */ + file_off = 512; + last0 = (hdr.segoff.ds << 4) + hdr.segoff.bx; + last1 = last0 + 512; + + /* Load the segments */ + seg_off = (((hdr.length & 0xf0) >> 4) + (hdr.length & 0x0f)) << 2; + do { + uint32_t loadaddr; + memcpy(&seg, buf + seg_off, sizeof(seg)); + seg_off += ((seg.length & 0xf) + ((seg.length >> 4) & 0xf)) << 2; + if ((seg.flags & NBI_SEG) == NBI_SEG_ABSOLUTE) { + loadaddr = seg.loadaddr; + } + else if ((seg.flags & NBI_SEG) == NBI_SEG_APPEND) { + loadaddr = last1 + seg.loadaddr; + } +#if 0 + else if ((seg.flags & NBI_SEG) == NBI_SEG_NEGATIVE) { + loadaddr = memsize - seg.loadaddr; + } +#endif + else if ((seg.flags & NBI_SEG) == NBI_SEG_PREPEND) { + loadaddr = last0 - seg.loadaddr; + } + else { + printf("warning: unhandled segment of type %0x\n", + seg.flags & NBI_SEG); + continue; + } + add_segment(info, buf + file_off, seg.imglength, + loadaddr, seg.memlength); + last0 = loadaddr; + last1 = last0 + seg.memlength; + file_off += seg.imglength; + } while(!(seg.flags & NBI_LAST_SEG)); + + if (hdr.length & NBI_ENTRY32) { + struct entry32_regs regs32; + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs32", ®s32, sizeof(regs32)); + regs32.eip = hdr.execaddr.linear; + elf_rel_set_symbol(&info->rhdr, "entry32_regs32", ®s32, sizeof(regs32)); + } + else { + struct entry32_regs regs32; + struct entry16_regs regs16; + + /* Initialize the 16 bit registers */ + elf_rel_get_symbol(&info->rhdr, "entry16_regs", ®s16, sizeof(regs16)); + regs16.cs = hdr.execaddr.segoff.cs; + regs16.ip = hdr.execaddr.segoff.ip; + elf_rel_set_symbol(&info->rhdr, "entry16_regs", ®s16, sizeof(regs16)); + + /* Initialize the 32 bit registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s32, sizeof(regs32)); + regs32.eip = elf_rel_get_addr(&info->rhdr, "entry16"); + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s32, sizeof(regs32)); + } + return 0; +} diff --git a/kexec/arch/i386/kexec-x86-common.c b/kexec/arch/i386/kexec-x86-common.c new file mode 100644 index 0000000..ffc95a9 --- /dev/null +++ b/kexec/arch/i386/kexec-x86-common.c @@ -0,0 +1,444 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _XOPEN_SOURCE 600 +#define _BSD_SOURCE +#define _DEFAULT_SOURCE + +#include <fcntl.h> +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../firmware_memmap.h" +#include "../../crashdump.h" +#include "../../kexec-xen.h" +#include "kexec-x86.h" +#include "x86-linux-setup.h" + +/* Used below but not present in (older?) xenctrl.h */ +#ifndef E820_PMEM +#define E820_PMEM 7 +#define E820_PRAM 12 +#endif + +/* + * The real mode IVT ends at 0x400. + * See https://wiki.osdev.org/Interrupt_Vector_Table. + */ +#define REALMODE_IVT_END 0x400 + +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +/** + * The old /proc/iomem parsing code. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. + */ +static int get_memory_ranges_proc_iomem(struct memory_range **range, int *ranges) +{ + const char *iomem= proc_iomem(); + int memory_ranges = 0; + char line[MAX_LINE]; + FILE *fp; + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + while(fgets(line, sizeof(line), fp) != 0) { + unsigned long long start, end; + char *str; + int type; + int consumed; + int count; + if (memory_ranges >= MAX_MEMORY_RANGES) + break; + count = sscanf(line, "%llx-%llx : %n", + &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + + dbgprintf("%016Lx-%016Lx : %s", start, end, str); + + if (memcmp(str, "System RAM\n", 11) == 0) { + type = RANGE_RAM; + } + else if (strncasecmp(str, "reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } + else if (memcmp(str, "ACPI Tables\n", 12) == 0) { + type = RANGE_ACPI; + } + else if (memcmp(str, "ACPI Non-volatile Storage\n", 26) == 0) { + type = RANGE_ACPI_NVS; + } + else if (memcmp(str, "Persistent Memory (legacy)\n", 27) == 0) { + type = RANGE_PRAM; + } + else if (memcmp(str, "Persistent Memory\n", 18) == 0) { + type = RANGE_PMEM; + } + else { + continue; + } + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + + dbgprintf("%016Lx-%016Lx : %x\n", start, end, type); + + memory_ranges++; + } + fclose(fp); + *range = memory_range; + *ranges = memory_ranges; + return 0; +} + +/** + * Calls the architecture independent get_firmware_memmap_ranges() to parse + * /sys/firmware/memmap and then do some x86 only modifications. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. + */ +static int get_memory_ranges_sysfs(struct memory_range **range, int *ranges) +{ + int ret; + size_t range_number = MAX_MEMORY_RANGES; + + ret = get_firmware_memmap_ranges(memory_range, &range_number); + if (ret != 0) { + fprintf(stderr, "Parsing the /sys/firmware memory map failed. " + "Falling back to /proc/iomem.\n"); + return get_memory_ranges_proc_iomem(range, ranges); + } + + *range = memory_range; + *ranges = range_number; + + return 0; +} + +#ifdef HAVE_LIBXENCTRL +unsigned xen_e820_to_kexec_type(uint32_t type) +{ + switch (type) { + case E820_RAM: + return RANGE_RAM; + case E820_ACPI: + return RANGE_ACPI; + case E820_NVS: + return RANGE_ACPI_NVS; + case E820_PMEM: + return RANGE_PMEM; + case E820_PRAM: + return RANGE_PRAM; + case E820_RESERVED: + default: + return RANGE_RESERVED; + } +} + +/** + * Memory map detection for Xen. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. + */ +static int get_memory_ranges_xen(struct memory_range **range, int *ranges) +{ + int rc, ret = -1; + struct e820entry e820entries[MAX_MEMORY_RANGES]; + unsigned int i; + xc_interface *xc; + + xc = xc_interface_open(NULL, NULL, 0); + + if (!xc) { + fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); + return -1; + } + + rc = xc_get_machine_memory_map(xc, e820entries, MAX_MEMORY_RANGES); + + if (rc < 0) { + fprintf(stderr, "%s: xc_get_machine_memory_map: %s\n", __func__, strerror(rc)); + goto err; + } + + for (i = 0; i < rc; ++i) { + memory_range[i].start = e820entries[i].addr; + memory_range[i].end = e820entries[i].addr + e820entries[i].size - 1; + memory_range[i].type = xen_e820_to_kexec_type(e820entries[i].type); + } + + qsort(memory_range, rc, sizeof(struct memory_range), compare_ranges); + + *range = memory_range; + *ranges = rc; + + ret = 0; + +err: + xc_interface_close(xc); + + return ret; +} +#else +static int get_memory_ranges_xen(struct memory_range **range, int *ranges) +{ + return 0; +} +#endif /* HAVE_LIBXENCTRL */ + +static void remove_range(struct memory_range *range, int nr_ranges, int index) +{ + int i, j; + + for (i = index; i < (nr_ranges-1); i++) { + j = i+1; + range[i] = range[j]; + } +} + +/** + * Verifies and corrects any overlapping ranges. + * The ranges array is assumed to be sorted already. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. + */ +static int fixup_memory_ranges(struct memory_range **range, int *ranges) +{ + int i; + int j; + int change_made; + int nr_ranges = *ranges; + struct memory_range *rp = *range; + +again: + change_made = 0; + for (i = 0; i < (nr_ranges-1); i++) { + j = i+1; + if (rp[i].start > rp[j].start) { + fprintf(stderr, "memory out of order!!\n"); + return 1; + } + + if (rp[i].type != rp[j].type) + continue; + + if (rp[i].start == rp[j].start) { + if (rp[i].end >= rp[j].end) { + remove_range(rp, nr_ranges, j); + nr_ranges--; + change_made++; + } else { + remove_range(rp, nr_ranges, i); + nr_ranges--; + change_made++; + } + } else { + if (rp[i].end > rp[j].start) { + if (rp[i].end < rp[j].end) { + rp[j].start = rp[i].end; + change_made++; + } else if (rp[i].end >= rp[j].end) { + remove_range(rp, nr_ranges, j); + nr_ranges--; + change_made++; + } + } + } + } + + /* fixing/removing an entry may make it wrong relative to the next */ + if (change_made) + goto again; + + *ranges = nr_ranges; + return 0; +} + +/** + * Detect the add_efi_memmap kernel parameter. + * + * On some EFI-based systems, the e820 map is empty, or does not contain a + * complete memory map. The add_efi_memmap parameter adds these entries to + * the kernel's memory map, but does not add them under sysfs, which causes + * kexec to fail in a way similar to how it does not work on Xen. + * + * @return 1 if parameter is present, 0 if not or if an error occurs. + */ +int efi_map_added( void ) { + char buf[512]; + FILE *fp = fopen( "/proc/cmdline", "r" ); + if( fp ) { + fgets( buf, 512, fp ); + fclose( fp ); + return strstr( buf, "add_efi_memmap" ) != NULL; + } else { + return 0; + } +} + +/** + * Return a sorted list of memory ranges. + * + * If we have the /sys/firmware/memmap interface, then use that. If not, + * or if parsing of that fails, use /proc/iomem as fallback. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * @param[in] kexec_flags the kexec_flags to determine if we load a normal + * or a crashdump kernel + * + * @return 0 on success, any other value on failure. + */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + int ret, i; + + if (!efi_map_added() && !xen_present() && have_sys_firmware_memmap()) { + ret = get_memory_ranges_sysfs(range, ranges); + if (!ret) + ret = fixup_memory_ranges(range, ranges); + } else if (xen_present()) { + ret = get_memory_ranges_xen(range, ranges); + if (!ret) + ret = fixup_memory_ranges(range, ranges); + } else + ret = get_memory_ranges_proc_iomem(range, ranges); + + /* + * get_memory_ranges_sysfs(), get_memory_ranges_proc_iomem() and + * get_memory_ranges_xen() have already printed an error message, + * so fail silently here. + */ + if (ret != 0) + return ret; + + /* Don't report the interrupt table as ram */ + for (i = 0; i < *ranges; i++) { + if ((*range)[i].type == RANGE_RAM && + ((*range)[i].start < REALMODE_IVT_END)) { + (*range)[i].start = REALMODE_IVT_END; + break; + } + } + + /* + * Redefine the memory region boundaries if kernel + * exports the limits and if it is panic kernel. + * Override user values only if kernel exported values are + * subset of user defined values. + */ + if ((kexec_flags & KEXEC_ON_CRASH) && + !(kexec_flags & KEXEC_PRESERVE_CONTEXT)) { + uint64_t start, end; + + ret = get_crash_kernel_load_range(&start, &end); + if (ret != 0) { + fprintf(stderr, "get_crash_kernel_load_range failed.\n"); + return -1; + } + + if (start > mem_min) + mem_min = start; + if (end < mem_max) + mem_max = end; + } + + dbgprint_mem_range("MEMORY RANGES", *range, *ranges); + + return ret; +} + +static uint64_t bootparam_get_acpi_rsdp(void) { + uint64_t acpi_rsdp = 0; + off_t offset = offsetof(struct x86_linux_param_header, acpi_rsdp_addr); + + if (get_bootparam(&acpi_rsdp, offset, sizeof(acpi_rsdp))) + return 0; + + return acpi_rsdp; +} + +static uint64_t efi_get_acpi_rsdp(void) { + FILE *fp; + char line[MAX_LINE], *s; + uint64_t acpi_rsdp = 0; + + fp = fopen("/sys/firmware/efi/systab", "r"); + if (!fp) + return acpi_rsdp; + + while(fgets(line, sizeof(line), fp) != 0) { + /* ACPI20= always goes before ACPI= */ + if ((strstr(line, "ACPI20=")) || (strstr(line, "ACPI="))) { + s = strchr(line, '=') + 1; + sscanf(s, "0x%lx", &acpi_rsdp); + break; + } + } + fclose(fp); + + return acpi_rsdp; +} + +uint64_t get_acpi_rsdp(void) +{ + uint64_t acpi_rsdp = 0; + + acpi_rsdp = bootparam_get_acpi_rsdp(); + + if (!acpi_rsdp) + acpi_rsdp = efi_get_acpi_rsdp(); + + return acpi_rsdp; +} diff --git a/kexec/arch/i386/kexec-x86.c b/kexec/arch/i386/kexec-x86.c new file mode 100644 index 0000000..444cb69 --- /dev/null +++ b/kexec/arch/i386/kexec-x86.c @@ -0,0 +1,210 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <stdlib.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../firmware_memmap.h" +#include "kexec-x86.h" +#include "crashdump-x86.h" +#include <arch/options.h> + +struct file_type file_type[] = { + { "multiboot-x86", multiboot_x86_probe, multiboot_x86_load, + multiboot_x86_usage }, + { "multiboot2-x86", multiboot2_x86_probe, multiboot2_x86_load, + multiboot2_x86_usage }, + { "elf-x86", elf_x86_probe, elf_x86_load, elf_x86_usage }, + { "bzImage", bzImage_probe, bzImage_load, bzImage_usage }, + { "beoboot-x86", beoboot_probe, beoboot_load, beoboot_usage }, + { "nbi-x86", nbi_probe, nbi_load, nbi_usage }, +}; +int file_types = sizeof(file_type)/sizeof(file_type[0]); + + +void arch_usage(void) +{ + printf( + " --reset-vga Attempt to reset a standard vga device\n" + " --serial=<port> Specify the serial port for debug output\n" + " --serial-baud=<baud_rate> Specify the serial port baud rate\n" + " --console-vga Enable the vga console\n" + " --console-serial Enable the serial console\n" + " --elf32-core-headers Prepare core headers in ELF32 format\n" + " --elf64-core-headers Prepare core headers in ELF64 format\n" + " --pass-memmap-cmdline Pass memory map via command line in kexec on panic case\n" + " --noefi Disable efi support\n" + ); +} + +struct arch_options_t arch_options = { + .reset_vga = 0, + .serial_base = 0x3f8, + .serial_baud = 0, + .console_vga = 0, + .console_serial = 0, + .core_header_type = CORE_TYPE_UNDEF, + .pass_memmap_cmdline = 0, + .noefi = 0, +}; + +int arch_process_options(int argc, char **argv) +{ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + unsigned long value; + char *end; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + break; + case OPT_RESET_VGA: + arch_options.reset_vga = 1; + break; + case OPT_CONSOLE_VGA: + arch_options.console_vga = 1; + break; + case OPT_CONSOLE_SERIAL: + arch_options.console_serial = 1; + break; + case OPT_SERIAL: + value = ULONG_MAX; + if (strcmp(optarg, "ttyS0") == 0) { + value = 0x3f8; + } + else if (strcmp(optarg, "ttyS1") == 0) { + value = 0x2f8; + } + else if (strncmp(optarg, "0x", 2) == 0) { + value = strtoul(optarg +2, &end, 16); + if (*end != '\0') { + value = ULONG_MAX; + } + } + if (value >= 65536) { + fprintf(stderr, "Bad serial port base '%s'\n", + optarg); + usage(); + return -1; + + } + arch_options.serial_base = value; + break; + case OPT_SERIAL_BAUD: + value = strtoul(optarg, &end, 0); + if ((value > 115200) || ((115200 %value) != 0) || + (value < 9600) || (*end)) + { + fprintf(stderr, "Bad serial port baud rate '%s'\n", + optarg); + usage(); + return -1; + + } + arch_options.serial_baud = value; + break; + case OPT_ELF32_CORE: + arch_options.core_header_type = CORE_TYPE_ELF32; + break; + case OPT_ELF64_CORE: + arch_options.core_header_type = CORE_TYPE_ELF64; + break; + case OPT_PASS_MEMMAP_CMDLINE: + arch_options.pass_memmap_cmdline = 1; + break; + case OPT_NOEFI: + arch_options.noefi = 1; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + /* For compatibility with older patches + * use KEXEC_ARCH_DEFAULT instead of KEXEC_ARCH_386 here. + */ + { "i386", KEXEC_ARCH_DEFAULT }, + { "i486", KEXEC_ARCH_DEFAULT }, + { "i586", KEXEC_ARCH_DEFAULT }, + { "i686", KEXEC_ARCH_DEFAULT }, + { "x86_64", KEXEC_ARCH_X86_64 }, + { 0, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *info) +{ + if ((info->kexec_flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_X86_64) + { + if (!info->rhdr.e_shdr) { + fprintf(stderr, + "A trampoline is required for cross architecture support\n"); + return -1; + } + elf_rel_set_symbol(&info->rhdr, "compat_x86_64_entry32", + &info->entry, sizeof(info->entry)); + + info->entry = (void *)elf_rel_get_addr(&info->rhdr, "compat_x86_64"); + } + return 0; +} + +void arch_update_purgatory(struct kexec_info *info) +{ + uint8_t panic_kernel = 0; + + elf_rel_set_symbol(&info->rhdr, "reset_vga", + &arch_options.reset_vga, sizeof(arch_options.reset_vga)); + elf_rel_set_symbol(&info->rhdr, "serial_base", + &arch_options.serial_base, sizeof(arch_options.serial_base)); + elf_rel_set_symbol(&info->rhdr, "serial_baud", + &arch_options.serial_baud, sizeof(arch_options.serial_baud)); + elf_rel_set_symbol(&info->rhdr, "console_vga", + &arch_options.console_vga, sizeof(arch_options.console_vga)); + elf_rel_set_symbol(&info->rhdr, "console_serial", + &arch_options.console_serial, sizeof(arch_options.console_serial)); + elf_rel_set_symbol(&info->rhdr, "backup_src_start", + &info->backup_src_start, sizeof(info->backup_src_start)); + elf_rel_set_symbol(&info->rhdr, "backup_src_size", + &info->backup_src_size, sizeof(info->backup_src_size)); + if (info->kexec_flags & KEXEC_ON_CRASH) { + panic_kernel = 1; + elf_rel_set_symbol(&info->rhdr, "backup_start", + &info->backup_start, sizeof(info->backup_start)); + } + elf_rel_set_symbol(&info->rhdr, "panic_kernel", + &panic_kernel, sizeof(panic_kernel)); +} diff --git a/kexec/arch/i386/kexec-x86.h b/kexec/arch/i386/kexec-x86.h new file mode 100644 index 0000000..46e2898 --- /dev/null +++ b/kexec/arch/i386/kexec-x86.h @@ -0,0 +1,97 @@ +#ifndef KEXEC_X86_H +#define KEXEC_X86_H + +#define MAX_MEMORY_RANGES 2048 + +enum coretype { + CORE_TYPE_UNDEF = 0, + CORE_TYPE_ELF32 = 1, + CORE_TYPE_ELF64 = 2 +}; + +extern unsigned char compat_x86_64[]; +extern uint32_t compat_x86_64_size, compat_x86_64_entry32; + +struct entry32_regs { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t esp; + uint32_t ebp; + uint32_t eip; +}; + +struct entry16_regs { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t esp; + uint32_t ebp; + uint16_t ds; + uint16_t es; + uint16_t ss; + uint16_t fs; + uint16_t gs; + uint16_t ip; + uint16_t cs; + uint16_t pad; +}; + +struct arch_options_t { + uint8_t reset_vga; + uint16_t serial_base; + uint32_t serial_baud; + uint8_t console_vga; + uint8_t console_serial; + enum coretype core_header_type; + uint8_t pass_memmap_cmdline; + uint8_t noefi; + uint8_t reuse_video_type; +}; + +int multiboot_x86_probe(const char *buf, off_t len); +int multiboot_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void multiboot_x86_usage(void); + +int multiboot2_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void multiboot2_x86_usage(void); +int multiboot2_x86_probe(const char *buf, off_t buf_len); + +int elf_x86_probe(const char *buf, off_t len); +int elf_x86_any_probe(const char *buf, off_t len, enum coretype arch); +int elf_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_x86_usage(void); + +int bzImage_probe(const char *buf, off_t len); +int bzImage_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void bzImage_usage(void); +int do_bzImage_load(struct kexec_info *info, + const char *kernel, off_t kernel_len, + const char *command_line, off_t command_line_len, + const char *initrd, off_t initrd_len, + const char *dtb, off_t dtb_len, + int real_mode_entry); + +int beoboot_probe(const char *buf, off_t len); +int beoboot_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void beoboot_usage(void); + +int nbi_probe(const char *buf, off_t len); +int nbi_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void nbi_usage(void); + +extern unsigned xen_e820_to_kexec_type(uint32_t type); +extern uint64_t get_acpi_rsdp(void); +#endif /* KEXEC_X86_H */ diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c new file mode 100644 index 0000000..9a281dc --- /dev/null +++ b/kexec/arch/i386/x86-linux-setup.c @@ -0,0 +1,971 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#define _GNU_SOURCE +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/random.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <linux/fb.h> +#include <linux/screen_info.h> +#include <unistd.h> +#include <dirent.h> +#include <mntent.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "kexec-x86.h" +#include "x86-linux-setup.h" +#include "../../kexec/kexec-syscall.h" + +#ifndef VIDEO_CAPABILITY_64BIT_BASE +#define VIDEO_CAPABILITY_64BIT_BASE (1 << 1) /* Frame buffer base is 64-bit */ +#endif + +void init_linux_parameters(struct x86_linux_param_header *real_mode) +{ + /* Fill in the values that are usually provided by the kernel. */ + + /* Boot block magic */ + memcpy(real_mode->header_magic, "HdrS", 4); + real_mode->protocol_version = 0x0206; + real_mode->initrd_addr_max = DEFAULT_INITRD_ADDR_MAX; + real_mode->cmdline_size = COMMAND_LINE_SIZE; +} + +void setup_linux_bootloader_parameters_high( + struct kexec_info *info, struct x86_linux_param_header *real_mode, + unsigned long real_mode_base, unsigned long cmdline_offset, + const char *cmdline, off_t cmdline_len, + const char *initrd_buf, off_t initrd_size, int initrd_high) +{ + char *cmdline_ptr; + unsigned long initrd_base, initrd_addr_max; + + /* Say I'm a boot loader */ + real_mode->loader_type = LOADER_TYPE_KEXEC << 4; + + /* No loader flags */ + real_mode->loader_flags = 0; + + /* Find the maximum initial ramdisk address */ + if (initrd_high) + initrd_addr_max = ULONG_MAX; + else { + initrd_addr_max = DEFAULT_INITRD_ADDR_MAX; + if (real_mode->protocol_version >= 0x0203) { + initrd_addr_max = real_mode->initrd_addr_max; + dbgprintf("initrd_addr_max is 0x%lx\n", + initrd_addr_max); + } + } + + /* Load the initrd if we have one */ + if (initrd_buf) { + initrd_base = add_buffer(info, + initrd_buf, initrd_size, initrd_size, + 4096, INITRD_BASE, initrd_addr_max, -1); + dbgprintf("Loaded initrd at 0x%lx size 0x%lx\n", initrd_base, + initrd_size); + } else { + initrd_base = 0; + initrd_size = 0; + } + + /* Ramdisk address and size */ + real_mode->initrd_start = initrd_base & 0xffffffffUL; + real_mode->initrd_size = initrd_size & 0xffffffffUL; + + if (real_mode->protocol_version >= 0x020c && + (initrd_base & 0xffffffffUL) != initrd_base) + real_mode->ext_ramdisk_image = initrd_base >> 32; + + if (real_mode->protocol_version >= 0x020c && + (initrd_size & 0xffffffffUL) != initrd_size) + real_mode->ext_ramdisk_size = initrd_size >> 32; + + /* The location of the command line */ + /* if (real_mode_base == 0x90000) { */ + real_mode->cl_magic = CL_MAGIC_VALUE; + real_mode->cl_offset = cmdline_offset; + /* setup_move_size */ + /* } */ + if (real_mode->protocol_version >= 0x0202) { + unsigned long cmd_line_ptr = real_mode_base + cmdline_offset; + + real_mode->cmd_line_ptr = cmd_line_ptr & 0xffffffffUL; + if ((real_mode->protocol_version >= 0x020c) && + ((cmd_line_ptr & 0xffffffffUL) != cmd_line_ptr)) + real_mode->ext_cmd_line_ptr = cmd_line_ptr >> 32; + } + + /* Fill in the command line */ + if (cmdline_len > COMMAND_LINE_SIZE) { + cmdline_len = COMMAND_LINE_SIZE; + } + cmdline_ptr = ((char *)real_mode) + cmdline_offset; + memcpy(cmdline_ptr, cmdline, cmdline_len); + cmdline_ptr[cmdline_len - 1] = '\0'; +} + +static int setup_linux_vesafb(struct x86_linux_param_header *real_mode) +{ + struct fb_fix_screeninfo fix; + struct fb_var_screeninfo var; + int fd; + + fd = open("/dev/fb0", O_RDONLY); + if (-1 == fd) + return -1; + + if (-1 == ioctl(fd, FBIOGET_FSCREENINFO, &fix)) + goto out; + if (-1 == ioctl(fd, FBIOGET_VSCREENINFO, &var)) + goto out; + if (0 == strcmp(fix.id, "VESA VGA")) { + /* VIDEO_TYPE_VLFB */ + real_mode->orig_video_isVGA = 0x23; + } else if (0 == strcmp(fix.id, "EFI VGA")) { + /* VIDEO_TYPE_EFI */ + real_mode->orig_video_isVGA = 0x70; + } else if (arch_options.reuse_video_type) { + int err; + off_t offset = offsetof(typeof(*real_mode), orig_video_isVGA); + + /* blindly try old boot time video type */ + err = get_bootparam(&real_mode->orig_video_isVGA, offset, 1); + if (err) + goto out; + } else { + real_mode->orig_video_isVGA = 0; + close(fd); + return 0; + } + close(fd); + + real_mode->lfb_width = var.xres; + real_mode->lfb_height = var.yres; + real_mode->lfb_depth = var.bits_per_pixel; + real_mode->lfb_base = fix.smem_start & 0xffffffffUL; + real_mode->lfb_linelength = fix.line_length; + real_mode->vesapm_seg = 0; + + if (fix.smem_start > 0xffffffffUL) { + real_mode->ext_lfb_base = fix.smem_start >> 32; + real_mode->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + } + + /* FIXME: better get size from the file returned by proc_iomem() */ + real_mode->lfb_size = (fix.smem_len + 65535) / 65536; + real_mode->pages = (fix.smem_len + 4095) / 4096; + + if (var.bits_per_pixel > 8) { + real_mode->red_pos = var.red.offset; + real_mode->red_size = var.red.length; + real_mode->green_pos = var.green.offset; + real_mode->green_size = var.green.length; + real_mode->blue_pos = var.blue.offset; + real_mode->blue_size = var.blue.length; + real_mode->rsvd_pos = var.transp.offset; + real_mode->rsvd_size = var.transp.length; + } + return 0; + + out: + close(fd); + return -1; +} + +#define EDD_SYFS_DIR "/sys/firmware/edd" + +#define EDD_EXT_FIXED_DISK_ACCESS (1 << 0) +#define EDD_EXT_DEVICE_LOCKING_AND_EJECTING (1 << 1) +#define EDD_EXT_ENHANCED_DISK_DRIVE_SUPPORT (1 << 2) +#define EDD_EXT_64BIT_EXTENSIONS (1 << 3) + +/* + * Scans one line from a given filename. Returns on success the number of + * items written (same like scanf()). + */ +static int file_scanf(const char *dir, const char *file, const char *scanf_line, ...) +{ + va_list argptr; + FILE *fp; + int retno; + char filename[PATH_MAX]; + + snprintf(filename, PATH_MAX, "%s/%s", dir, file); + filename[PATH_MAX-1] = 0; + + fp = fopen(filename, "r"); + if (!fp) { + return -errno; + } + + va_start(argptr, scanf_line); + retno = vfscanf(fp, scanf_line, argptr); + va_end(argptr); + + fclose(fp); + + return retno; +} + +static int parse_edd_extensions(const char *dir, struct edd_info *edd_info) +{ + char filename[PATH_MAX]; + char line[1024]; + uint16_t flags = 0; + FILE *fp; + int ret; + + ret = snprintf(filename, PATH_MAX, "%s/%s", dir, "extensions"); + if (ret < 0 || ret >= PATH_MAX) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + return -1; + } + + filename[PATH_MAX-1] = 0; + + fp = fopen(filename, "r"); + if (!fp) { + return -errno; + } + + while (fgets(line, 1024, fp)) { + /* + * strings are in kernel source, function edd_show_extensions() + * drivers/firmware/edd.c + */ + if (strstr(line, "Fixed disk access") == line) + flags |= EDD_EXT_FIXED_DISK_ACCESS; + else if (strstr(line, "Device locking and ejecting") == line) + flags |= EDD_EXT_DEVICE_LOCKING_AND_EJECTING; + else if (strstr(line, "Enhanced Disk Drive support") == line) + flags |= EDD_EXT_ENHANCED_DISK_DRIVE_SUPPORT; + else if (strstr(line, "64-bit extensions") == line) + flags |= EDD_EXT_64BIT_EXTENSIONS; + } + + fclose(fp); + + edd_info->interface_support = flags; + + return 0; +} + +static int read_edd_raw_data(const char *dir, struct edd_info *edd_info) +{ + char filename[PATH_MAX]; + FILE *fp; + size_t read_chars; + uint16_t len; + int ret; + + ret = snprintf(filename, PATH_MAX, "%s/%s", dir, "raw_data"); + if (ret < 0 || ret >= PATH_MAX) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + return -1; + } + + filename[PATH_MAX-1] = 0; + + fp = fopen(filename, "r"); + if (!fp) { + return -errno; + } + + memset(edd_info->edd_device_params, 0, EDD_DEVICE_PARAM_SIZE); + read_chars = fread(edd_info->edd_device_params, sizeof(uint8_t), + EDD_DEVICE_PARAM_SIZE, fp); + fclose(fp); + + len = ((uint16_t *)edd_info->edd_device_params)[0]; + dbgprintf("EDD raw data has length %d\n", len); + + if (read_chars < len) { + fprintf(stderr, "BIOS reported EDD length of %hd but only " + "%d chars read.\n", len, (int)read_chars); + return -1; + } + + return 0; +} + +static int add_edd_entry(struct x86_linux_param_header *real_mode, + const char *sysfs_name, int *current_edd, int *current_mbr) +{ + uint8_t devnum, version; + uint32_t mbr_sig; + struct edd_info *edd_info; + + if (!current_mbr || !current_edd) { + fprintf(stderr, "%s: current_edd and current_edd " + "must not be NULL", __FUNCTION__); + return -1; + } + + edd_info = &real_mode->eddbuf[*current_edd]; + memset(edd_info, 0, sizeof(struct edd_info)); + + /* extract the device number */ + if (sscanf(basename(sysfs_name), "int13_dev%hhx", &devnum) != 1) { + fprintf(stderr, "Invalid format of int13_dev dir " + "entry: %s\n", basename(sysfs_name)); + return -1; + } + + /* if there's a MBR signature, then add it */ + if (file_scanf(sysfs_name, "mbr_signature", "0x%x", &mbr_sig) == 1) { + real_mode->edd_mbr_sig_buffer[*current_mbr] = mbr_sig; + (*current_mbr)++; + dbgprintf("EDD Device 0x%x: mbr_sig=0x%x\n", devnum, mbr_sig); + } + + /* set the device number */ + edd_info->device = devnum; + + /* set the version */ + if (file_scanf(sysfs_name, "version", "0x%hhx", &version) != 1) + return -1; + + edd_info->version = version; + + /* if version == 0, that's some kind of dummy entry */ + if (version != 0) { + /* legacy_max_cylinder */ + if (file_scanf(sysfs_name, "legacy_max_cylinder", "%hu", + &edd_info->legacy_max_cylinder) != 1) { + fprintf(stderr, "Reading legacy_max_cylinder failed.\n"); + return -1; + } + + /* legacy_max_head */ + if (file_scanf(sysfs_name, "legacy_max_head", "%hhu", + &edd_info->legacy_max_head) != 1) { + fprintf(stderr, "Reading legacy_max_head failed.\n"); + return -1; + } + + /* legacy_sectors_per_track */ + if (file_scanf(sysfs_name, "legacy_sectors_per_track", "%hhu", + &edd_info->legacy_sectors_per_track) != 1) { + fprintf(stderr, "Reading legacy_sectors_per_track failed.\n"); + return -1; + } + + /* Parse the EDD extensions */ + if (parse_edd_extensions(sysfs_name, edd_info) != 0) { + fprintf(stderr, "Parsing EDD extensions failed.\n"); + return -1; + } + + /* Parse the raw info */ + if (read_edd_raw_data(sysfs_name, edd_info) != 0) { + fprintf(stderr, "Reading EDD raw data failed.\n"); + return -1; + } + } + + (*current_edd)++; + + return 0; +} + +static void zero_edd(struct x86_linux_param_header *real_mode) +{ + real_mode->eddbuf_entries = 0; + real_mode->edd_mbr_sig_buf_entries = 0; + memset(real_mode->eddbuf, 0, + EDDMAXNR * sizeof(struct edd_info)); + memset(real_mode->edd_mbr_sig_buffer, 0, + EDD_MBR_SIG_MAX * sizeof(uint32_t)); +} + +void setup_edd_info(struct x86_linux_param_header *real_mode) +{ + DIR *edd_dir; + struct dirent *cursor; + int current_edd = 0; + int current_mbr = 0; + + edd_dir = opendir(EDD_SYFS_DIR); + if (!edd_dir) { + dbgprintf(EDD_SYFS_DIR " does not exist.\n"); + return; + } + + zero_edd(real_mode); + while ((cursor = readdir(edd_dir))) { + char full_dir_name[PATH_MAX]; + + /* only read the entries that start with "int13_dev" */ + if (strstr(cursor->d_name, "int13_dev") != cursor->d_name) + continue; + + snprintf(full_dir_name, PATH_MAX, "%s/%s", + EDD_SYFS_DIR, cursor->d_name); + full_dir_name[PATH_MAX-1] = 0; + + if (add_edd_entry(real_mode, full_dir_name, ¤t_edd, + ¤t_mbr) != 0) { + zero_edd(real_mode); + goto out; + } + } + + real_mode->eddbuf_entries = current_edd; + real_mode->edd_mbr_sig_buf_entries = current_mbr; + +out: + closedir(edd_dir); + + dbgprintf("Added %d EDD MBR entries and %d EDD entries.\n", + real_mode->edd_mbr_sig_buf_entries, + real_mode->eddbuf_entries); +} + +/* + * This really only makes sense for virtual filesystems that are only expected + * to be mounted once (sysfs, debugsfs, proc), as it will return the first + * instance listed in /proc/mounts, falling back to mtab if absent. + * We search by type and not by name because the name can be anything; + * while setting the name equal to the mount point is common, it cannot be + * relied upon, as even kernel documentation examples recommends using + * "none" as the name e.g. for debugfs. + */ +char *find_mnt_by_type(char *type) +{ + FILE *mtab; + struct mntent *mnt; + char *mntdir; + + mtab = setmntent("/proc/mounts", "r"); + if (!mtab) { + // Fall back to mtab + mtab = setmntent("/etc/mtab", "r"); + } + if (!mtab) + return NULL; + for(mnt = getmntent(mtab); mnt; mnt = getmntent(mtab)) { + if (strcmp(mnt->mnt_type, type) == 0) + break; + } + mntdir = mnt ? strdup(mnt->mnt_dir) : NULL; + endmntent(mtab); + return mntdir; +} + +int get_bootparam(void *buf, off_t offset, size_t size) +{ + int data_file; + char *debugfs_mnt, *sysfs_mnt; + char filename[PATH_MAX]; + int err, has_sysfs_params = 0; + + sysfs_mnt = find_mnt_by_type("sysfs"); + if (sysfs_mnt) { + snprintf(filename, PATH_MAX, "%s/%s", sysfs_mnt, + "kernel/boot_params/data"); + free(sysfs_mnt); + err = access(filename, F_OK); + if (!err) + has_sysfs_params = 1; + } + + if (!has_sysfs_params) { + debugfs_mnt = find_mnt_by_type("debugfs"); + if (!debugfs_mnt) + return 1; + snprintf(filename, PATH_MAX, "%s/%s", debugfs_mnt, + "boot_params/data"); + free(debugfs_mnt); + } + + data_file = open(filename, O_RDONLY); + if (data_file < 0) + return 1; + if (lseek(data_file, offset, SEEK_SET) < 0) + goto close; + read(data_file, buf, size); +close: + close(data_file); + return 0; +} + +void setup_subarch(struct x86_linux_param_header *real_mode) +{ + off_t offset = offsetof(typeof(*real_mode), hardware_subarch); + + get_bootparam(&real_mode->hardware_subarch, offset, sizeof(uint32_t)); +} + +struct efi_mem_descriptor { + uint32_t type; + uint32_t pad; + uint64_t phys_addr; + uint64_t virt_addr; + uint64_t num_pages; + uint64_t attribute; +}; + +struct efi_setup_data { + uint64_t fw_vendor; + uint64_t runtime; + uint64_t tables; + uint64_t smbios; + uint64_t reserved[8]; +}; + +struct setup_data { + uint64_t next; + uint32_t type; +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 +#define SETUP_RNG_SEED 9 + uint32_t len; + uint8_t data[0]; +} __attribute__((packed)); + +static int get_efi_value(const char *filename, + const char *pattern, uint64_t *val) +{ + FILE *fp; + char line[1024], *s, *end; + + fp = fopen(filename, "r"); + if (!fp) + return 1; + + while (fgets(line, sizeof(line), fp) != 0) { + s = strstr(line, pattern); + if (!s) + continue; + *val = strtoull(s + strlen(pattern), &end, 16); + if (*val == ULLONG_MAX) { + fclose(fp); + return 2; + } + break; + } + + fclose(fp); + return 0; +} + +static int get_efi_values(struct efi_setup_data *esd) +{ + int ret = 0; + + ret = get_efi_value("/sys/firmware/efi/systab", "SMBIOS=0x", + &esd->smbios); + ret |= get_efi_value("/sys/firmware/efi/fw_vendor", "0x", + &esd->fw_vendor); + ret |= get_efi_value("/sys/firmware/efi/runtime", "0x", + &esd->runtime); + ret |= get_efi_value("/sys/firmware/efi/config_table", "0x", + &esd->tables); + return ret; +} + +static int get_efi_runtime_map(struct efi_mem_descriptor **map) +{ + DIR *dirp; + struct dirent *entry; + char filename[1024]; + struct efi_mem_descriptor md, *p = NULL; + int nr_maps = 0; + + dirp = opendir("/sys/firmware/efi/runtime-map"); + if (!dirp) + return 0; + while ((entry = readdir(dirp)) != NULL) { + sprintf(filename, + "/sys/firmware/efi/runtime-map/%s", + (char *)entry->d_name); + if (*entry->d_name == '.') + continue; + file_scanf(filename, "type", "0x%x", (unsigned int *)&md.type); + file_scanf(filename, "phys_addr", "0x%llx", + (unsigned long long *)&md.phys_addr); + file_scanf(filename, "virt_addr", "0x%llx", + (unsigned long long *)&md.virt_addr); + file_scanf(filename, "num_pages", "0x%llx", + (unsigned long long *)&md.num_pages); + file_scanf(filename, "attribute", "0x%llx", + (unsigned long long *)&md.attribute); + p = realloc(p, (nr_maps + 1) * sizeof(md)); + if (!p) + goto err_out; + + *(p + nr_maps) = md; + *map = p; + nr_maps++; + } + + closedir(dirp); + return nr_maps; +err_out: + if (*map) + free(*map); + closedir(dirp); + return 0; +} + +struct efi_info { + uint32_t efi_loader_signature; + uint32_t efi_systab; + uint32_t efi_memdesc_size; + uint32_t efi_memdesc_version; + uint32_t efi_memmap; + uint32_t efi_memmap_size; + uint32_t efi_systab_hi; + uint32_t efi_memmap_hi; +}; + +/* + * Add another instance to single linked list of struct setup_data. + * Please refer to kernel Documentation/x86/boot.txt for more details + * about setup_data structure. + */ +static void add_setup_data(struct kexec_info *info, + struct x86_linux_param_header *real_mode, + struct setup_data *sd) +{ + int sdsize = sizeof(struct setup_data) + sd->len; + + sd->next = real_mode->setup_data; + real_mode->setup_data = add_buffer(info, sd, sdsize, sdsize, getpagesize(), + 0x100000, ULONG_MAX, INT_MAX); +} + +/* + * setup_efi_data will collect below data and pass them to 2nd kernel. + * 1) SMBIOS, fw_vendor, runtime, config_table, they are passed via x86 + * setup_data. + * 2) runtime memory regions, set the memmap related fields in efi_info. + */ +static int setup_efi_data(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + int64_t memmap_paddr; + struct setup_data *sd; + struct efi_setup_data *esd; + struct efi_mem_descriptor *maps; + int nr_maps, size, ret = 0; + struct efi_info *ei = (struct efi_info *)real_mode->efi_info; + + ret = access("/sys/firmware/efi/systab", F_OK); + if (ret < 0) + goto out; + + esd = malloc(sizeof(struct efi_setup_data)); + if (!esd) { + ret = 1; + goto out; + } + memset(esd, 0, sizeof(struct efi_setup_data)); + ret = get_efi_values(esd); + if (ret) + goto free_esd; + nr_maps = get_efi_runtime_map(&maps); + if (!nr_maps) { + ret = 2; + goto free_esd; + } + sd = malloc(sizeof(struct setup_data) + sizeof(*esd)); + if (!sd) { + ret = 3; + goto free_maps; + } + + memset(sd, 0, sizeof(struct setup_data) + sizeof(*esd)); + sd->next = 0; + sd->type = SETUP_EFI; + sd->len = sizeof(*esd); + memcpy(sd->data, esd, sizeof(*esd)); + free(esd); + + add_setup_data(info, real_mode, sd); + + size = nr_maps * sizeof(struct efi_mem_descriptor); + memmap_paddr = add_buffer(info, maps, size, size, getpagesize(), + 0x100000, ULONG_MAX, INT_MAX); + ei->efi_memmap = memmap_paddr & 0xffffffff; + ei->efi_memmap_hi = memmap_paddr >> 32; + ei->efi_memmap_size = size; + ei->efi_memdesc_size = sizeof(struct efi_mem_descriptor); + + return 0; +free_maps: + free(maps); +free_esd: + free(esd); +out: + return ret; +} + +static void add_e820_map_from_mr(struct x86_linux_param_header *real_mode, + struct e820entry *e820, struct memory_range *range, int nr_range) +{ + int i; + + for (i = 0; i < nr_range; i++) { + e820[i].addr = range[i].start; + e820[i].size = range[i].end - range[i].start + 1; + switch (range[i].type) { + case RANGE_RAM: + e820[i].type = E820_RAM; + break; + case RANGE_ACPI: + e820[i].type = E820_ACPI; + break; + case RANGE_ACPI_NVS: + e820[i].type = E820_NVS; + break; + case RANGE_PMEM: + e820[i].type = E820_PMEM; + break; + case RANGE_PRAM: + e820[i].type = E820_PRAM; + break; + default: + case RANGE_RESERVED: + e820[i].type = E820_RESERVED; + break; + } + dbgprintf("%016lx-%016lx (%d)\n", + e820[i].addr, + e820[i].addr + e820[i].size - 1, + e820[i].type); + + if (range[i].type != RANGE_RAM) + continue; + if ((range[i].start <= 0x100000) && range[i].end > 0x100000) { + unsigned long long mem_k = (range[i].end >> 10) - (0x100000 >> 10); + real_mode->ext_mem_k = mem_k; + real_mode->alt_mem_k = mem_k; + if (mem_k > 0xfc00) { + real_mode->ext_mem_k = 0xfc00; /* 64M */ + } + if (mem_k > 0xffffffff) { + real_mode->alt_mem_k = 0xffffffff; + } + } + } +} + +static void setup_e820_ext(struct kexec_info *info, struct x86_linux_param_header *real_mode, + struct memory_range *range, int nr_range) +{ + struct setup_data *sd; + struct e820entry *e820; + int nr_range_ext; + + nr_range_ext = nr_range - E820MAX; + sd = xmalloc(sizeof(struct setup_data) + nr_range_ext * sizeof(struct e820entry)); + sd->next = 0; + sd->len = nr_range_ext * sizeof(struct e820entry); + sd->type = SETUP_E820_EXT; + + e820 = (struct e820entry *) sd->data; + dbgprintf("Extended E820 via setup_data:\n"); + add_e820_map_from_mr(real_mode, e820, range + E820MAX, nr_range_ext); + add_setup_data(info, real_mode, sd); +} + +static void setup_e820(struct kexec_info *info, struct x86_linux_param_header *real_mode) +{ + struct memory_range *range; + int nr_range, nr_range_saved; + + + if (info->kexec_flags & KEXEC_ON_CRASH && !arch_options.pass_memmap_cmdline) { + range = info->crash_range; + nr_range = info->nr_crash_ranges; + } else { + range = info->memory_range; + nr_range = info->memory_ranges; + } + + nr_range_saved = nr_range; + if (nr_range > E820MAX) { + nr_range = E820MAX; + } + + real_mode->e820_map_nr = nr_range; + dbgprintf("E820 memmap:\n"); + add_e820_map_from_mr(real_mode, real_mode->e820_map, range, nr_range); + + if (nr_range_saved > E820MAX) { + dbgprintf("extra E820 memmap are passed via setup_data\n"); + setup_e820_ext(info, real_mode, range, nr_range_saved); + } +} + +static void setup_rng_seed(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + struct { + struct setup_data header; + uint8_t rng_seed[32]; + } *sd; + + sd = xmalloc(sizeof(*sd)); + sd->header.next = 0; + sd->header.len = sizeof(sd->rng_seed); + sd->header.type = SETUP_RNG_SEED; + + if (getrandom(sd->rng_seed, sizeof(sd->rng_seed), GRND_NONBLOCK) != + sizeof(sd->rng_seed)) + return; /* Not initialized, so don't pass a seed. */ + + add_setup_data(info, real_mode, &sd->header); +} + +static int +get_efi_mem_desc_version(struct x86_linux_param_header *real_mode) +{ + struct efi_info *ei = (struct efi_info *)real_mode->efi_info; + + return ei->efi_memdesc_version; +} + +static void setup_efi_info(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + int ret, desc_version; + off_t offset = offsetof(typeof(*real_mode), efi_info); + + ret = get_bootparam(&real_mode->efi_info, offset, 32); + if (ret) + return; + if (((struct efi_info *)real_mode->efi_info)->efi_memmap_size == 0) + /* zero filled efi_info */ + goto out; + desc_version = get_efi_mem_desc_version(real_mode); + if (desc_version != 1) { + fprintf(stderr, + "efi memory descriptor version %d is not supported!\n", + desc_version); + goto out; + } + ret = setup_efi_data(info, real_mode); + if (ret) + goto out; + + return; + +out: + memset(&real_mode->efi_info, 0, 32); + return; +} + +void setup_linux_system_parameters(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + int err; + + /* get subarch from running kernel */ + setup_subarch(real_mode); + if (bzImage_support_efi_boot && !arch_options.noefi) + setup_efi_info(info, real_mode); + + /* Default screen size */ + real_mode->orig_x = 0; + real_mode->orig_y = 0; + real_mode->orig_video_page = 0; + real_mode->orig_video_mode = 0; + real_mode->orig_video_cols = 80; + real_mode->orig_video_lines = 25; + real_mode->orig_video_ega_bx = 0; + real_mode->orig_video_isVGA = 1; + real_mode->orig_video_points = 16; + + /* setup vesa fb if possible, or just use original screen_info */ + err = setup_linux_vesafb(real_mode); + if (err) { + uint16_t cl_magic, cl_offset; + + /* save and restore the old cmdline param if needed */ + cl_magic = real_mode->cl_magic; + cl_offset = real_mode->cl_offset; + + err = get_bootparam(real_mode, 0, sizeof(struct screen_info)); + if (!err) { + real_mode->cl_magic = cl_magic; + real_mode->cl_offset = cl_offset; + } + } + /* Fill in the memsize later */ + real_mode->ext_mem_k = 0; + real_mode->alt_mem_k = 0; + real_mode->e820_map_nr = 0; + + /* Default APM info */ + memset(&real_mode->apm_bios_info, 0, sizeof(real_mode->apm_bios_info)); + /* Default drive info */ + memset(&real_mode->drive_info, 0, sizeof(real_mode->drive_info)); + /* Default sysdesc table */ + real_mode->sys_desc_table.length = 0; + + /* default yes: this can be overridden on the command line */ + real_mode->mount_root_rdonly = 0xFFFF; + + /* default /dev/hda + * this can be overrident on the command line if necessary. + */ + real_mode->root_dev = (0x3 <<8)| 0; + + /* another safe default */ + real_mode->aux_device_info = 0; + + setup_e820(info, real_mode); + + /* pass RNG seed */ + setup_rng_seed(info, real_mode); + + /* fill the EDD information */ + setup_edd_info(real_mode); + + /* Always try to fill acpi_rsdp_addr */ + real_mode->acpi_rsdp_addr = get_acpi_rsdp(); +} + +void setup_linux_dtb(struct kexec_info *info, struct x86_linux_param_header *real_mode, + const char *dtb_buf, int dtb_len) +{ + struct setup_data *sd; + + sd = xmalloc(sizeof(struct setup_data) + dtb_len); + sd->next = 0; + sd->len = dtb_len; + sd->type = SETUP_DTB; + memcpy(sd->data, dtb_buf, dtb_len); + + + add_setup_data(info, real_mode, sd); +} diff --git a/kexec/arch/i386/x86-linux-setup.h b/kexec/arch/i386/x86-linux-setup.h new file mode 100644 index 0000000..b5e1ad5 --- /dev/null +++ b/kexec/arch/i386/x86-linux-setup.h @@ -0,0 +1,38 @@ +#ifndef X86_LINUX_SETUP_H +#define X86_LINUX_SETUP_H +#include <x86/x86-linux.h> + +void init_linux_parameters(struct x86_linux_param_header *real_mode); +void setup_linux_bootloader_parameters_high( + struct kexec_info *info, struct x86_linux_param_header *real_mode, + unsigned long real_mode_base, unsigned long cmdline_offset, + const char *cmdline, off_t cmdline_len, + const char *initrd_buf, off_t initrd_size, int initrd_high); +static inline void setup_linux_bootloader_parameters( + struct kexec_info *info, struct x86_linux_param_header *real_mode, + unsigned long real_mode_base, unsigned long cmdline_offset, + const char *cmdline, off_t cmdline_len, + const char *initrd_buf, off_t initrd_size) +{ + setup_linux_bootloader_parameters_high(info, + real_mode, real_mode_base, + cmdline_offset, cmdline, cmdline_len, + initrd_buf, initrd_size, 0); +} +void setup_linux_system_parameters(struct kexec_info *info, + struct x86_linux_param_header *real_mode); +void setup_linux_dtb(struct kexec_info *info, struct x86_linux_param_header *real_mode, + const char *dtb_buf, int dtb_len); +int get_bootparam(void *buf, off_t offset, size_t size); + + +#define SETUP_BASE 0x90000 +#define KERN32_BASE 0x100000 /* 1MB */ +#define INITRD_BASE 0x1000000 /* 16MB */ + +/* command line parameter may be appended by purgatory */ +#define PURGATORY_CMDLINE_SIZE 64 +extern int bzImage_support_efi_boot; +extern struct arch_options_t arch_options; + +#endif /* X86_LINUX_SETUP_H */ diff --git a/kexec/arch/ia64/Makefile b/kexec/arch/ia64/Makefile new file mode 100644 index 0000000..f5b212b --- /dev/null +++ b/kexec/arch/ia64/Makefile @@ -0,0 +1,16 @@ +# +# kexec ia64 (linux booting linux) +# +ia64_KEXEC_SRCS = kexec/arch/ia64/kexec-iomem.c +ia64_KEXEC_SRCS += kexec/arch/ia64/kexec-ia64.c +ia64_KEXEC_SRCS += kexec/arch/ia64/kexec-elf-ia64.c +ia64_KEXEC_SRCS += kexec/arch/ia64/kexec-elf-rel-ia64.c +ia64_KEXEC_SRCS += kexec/arch/ia64/crashdump-ia64.c + +ia64_PROC_IOMEM = + +dist += kexec/arch/ia64/Makefile $(ia64_KEXEC_SRCS) \ + kexec/arch/ia64/kexec-ia64.h kexec/arch/ia64/crashdump-ia64.h \ + kexec/arch/ia64/include/arch/options.h + + diff --git a/kexec/arch/ia64/crashdump-ia64.c b/kexec/arch/ia64/crashdump-ia64.c new file mode 100644 index 0000000..755ee5e --- /dev/null +++ b/kexec/arch/ia64/crashdump-ia64.c @@ -0,0 +1,293 @@ +/* + * kexec: crashdump support + * Copyright (C) 2005-2006 Zou Nan hai <nanhai.zou@intel.com> Intel Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-ia64.h" +#include "crashdump-ia64.h" +#include "../kexec/crashdump.h" + +int memory_ranges = 0; +#define LOAD_OFFSET (0xa000000000000000UL + 0x100000000UL - \ + kernel_code_start) + +static struct crash_elf_info elf_info = +{ + class: ELFCLASS64, + data: ELFDATA2LSB, + machine: EM_IA_64, + page_offset: PAGE_OFFSET, +}; + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region. + * The number of entries in memory_range array is always smaller than + * the number of entries in the file returned by proc_iomem(), + * stored in max_memory_ranges. */ +static struct memory_range *crash_memory_range; +/* Memory region reserved for storing panic kernel and other data. */ +static struct memory_range crash_reserved_mem; +unsigned long elfcorehdr; +static unsigned long kernel_code_start; +static unsigned long kernel_code_end; +struct loaded_segment { + unsigned long start; + unsigned long end; +}; + +#define MAX_LOAD_SEGMENTS 128 +struct loaded_segment loaded_segments[MAX_LOAD_SEGMENTS]; + +unsigned long loaded_segments_num, loaded_segments_base; +static int seg_comp(const void *a, const void *b) +{ + const struct loaded_segment *x = a, *y = b; + /* avoid overflow */ + if (x->start > y->start) return 1; + if (x->start < y->start) return -1; + return 0; +} + +/* purgatory code need this info to patch the EFI memmap + */ +static void add_loaded_segments_info(struct mem_ehdr *ehdr) +{ + unsigned i = 0; + while(i < ehdr->e_phnum) { + struct mem_phdr *phdr; + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type != PT_LOAD) { + i++; + continue; + } + + loaded_segments[loaded_segments_num].start = + _ALIGN_DOWN(phdr->p_paddr, ELF_PAGE_SIZE); + loaded_segments[loaded_segments_num].end = + loaded_segments[loaded_segments_num].start; + + /* Consolidate consecutive PL_LOAD segments into one. + * The end addr of the last PL_LOAD segment, calculated by + * adding p_memsz to p_paddr & rounded up to ELF_PAGE_SIZE, + * will be the end address of this loaded_segments entry. + */ + while (i < ehdr->e_phnum) { + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type != PT_LOAD) + break; + loaded_segments[loaded_segments_num].end = + _ALIGN(phdr->p_paddr + phdr->p_memsz, + ELF_PAGE_SIZE); + i++; + } + loaded_segments_num++; + } +} + +/* Removes crash reserve region from list of memory chunks for whom elf program + * headers have to be created. Assuming crash reserve region to be a single + * continuous area fully contained inside one of the memory chunks */ +static int exclude_crash_reserve_region(int *nr_ranges) +{ + int i, j, tidx = -1; + unsigned long cstart, cend; + struct memory_range temp_region; + + /* Crash reserved region. */ + cstart = crash_reserved_mem.start; + cend = crash_reserved_mem.end; + + for (i = 0; i < (*nr_ranges); i++) { + unsigned long mstart, mend; + mstart = crash_memory_range[i].start; + mend = crash_memory_range[i].end; + if (cstart < mend && cend > mstart) { + if (cstart != mstart && cend != mend) { + /* Split memory region */ + crash_memory_range[i].end = cstart - 1; + temp_region.start = cend + 1; + temp_region.end = mend; + temp_region.type = RANGE_RAM; + tidx = i+1; + } else if (cstart != mstart) + crash_memory_range[i].end = cstart - 1; + else + crash_memory_range[i].start = cend + 1; + } + } + /* Insert split memory region, if any. */ + if (tidx >= 0) { + if (*nr_ranges == max_memory_ranges) { + /* No space to insert another element. */ + fprintf(stderr, "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + return -1; + } + for (j = (*nr_ranges - 1); j >= tidx; j--) + crash_memory_range[j+1] = crash_memory_range[j]; + crash_memory_range[tidx].start = temp_region.start; + crash_memory_range[tidx].end = temp_region.end; + crash_memory_range[tidx].type = temp_region.type; + (*nr_ranges)++; + } + return 0; +} + +static int get_crash_memory_ranges(int *ranges) +{ + const char *iomem = proc_iomem(); + char line[MAX_LINE]; + FILE *fp; + unsigned long start, end; + + crash_memory_range = xmalloc(sizeof(struct memory_range) * + max_memory_ranges); + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + while(fgets(line, sizeof(line), fp) != 0) { + char *str; + int type, consumed, count; + if (memory_ranges >= max_memory_ranges) + break; + count = sscanf(line, "%lx-%lx : %n", + &start, &end, &consumed); + str = line + consumed; + if (count != 2) + continue; + + if (memcmp(str, "System RAM\n", 11) == 0) { + type = RANGE_RAM; + } else if (memcmp(str, "Crash kernel\n", 13) == 0) { + /* Reserved memory region. New kernel can + * use this region to boot into. */ + crash_reserved_mem.start = start; + crash_reserved_mem.end = end; + crash_reserved_mem.type = RANGE_RAM; + continue; + } + else if (memcmp(str, "Kernel code\n", 12) == 0) { + kernel_code_start = start; + kernel_code_end = end; + continue; + } else if (memcmp(str, "Uncached RAM\n", 13) == 0) { + type = RANGE_UNCACHED; + } else { + continue; + } + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = type; + memory_ranges++; + } + fclose(fp); + if (exclude_crash_reserve_region(&memory_ranges) < 0) + return -1; + *ranges = memory_ranges; + return 0; +} + +/* + * Note that this assignes a malloced pointer to *cmdline, + * which is likely never freed by the caller + */ +static void +cmdline_add_elfcorehdr(const char **cmdline, unsigned long addr) +{ + char *str; + char buf[64]; + size_t len; + sprintf(buf, " elfcorehdr=%ldK", addr/1024); + len = strlen(*cmdline) + strlen(buf) + 1; + str = xmalloc(len); + sprintf(str, "%s%s", *cmdline, buf); + *cmdline = str; +} + +int load_crashdump_segments(struct kexec_info *info, struct mem_ehdr *ehdr, + unsigned long max_addr, unsigned long min_base, + const char **cmdline) +{ + int nr_ranges; + unsigned long sz; + size_t size; + void *tmp; + if (info->kexec_flags & KEXEC_ON_CRASH && + get_crash_memory_ranges(&nr_ranges) == 0) { + int i; + + elf_info.kern_paddr_start = kernel_code_start; + for (i=0; i < nr_ranges; i++) { + unsigned long long mstart = crash_memory_range[i].start; + unsigned long long mend = crash_memory_range[i].end; + if (!mstart && !mend) + continue; + if (kernel_code_start >= mstart && + kernel_code_start < mend) { + elf_info.kern_vaddr_start = mstart + LOAD_OFFSET; + break; + } + } + elf_info.kern_size = kernel_code_end - kernel_code_start + 1; + if (crash_create_elf64_headers(info, &elf_info, + crash_memory_range, nr_ranges, + &tmp, &sz, EFI_PAGE_SIZE) < 0) + return -1; + + elfcorehdr = add_buffer(info, tmp, sz, sz, EFI_PAGE_SIZE, + min_base, max_addr, -1); + loaded_segments[loaded_segments_num].start = elfcorehdr; + loaded_segments[loaded_segments_num].end = elfcorehdr + sz; + loaded_segments_num++; + cmdline_add_elfcorehdr(cmdline, elfcorehdr); + } + add_loaded_segments_info(ehdr); + size = sizeof(struct loaded_segment) * loaded_segments_num; + qsort(loaded_segments, loaded_segments_num, + sizeof(struct loaded_segment), seg_comp); + loaded_segments_base = add_buffer(info, loaded_segments, + size, size, 16, 0, max_addr, -1); + + elf_rel_set_symbol(&info->rhdr, "__loaded_segments", + &loaded_segments_base, sizeof(long)); + elf_rel_set_symbol(&info->rhdr, "__loaded_segments_num", + &loaded_segments_num, sizeof(long)); + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + uint64_t start, end; + + return parse_iomem_single("Crash kernel\n", &start, + &end) == 0 ? (start != end) : 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + return parse_iomem_single("Crash kernel\n", start, end); +} diff --git a/kexec/arch/ia64/crashdump-ia64.h b/kexec/arch/ia64/crashdump-ia64.h new file mode 100644 index 0000000..72ba054 --- /dev/null +++ b/kexec/arch/ia64/crashdump-ia64.h @@ -0,0 +1,12 @@ +#ifndef CRASHDUMP_IA64_H +#define CRASHDUMP_IA64_H + +#define PAGE_OFFSET 0xe000000000000000UL +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +extern int load_crashdump_segments(struct kexec_info *info, + struct mem_ehdr *ehdr, unsigned long max_addr, + unsigned long min_base, const char **cmdline); + +#define CRASH_MAX_MEMMAP_NR (KEXEC_MAX_SEGMENTS + 1) + +#endif diff --git a/kexec/arch/ia64/include/arch/options.h b/kexec/arch/ia64/include/arch/options.h new file mode 100644 index 0000000..e8754ad --- /dev/null +++ b/kexec/arch/ia64/include/arch/options.h @@ -0,0 +1,42 @@ +#ifndef KEXEC_ARCH_IA64_OPTIONS_H +#define KEXEC_ARCH_IA64_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_RAMDISK (OPT_ARCH_MAX+1) +#define OPT_NOIO (OPT_ARCH_MAX+2) +#define OPT_VMM (OPT_ARCH_MAX+3) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + {"command-line", 1, 0, OPT_APPEND}, \ + {"append", 1, 0, OPT_APPEND}, \ + {"initrd", 1, 0, OPT_RAMDISK}, \ + {"noio", 0, 0, OPT_NOIO}, \ + {"vmm", 1, 0, OPT_VMM}, \ + +#define KEXEC_ALL_OPT_STR KEXEC_OPT_STR + +#endif /* KEXEC_ARCH_IA64_OPTIONS_H */ diff --git a/kexec/arch/ia64/kexec-elf-ia64.c b/kexec/arch/ia64/kexec-elf-ia64.c new file mode 100644 index 0000000..142dee3 --- /dev/null +++ b/kexec/arch/ia64/kexec-elf-ia64.c @@ -0,0 +1,303 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) + * Copyright (C) 2004 Albert Herranz + * Copyright (C) 2004 Silicon Graphics, Inc. + * Jesse Barnes <jbarnes@sgi.com> + * Copyright (C) 2004 Khalid Aziz <khalid.aziz@hp.com> Hewlett Packard Co + * Copyright (C) 2005 Zou Nan hai <nanhai.zou@intel.com> Intel Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <limits.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec-elf.h" +#include "kexec-ia64.h" +#include "crashdump-ia64.h" +#include <arch/options.h> + +static const int probe_debug = 0; +extern unsigned long saved_efi_memmap_size; + +/* + * elf_ia64_probe - sanity check the elf image + * + * Make sure that the file image has a reasonable chance of working. + */ +int elf_ia64_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + if (probe_debug) { + fprintf(stderr, "Not an ELF executable\n"); + } + return -1; + } + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_IA_64) { + /* for a different architecture */ + if (probe_debug) { + fprintf(stderr, "Not for this architecture.\n"); + } + return -1; + } + return 0; +} + +void elf_ia64_usage(void) +{ + printf(" --command-line=STRING Set the kernel command line to " + "STRING.\n" + " --append=STRING Set the kernel command line to " + "STRING.\n" + " --initrd=FILE Use FILE as the kernel's initial " + "ramdisk.\n" + " --noio Disable I/O in purgatory code.\n" + " --vmm=FILE Use FILE as the kernel image for a\n" + " virtual machine monitor " + "(aka hypervisor)\n"); +} + +/* Move the crash kerenl physical offset to reserved region + */ +void move_loaded_segments(struct mem_ehdr *ehdr, unsigned long addr) +{ + unsigned i; + long offset = 0; + int found = 0; + struct mem_phdr *phdr; + for(i = 0; i < ehdr->e_phnum; i++) { + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type == PT_LOAD) { + offset = addr - phdr->p_paddr; + found++; + break; + } + } + if (!found) + die("move_loaded_segments: no PT_LOAD region 0x%016x\n", addr); + ehdr->e_entry += offset; + for(i = 0; i < ehdr->e_phnum; i++) { + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type == PT_LOAD) + phdr->p_paddr += offset; + } +} + +int elf_ia64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + const char *command_line, *ramdisk=0, *vmm=0, *kernel_buf; + char *ramdisk_buf = NULL; + off_t ramdisk_size = 0, kernel_size; + unsigned long command_line_len; + unsigned long entry, max_addr, gp_value; + unsigned long command_line_base, ramdisk_base, image_base; + unsigned long efi_memmap_base, efi_memmap_size; + unsigned long boot_param_base; + unsigned long noio=0; + int result; + int opt; + char *efi_memmap_buf, *boot_param; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"command-line", 1, 0, OPT_APPEND}, + {"append", 1, 0, OPT_APPEND}, + {"initrd", 1, 0, OPT_RAMDISK}, + {"noio", 0, 0, OPT_NOIO}, + {"vmm", 1, 0, OPT_VMM}, + {0, 0, 0, 0}, + }; + + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + command_line = 0; + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + command_line = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_NOIO: /* disable PIO and MMIO in purgatory code*/ + noio = 1; + break; + case OPT_VMM: + vmm = optarg; + break; + } + } + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) + 16; + } + + if (vmm) + kernel_buf = slurp_decompress_file(vmm, &kernel_size); + else { + kernel_buf = buf; + kernel_size = len; + } + + /* Parse the Elf file */ + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + if (result < 0) { + fprintf(stderr, "ELF parse failed\n"); + free_elf_info(&ehdr); + return result; + } + + if (info->kexec_flags & KEXEC_ON_CRASH ) { + if ((mem_min == 0x00) && (mem_max == ULONG_MAX)) { + fprintf(stderr, "Failed to find crash kernel region " + "in %s\n", proc_iomem()); + free_elf_info(&ehdr); + return -1; + } + move_loaded_segments(&ehdr, mem_min); + } else if (update_loaded_segments(&ehdr) < 0) { + fprintf(stderr, "Failed to place kernel\n"); + return -1; + } + + entry = ehdr.e_entry; + max_addr = elf_max_addr(&ehdr); + + /* Load the Elf data */ + result = elf_exec_load(&ehdr, info); + if (result < 0) { + fprintf(stderr, "ELF load failed\n"); + free_elf_info(&ehdr); + return result; + } + + + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0x0, ULONG_MAX, -1, 0); + + + if (load_crashdump_segments(info, &ehdr, max_addr, 0, + &command_line) < 0) + return -1; + + // reverve 4k for ia64_boot_param + boot_param = xmalloc(4096); + boot_param_base = add_buffer(info, boot_param, 4096, 4096, 4096, 0, + max_addr, -1); + + elf_rel_set_symbol(&info->rhdr, "__noio", + &noio, sizeof(long)); + + elf_rel_set_symbol(&info->rhdr, "__boot_param_base", + &boot_param_base, sizeof(long)); + + // reserve efi_memmap of actual size allocated in production kernel + efi_memmap_size = saved_efi_memmap_size; + efi_memmap_buf = xmalloc(efi_memmap_size); + efi_memmap_base = add_buffer(info, efi_memmap_buf, + efi_memmap_size, efi_memmap_size, 4096, 0, + max_addr, -1); + + elf_rel_set_symbol(&info->rhdr, "__efi_memmap_base", + &efi_memmap_base, sizeof(long)); + + elf_rel_set_symbol(&info->rhdr, "__efi_memmap_size", + &efi_memmap_size, sizeof(long)); + if (command_line) { + command_line_len = strlen(command_line) + 1; + } + if (command_line_len || (info->kexec_flags & KEXEC_ON_CRASH )) { + char *cmdline = xmalloc(command_line_len); + strcpy(cmdline, command_line); + + if (info->kexec_flags & KEXEC_ON_CRASH) { + char buf[128]; + sprintf(buf," max_addr=%lluM min_addr=%lluM", + mem_max>>20, mem_min>>20); + command_line_len = strlen(cmdline) + strlen(buf) + 1; + cmdline = xrealloc(cmdline, command_line_len); + strcat(cmdline, buf); + } + + command_line_len = _ALIGN(command_line_len, 16); + command_line_base = add_buffer(info, cmdline, + command_line_len, command_line_len, + getpagesize(), 0UL, + max_addr, -1); + elf_rel_set_symbol(&info->rhdr, "__command_line_len", + &command_line_len, sizeof(long)); + elf_rel_set_symbol(&info->rhdr, "__command_line", + &command_line_base, sizeof(long)); + } + + if (ramdisk) { + ramdisk_buf = slurp_file(ramdisk, &ramdisk_size); + ramdisk_base = add_buffer(info, ramdisk_buf, ramdisk_size, + ramdisk_size, + getpagesize(), 0, max_addr, -1); + elf_rel_set_symbol(&info->rhdr, "__ramdisk_base", + &ramdisk_base, sizeof(long)); + elf_rel_set_symbol(&info->rhdr, "__ramdisk_size", + &ramdisk_size, sizeof(long)); + } + + if (vmm) { + image_base = add_buffer(info, buf, len, len, + getpagesize(), 0, max_addr, -1); + elf_rel_set_symbol(&info->rhdr, "__vmcode_base", + &image_base, sizeof(long)); + elf_rel_set_symbol(&info->rhdr, "__vmcode_size", + &len, sizeof(long)); + } + + gp_value = info->rhdr.rel_addr + 0x200000; + elf_rel_set_symbol(&info->rhdr, "__gp_value", &gp_value, + sizeof(gp_value)); + + elf_rel_set_symbol(&info->rhdr, "__kernel_entry", &entry, + sizeof(entry)); + free_elf_info(&ehdr); + return 0; +} diff --git a/kexec/arch/ia64/kexec-elf-rel-ia64.c b/kexec/arch/ia64/kexec-elf-rel-ia64.c new file mode 100644 index 0000000..500f247 --- /dev/null +++ b/kexec/arch/ia64/kexec-elf-rel-ia64.c @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2005-2006 Zou Nan hai (nanhai.zou@intel.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* pugatory relocation code + * Most of the code in this file is + * based on arch/ia64/kernel/module.c in Linux kernel + */ + + +/* Most of the code in this file is + * based on arch/ia64/kernel/module.c in Linux kernel + */ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +#define MAX_LTOFF ((uint64_t) (1 << 22)) + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2LSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS64) { + return 0; + } + if (ehdr->e_machine != EM_IA_64) { + return 0; + } + return 1; +} + +static void +ia64_patch (uint64_t insn_addr, uint64_t mask, uint64_t val) +{ + uint64_t m0, m1, v0, v1, b0, b1, *b = (uint64_t *) (insn_addr & -16); +# define insn_mask ((1UL << 41) - 1) + unsigned long shift; + + b0 = b[0]; b1 = b[1]; + shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */ + if (shift >= 64) { + m1 = mask << (shift - 64); + v1 = val << (shift - 64); + } else { + m0 = mask << shift; m1 = mask >> (64 - shift); + v0 = val << shift; v1 = val >> (64 - shift); + b[0] = (b0 & ~m0) | (v0 & m0); + } + b[1] = (b1 & ~m1) | (v1 & m1); +} + +static inline uint64_t +bundle (const uint64_t insn) +{ + return insn & ~0xfUL; +} + +void machine_apply_elf_rel(struct mem_ehdr *ehdr, + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + uint64_t gp_value = ehdr->rel_addr + 0x200000; + switch(r_type) { + case R_IA64_NONE: + break; + case R_IA64_SEGREL64LSB: + case R_IA64_DIR64LSB: + *((uint64_t *)location) = value; + break; + case R_IA64_DIR32LSB: + *((uint32_t *)location) = value; + if (value != *((uint32_t *)location)) + goto overflow; + break; + case R_IA64_IMM64: + ia64_patch((uint64_t)location, 0x01fffefe000UL, + /* bit 63 -> 36 */ + (((value & 0x8000000000000000UL) >> 27) + /* bit 21 -> 21 */ + | ((value & 0x0000000000200000UL) << 0) + /* bit 16 -> 22 */ + | ((value & 0x00000000001f0000UL) << 6) + /* bit 7 -> 27 */ + | ((value & 0x000000000000ff80UL) << 20) + /* bit 0 -> 13 */ + | ((value & 0x000000000000007fUL) << 13))); + ia64_patch((uint64_t)location - 1, 0x1ffffffffffUL, value>>22); + break; + case R_IA64_IMM22: + if (value + (1 << 21) >= (1 << 22)) + die("value out of IMM22 range\n"); + ia64_patch((uint64_t)location, 0x01fffcfe000UL, + /* bit 21 -> 36 */ + (((value & 0x200000UL) << 15) + /* bit 16 -> 22 */ + | ((value & 0x1f0000UL) << 6) + /* bit 7 -> 27 */ + | ((value & 0x00ff80UL) << 20) + /* bit 0 -> 13 */ + | ((value & 0x00007fUL) << 13) )); + break; + case R_IA64_PCREL21B: { + uint64_t delta = ((int64_t)value - (int64_t)address)/16; + if (delta + (1 << 20) >= (1 << 21)) + die("value out of IMM21B range\n"); + value = ((int64_t)(value - bundle(address)))/16; + ia64_patch((uint64_t)location, 0x11ffffe000UL, + (((value & 0x100000UL) << 16) /* bit 20 -> 36 */ + | ((value & 0x0fffffUL) << 13) /* bit 0 -> 13 */)); + } + break; + case R_IA64_PCREL64LSB: { + value = value - address; + put_unaligned(value, (uint64_t *)location); + } break; + case R_IA64_GPREL22: + case R_IA64_LTOFF22X: + if (value - gp_value + MAX_LTOFF/2 >= MAX_LTOFF) + die("value out of gp relative range"); + value -= gp_value; + ia64_patch((uint64_t)location, 0x01fffcfe000UL, + (((value & 0x200000UL) << 15) /* bit 21 -> 36 */ + |((value & 0x1f0000UL) << 6) /* bit 16 -> 22 */ + |((value & 0x00ff80UL) << 20) /* bit 7 -> 27 */ + |((value & 0x00007fUL) << 13) /* bit 0 -> 13 */)); + break; + case R_IA64_LDXMOV: + if (value - gp_value + MAX_LTOFF/2 >= MAX_LTOFF) + die("value out of gp relative range"); + ia64_patch((uint64_t)location, 0x1fff80fe000UL, 0x10000000000UL); + break; + case R_IA64_LTOFF22: + + default: + die("Unknown rela relocation: 0x%lx 0x%lx\n", + r_type, address); + break; + } + return; +overflow: + die("overflow in relocation type %lu val %llx\n", + r_type, value); +} diff --git a/kexec/arch/ia64/kexec-ia64.c b/kexec/arch/ia64/kexec-ia64.c new file mode 100644 index 0000000..418d997 --- /dev/null +++ b/kexec/arch/ia64/kexec-ia64.c @@ -0,0 +1,247 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * Copyright (C) 2004 Albert Herranz + * Copyright (C) 2004 Silicon Graphics, Inc. + * Jesse Barnes <jbarnes@sgi.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <sched.h> +#include <limits.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "elf.h" +#include "kexec-ia64.h" +#include <arch/options.h> + +/* The number of entries in memory_range array is always smaller than + * the number of entries in the file returned by proc_iomem(), + * stored in max_memory_ranges. */ +static struct memory_range *memory_range; +int max_memory_ranges; +static int memory_ranges; +unsigned long saved_efi_memmap_size; + +/* Reserve range for EFI memmap and Boot parameter */ +static int split_range(int range, unsigned long start, unsigned long end) +{ + unsigned long ram_end = memory_range[range - 1].end; + unsigned int type = memory_range[range - 1].type; + int i; + //align end and start to page size of EFI + start = _ALIGN_DOWN(start, 1UL<<12); + end = _ALIGN(end, 1UL<<12); + for (i = 0; i < range; i++) + if(memory_range[i].start <= start && memory_range[i].end >=end) + break; + if (i >= range) + return range; + range = i; + if (memory_range[range].start < start) { + memory_range[range].end = start; + range++; + } + memory_range[range].start = start; + memory_range[range].end = end; + memory_range[range].type = RANGE_RESERVED; + range++; + if (end < ram_end) { + memory_range[range].start = end; + memory_range[range].end = ram_end; + memory_range[range].type = type; + range++; + } + return range; +} + +/* Return a sorted list of available memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + const char *iomem = proc_iomem(); + char line[MAX_LINE]; + FILE *fp; + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + + /* allocate memory_range dynamically */ + max_memory_ranges = 0; + while(fgets(line, sizeof(line), fp) != 0) { + max_memory_ranges++; + } + memory_range = xmalloc(sizeof(struct memory_range) * + max_memory_ranges); + rewind(fp); + + while(fgets(line, sizeof(line), fp) != 0) { + unsigned long start, end; + char *str; + unsigned type; + int consumed; + int count; + if (memory_ranges >= max_memory_ranges) + break; + count = sscanf(line, "%lx-%lx : %n", + &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + end = end + 1; + if (memcmp(str, "System RAM\n", 11) == 0) { + type = RANGE_RAM; + } + else if (memcmp(str, "reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } + else if (memcmp(str, "Crash kernel\n", 13) == 0) { + /* Redefine the memory region boundaries if kernel + * exports the limits and if it is panic kernel. + * Override user values only if kernel exported + * values are subset of user defined values. + */ + + if (kexec_flags & KEXEC_ON_CRASH) { + if (start > mem_min) + mem_min = start; + if (end < mem_max) + mem_max = end; + } + continue; + } else if (memcmp(str, "Boot parameter\n", 14) == 0) { + memory_ranges = split_range(memory_ranges, start, end); + continue; + } else if (memcmp(str, "EFI Memory Map\n", 14) == 0) { + memory_ranges = split_range(memory_ranges, start, end); + saved_efi_memmap_size = end - start; + continue; + } else if (memcmp(str, "Uncached RAM\n", 13) == 0) { + type = RANGE_UNCACHED; + } else { + continue; + } + /* + * Check if this memory range can be coalesced with + * the previous range + */ + if ((memory_ranges > 0) && + (start == memory_range[memory_ranges-1].end) && + (type == memory_range[memory_ranges-1].type)) { + memory_range[memory_ranges-1].end = end; + } + else { + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + memory_ranges++; + } + } + fclose(fp); + *range = memory_range; + *ranges = memory_ranges; + + return 0; +} + +/* Supported file types and callbacks */ +struct file_type file_type[] = { + {"elf-ia64", elf_ia64_probe, elf_ia64_load, elf_ia64_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + /* This doesn't belong here! Some sort of arch_init() ? */ + + /* execute from monarch processor */ + cpu_set_t affinity; + CPU_ZERO(&affinity); + CPU_SET(0, &affinity); + sched_setaffinity(0, sizeof(affinity), &affinity); + + return 0; +} + +const struct arch_map_entry arches[] = { + { "ia64", KEXEC_ARCH_IA_64 }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +int update_loaded_segments(struct mem_ehdr *ehdr) +{ + int i; + unsigned u; + struct mem_phdr *phdr; + unsigned long start_addr = ULONG_MAX, end_addr = 0; + unsigned long align = 1UL<<26; /* 64M */ + unsigned long start, end; + + for (u = 0; u < ehdr->e_phnum; u++) { + phdr = &ehdr->e_phdr[u]; + if (phdr->p_type != PT_LOAD) + continue; + if (phdr->p_paddr < start_addr) + start_addr = phdr->p_paddr; + if ((phdr->p_paddr + phdr->p_memsz) > end_addr) + end_addr = phdr->p_paddr + phdr->p_memsz; + } + + for (i = 0; i < memory_ranges && memory_range[i].start <= start_addr; + i++) { + if (memory_range[i].type == RANGE_RAM && + memory_range[i].end > end_addr) + return 0; + } + + for (i = 0; i < memory_ranges; i++) { + if (memory_range[i].type != RANGE_RAM) + continue; + start = _ALIGN(memory_range[i].start, align); + end = memory_range[i].end; + if (end > start && (end - start) > (end_addr - start_addr)) { + move_loaded_segments(ehdr, start); + return 0; + } + } + + return -1; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + diff --git a/kexec/arch/ia64/kexec-ia64.h b/kexec/arch/ia64/kexec-ia64.h new file mode 100644 index 0000000..31e4041 --- /dev/null +++ b/kexec/arch/ia64/kexec-ia64.h @@ -0,0 +1,14 @@ +#ifndef KEXEC_IA64_H +#define KEXEC_IA64_H + +extern int max_memory_ranges; +int elf_ia64_probe(const char *buf, off_t len); +int elf_ia64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_ia64_usage(void); +int update_loaded_segments(struct mem_ehdr *ehdr); +void move_loaded_segments(struct mem_ehdr *ehdr, unsigned long addr); + +#define EFI_PAGE_SIZE (1UL<<12) +#define ELF_PAGE_SIZE (1UL<<16) +#endif /* KEXEC_IA64_H */ diff --git a/kexec/arch/ia64/kexec-iomem.c b/kexec/arch/ia64/kexec-iomem.c new file mode 100644 index 0000000..7fd50cd --- /dev/null +++ b/kexec/arch/ia64/kexec-iomem.c @@ -0,0 +1,23 @@ +#include <stdint.h> +#include <stdio.h> +#include "../../kexec.h" +#include "../../crashdump.h" + +static const char proc_iomem_str[]= "/proc/iomem"; +static const char proc_iomem_machine_str[]= "/proc/iomem_machine"; + +/* + * On IA64 XEN the EFI tables are virtualised. + * For this reason on such systems /proc/iomem_machine is provided, + * which is based on the hypervisor's (machine's) EFI tables. + * If Xen is in use, then /proc/iomem is used for memory regions relating + * to the currently running dom0 kernel, and /proc/iomem_machine is used + * for regions relating to the machine itself or the hypervisor. + * If Xen is not in used, then /proc/iomem used. + */ +const char *proc_iomem(void) +{ + if (xen_present()) + return proc_iomem_machine_str; + return proc_iomem_str; +} diff --git a/kexec/arch/loongarch/Makefile b/kexec/arch/loongarch/Makefile new file mode 100644 index 0000000..3b33b96 --- /dev/null +++ b/kexec/arch/loongarch/Makefile @@ -0,0 +1,22 @@ +# +# kexec loongarch (linux booting linux) +# +loongarch_KEXEC_SRCS = kexec/arch/loongarch/kexec-loongarch.c +loongarch_KEXEC_SRCS += kexec/arch/loongarch/kexec-elf-loongarch.c +loongarch_KEXEC_SRCS += kexec/arch/loongarch/kexec-pei-loongarch.c +loongarch_KEXEC_SRCS += kexec/arch/loongarch/kexec-elf-rel-loongarch.c +loongarch_KEXEC_SRCS += kexec/arch/loongarch/crashdump-loongarch.c + +loongarch_MEM_REGIONS = kexec/mem_regions.c + +loongarch_CPPFLAGS += -I $(srcdir)/kexec/ + +loongarch_ADD_BUFFER = +loongarch_ADD_SEGMENT = +loongarch_VIRT_TO_PHYS = + +dist += kexec/arch/loongarch/Makefile $(loongarch_KEXEC_SRCS) \ + kexec/arch/loongarch/kexec-loongarch.h \ + kexec/arch/loongarch/image-header.h \ + kexec/arch/loongarch/crashdump-loongarch.h \ + kexec/arch/loongarch/include/arch/options.h diff --git a/kexec/arch/loongarch/crashdump-loongarch.c b/kexec/arch/loongarch/crashdump-loongarch.c new file mode 100644 index 0000000..81250e4 --- /dev/null +++ b/kexec/arch/loongarch/crashdump-loongarch.c @@ -0,0 +1,220 @@ +/* + * LoongArch crashdump. + * + * Copyright (C) 2022 Loongson Technology Corporation Limited. + * Youling Tang <tangyouling@loongson.cn> + * + * derived from crashdump-arm64.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <linux/elf.h> + +#include "kexec.h" +#include "crashdump.h" +#include "crashdump-loongarch.h" +#include "iomem.h" +#include "kexec-loongarch.h" +#include "kexec-elf.h" +#include "mem_regions.h" + +/* memory ranges of crashed kernel */ +static struct memory_ranges system_memory_rgns; + +/* memory range reserved for crashkernel */ +struct memory_range crash_reserved_mem[CRASH_MAX_RESERVED_RANGES]; +struct memory_ranges usablemem_rgns = { + .size = 0, + .max_size = CRASH_MAX_RESERVED_RANGES, + .ranges = crash_reserved_mem, +}; + +struct memory_range elfcorehdr_mem; + +static struct crash_elf_info elf_info64 = { + .class = ELFCLASS64, + .data = ELFDATA2LSB, + .machine = EM_LOONGARCH, + .page_offset = PAGE_OFFSET, +}; + +/* + * iomem_range_callback() - callback called for each iomem region + * @data: not used + * @nr: not used + * @str: name of the memory region + * @base: start address of the memory region + * @length: size of the memory region + * + * This function is called once for each memory region found in /proc/iomem. + * It locates system RAM and crashkernel reserved memory and places these to + * variables, respectively, system_memory_rgns and usablemem_rgns. + */ + +static int iomem_range_callback(void *UNUSED(data), int UNUSED(nr), + char *str, unsigned long long base, + unsigned long long length) +{ + if (strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)) == 0) + return mem_regions_alloc_and_add(&usablemem_rgns, + base, length, RANGE_RAM); + else if (strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0) + return mem_regions_alloc_and_add(&system_memory_rgns, + base, length, RANGE_RAM); + else if (strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) == 0) + elf_info64.kern_paddr_start = base; + else if (strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) == 0) + elf_info64.kern_size = base + length - elf_info64.kern_paddr_start; + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + return usablemem_rgns.size; +} + +/* + * crash_get_memory_ranges() - read system physical memory + * + * Function reads through system physical memory and stores found memory + * regions in system_memory_ranges. + * Regions are sorted in ascending order. + * + * Returns 0 in case of success and a negative value otherwise. + */ +static int crash_get_memory_ranges(void) +{ + int i; + + /* + * First read all memory regions that can be considered as + * system memory including the crash area. + */ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + /* allow one or two regions for crash dump kernel */ + if (!usablemem_rgns.size) + return -EINVAL; + + dbgprint_mem_range("Reserved memory range", + usablemem_rgns.ranges, usablemem_rgns.size); + + for (i = 0; i < usablemem_rgns.size; i++) { + if (mem_regions_alloc_and_exclude(&system_memory_rgns, + &crash_reserved_mem[i])) { + fprintf(stderr, "Cannot allocate memory for ranges\n"); + return -ENOMEM; + } + } + + /* + * Make sure that the memory regions are sorted. + */ + mem_regions_sort(&system_memory_rgns); + + dbgprint_mem_range("Coredump memory ranges", + system_memory_rgns.ranges, system_memory_rgns.size); + + /* + * For additional kernel code/data segment. + * kern_paddr_start/kern_size are determined in iomem_range_callback + */ + elf_info64.kern_vaddr_start = get_kernel_sym("_text"); + if (!elf_info64.kern_vaddr_start) + elf_info64.kern_vaddr_start = UINT64_MAX; + + return 0; +} + +/* + * load_crashdump_segments() - load the elf core header + * @info: kexec info structure + * + * This function creates and loads an additional segment of elf core header + : which is used to construct /proc/vmcore on crash dump kernel. + * + * Return 0 in case of success and -1 in case of error. + */ + +int load_crashdump_segments(struct kexec_info *info) +{ + unsigned long elfcorehdr; + unsigned long bufsz; + void *buf; + int err; + + /* + * First fetch all the memory (RAM) ranges that we are going to + * pass to the crash dump kernel during panic. + */ + + err = crash_get_memory_ranges(); + + if (err) + return EFAILED; + + err = crash_create_elf64_headers(info, &elf_info64, + system_memory_rgns.ranges, system_memory_rgns.size, + &buf, &bufsz, ELF_CORE_HEADER_ALIGN); + + if (err) + return EFAILED; + + elfcorehdr = add_buffer(info, buf, bufsz, bufsz, 1024, + crash_reserved_mem[usablemem_rgns.size - 1].start, + crash_reserved_mem[usablemem_rgns.size - 1].end, -1); + + elfcorehdr_mem.start = elfcorehdr; + elfcorehdr_mem.end = elfcorehdr + bufsz - 1; + + dbgprintf("%s: elfcorehdr 0x%llx-0x%llx\n", __func__, + elfcorehdr_mem.start, elfcorehdr_mem.end); + + return 0; +} + +/* + * e_entry and p_paddr are actually in virtual address space. + * Those values will be translated to physcal addresses by using + * virt_to_phys() in add_segment(). + * So let's fix up those values for later use so the memory base will be + * correctly replaced with crash_reserved_mem[usablemem_rgns.size - 1].start. + */ +void fixup_elf_addrs(struct mem_ehdr *ehdr) +{ + struct mem_phdr *phdr; + int i; + + ehdr->e_entry += crash_reserved_mem[usablemem_rgns.size - 1].start; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type != PT_LOAD) + continue; + phdr->p_paddr += crash_reserved_mem[usablemem_rgns.size - 1].start; + } +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + if (!usablemem_rgns.size) + return -1; + + *start = crash_reserved_mem[usablemem_rgns.size - 1].start; + *end = crash_reserved_mem[usablemem_rgns.size - 1].end; + + return 0; +} diff --git a/kexec/arch/loongarch/crashdump-loongarch.h b/kexec/arch/loongarch/crashdump-loongarch.h new file mode 100644 index 0000000..25ff24b --- /dev/null +++ b/kexec/arch/loongarch/crashdump-loongarch.h @@ -0,0 +1,26 @@ +#ifndef CRASHDUMP_LOONGARCH_H +#define CRASHDUMP_LOONGARCH_H + +struct kexec_info; +extern struct memory_ranges usablemem_rgns; +extern struct memory_range crash_reserved_mem[]; +extern struct memory_range elfcorehdr_mem; + +int load_crashdump_segments(struct kexec_info *info); +int is_crashkernel_mem_reserved(void); +void fixup_elf_addrs(struct mem_ehdr *ehdr); +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end); + +#define PAGE_OFFSET 0x9000000000000000ULL +#define MAXMEM 0 + +#define CRASH_MAX_MEMMAP_NR (KEXEC_MAX_SEGMENTS + 1) +#define CRASH_MAX_MEMORY_RANGES (MAX_MEMORY_RANGES + 2) + +/* crash dump kernel support at most two regions, low_region and high region. */ +#define CRASH_MAX_RESERVED_RANGES 2 + +#define COMMAND_LINE_SIZE 512 + +extern struct arch_options_t arch_options; +#endif /* CRASHDUMP_LOONGARCH_H */ diff --git a/kexec/arch/loongarch/image-header.h b/kexec/arch/loongarch/image-header.h new file mode 100644 index 0000000..3b75765 --- /dev/null +++ b/kexec/arch/loongarch/image-header.h @@ -0,0 +1,79 @@ +/* + * LoongArch binary image header. + */ + +#if !defined(__LOONGARCH_IMAGE_HEADER_H) +#define __LOONGARCH_IMAGE_HEADER_H + +#include <endian.h> +#include <stdint.h> + +/** + * struct loongarch_image_header + * + * @pe_sig: Optional PE format 'MZ' signature. + * @reserved_1: Reserved. + * @kernel_entry: Kernel image entry pointer. + * @image_size: An estimated size of the memory image size in LSB byte order. + * @text_offset: The image load offset in LSB byte order. + * @reserved_2: Reserved. + * @reserved_3: Reserved. + * @pe_header: Optional offset to a PE format header. + **/ + +struct loongarch_image_header { + uint8_t pe_sig[2]; + uint16_t reserved_1[3]; + uint64_t kernel_entry; + uint64_t image_size; + uint64_t text_offset; + uint64_t reserved_2[3]; + uint32_t reserved_3; + uint32_t pe_header; +}; + +static const uint8_t loongarch_image_pe_sig[2] = {'M', 'Z'}; + +/** + * loongarch_header_check_pe_sig - Helper to check the loongarch image header. + * + * Returns non-zero if 'MZ' signature is found. + */ + +static inline int loongarch_header_check_pe_sig(const struct loongarch_image_header *h) +{ + if (!h) + return 0; + + return (h->pe_sig[0] == loongarch_image_pe_sig[0] + && h->pe_sig[1] == loongarch_image_pe_sig[1]); +} + +static inline uint64_t loongarch_header_text_offset( + const struct loongarch_image_header *h) +{ + if (!h) + return 0; + + return le64toh(h->text_offset); +} + +static inline uint64_t loongarch_header_image_size( + const struct loongarch_image_header *h) +{ + if (!h) + return 0; + + return le64toh(h->image_size); +} + +static inline uint64_t loongarch_header_kernel_entry( + const struct loongarch_image_header *h) +{ + if (!h) + return 0; + + return le64toh(h->kernel_entry); +} + +#endif diff --git a/kexec/arch/loongarch/include/arch/options.h b/kexec/arch/loongarch/include/arch/options.h new file mode 100644 index 0000000..25a7dc1 --- /dev/null +++ b/kexec/arch/loongarch/include/arch/options.h @@ -0,0 +1,28 @@ +#ifndef KEXEC_ARCH_LOONGARCH_OPTIONS_H +#define KEXEC_ARCH_LOONGARCH_OPTIONS_H + +#define OPT_APPEND ((OPT_MAX)+0) +#define OPT_INITRD ((OPT_MAX)+1) +#define OPT_REUSE_CMDLINE ((OPT_MAX)+2) +#define OPT_ARCH_MAX ((OPT_MAX)+3) + +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "append", 1, NULL, OPT_APPEND }, \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "initrd", 1, NULL, OPT_INITRD }, \ + { "ramdisk", 1, NULL, OPT_INITRD }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR /* Only accept long arch options. */ +#define KEXEC_ALL_OPTIONS KEXEC_ARCH_OPTIONS +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +static const char loongarch_opts_usage[] __attribute__ ((unused)) = +" --append=STRING Set the kernel command line to STRING.\n" +" --command-line=STRING Set the kernel command line to STRING.\n" +" --initrd=FILE Use FILE as the kernel initial ramdisk.\n" +" --ramdisk=FILE Use FILE as the kernel initial ramdisk.\n" +" --reuse-cmdline Use kernel command line from running system.\n"; + +#endif /* KEXEC_ARCH_LOONGARCH_OPTIONS_H */ diff --git a/kexec/arch/loongarch/kexec-elf-loongarch.c b/kexec/arch/loongarch/kexec-elf-loongarch.c new file mode 100644 index 0000000..45387ca --- /dev/null +++ b/kexec/arch/loongarch/kexec-elf-loongarch.c @@ -0,0 +1,125 @@ +/* + * kexec-elf-loongarch.c - kexec Elf loader for loongarch + * + * Copyright (C) 2022 Loongson Technology Corporation Limited. + * Youling Tang <tangyouling@loongson.cn> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#define _GNU_SOURCE + +#include <limits.h> +#include <errno.h> +#include <elf.h> + +#include "kexec.h" +#include "kexec-elf.h" +#include "kexec-syscall.h" +#include "crashdump-loongarch.h" +#include "kexec-loongarch.h" +#include "arch/options.h" + +off_t initrd_base, initrd_size; + +int elf_loongarch_probe(const char *kernel_buf, off_t kernel_size) +{ + struct mem_ehdr ehdr; + int result; + + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + if (result < 0) { + dbgprintf("%s: Not an ELF executable.\n", __func__); + goto out; + } + + /* Verify the architecuture specific bits. */ + if (ehdr.e_machine != EM_LOONGARCH) { + dbgprintf("%s: Not an LoongArch ELF executable.\n", __func__); + result = -1; + goto out; + } + + result = 0; +out: + free_elf_info(&ehdr); + return result; +} + +int elf_loongarch_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info) +{ + const struct loongarch_image_header *header = NULL; + unsigned long kernel_segment; + struct mem_ehdr ehdr; + int result; + + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + + if (result < 0) { + dbgprintf("%s: build_elf_exec_info failed\n", __func__); + goto exit; + } + + kernel_segment = loongarch_locate_kernel_segment(info); + + if (kernel_segment == ULONG_MAX) { + dbgprintf("%s: Kernel segment is not allocated\n", __func__); + result = EFAILED; + goto exit; + } + + dbgprintf("%s: kernel_segment: %016lx\n", __func__, kernel_segment); + dbgprintf("%s: image_size: %016lx\n", __func__, + kernel_size); + dbgprintf("%s: text_offset: %016lx\n", __func__, + loongarch_mem.text_offset); + dbgprintf("%s: phys_offset: %016lx\n", __func__, + loongarch_mem.phys_offset); + dbgprintf("%s: PE format: %s\n", __func__, + (loongarch_header_check_pe_sig(header) ? "yes" : "no")); + + /* create and initialize elf core header segment */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info); + if (result) { + dbgprintf("%s: Creating eflcorehdr failed.\n", + __func__); + goto exit; + } + } + + /* load the kernel */ + if (info->kexec_flags & KEXEC_ON_CRASH) + /* + * offset addresses in elf header in order to load + * vmlinux (elf_exec) into crash kernel's memory. + */ + fixup_elf_addrs(&ehdr); + + info->entry = (void *)virt_to_phys(ehdr.e_entry); + + result = elf_exec_load(&ehdr, info); + + if (result) { + dbgprintf("%s: elf_exec_load failed\n", __func__); + goto exit; + } + + /* load additional data */ + result = loongarch_load_other_segments(info, kernel_segment + kernel_size); + +exit: + free_elf_info(&ehdr); + if (result) + fprintf(stderr, "kexec: Bad elf image file, load failed.\n"); + return result; +} + +void elf_loongarch_usage(void) +{ + printf( +" An LoongArch ELF image, little endian.\n" +" Typically vmlinux or a stripped version of vmlinux.\n\n"); +} diff --git a/kexec/arch/loongarch/kexec-elf-rel-loongarch.c b/kexec/arch/loongarch/kexec-elf-rel-loongarch.c new file mode 100644 index 0000000..59f7f5d --- /dev/null +++ b/kexec/arch/loongarch/kexec-elf-rel-loongarch.c @@ -0,0 +1,42 @@ +/* + * kexec-elf-rel-loongarch.c - kexec Elf relocation routines + * + * Copyright (C) 2022 Loongson Technology Corporation Limited. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) + return 0; + + if (ehdr->ei_class != ELFCLASS32) + return 0; + + if (ehdr->e_machine != EM_LOONGARCH) + return 0; + + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), + unsigned long r_type, + void *UNUSED(location), + unsigned long UNUSED(address), + unsigned long UNUSED(value)) +{ + switch (r_type) { + + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } +} diff --git a/kexec/arch/loongarch/kexec-loongarch.c b/kexec/arch/loongarch/kexec-loongarch.c new file mode 100644 index 0000000..f47c998 --- /dev/null +++ b/kexec/arch/loongarch/kexec-loongarch.c @@ -0,0 +1,375 @@ +/* + * kexec-loongarch.c - kexec for loongarch + * + * Copyright (C) 2022 Loongson Technology Corporation Limited. + * Youling Tang <tangyouling@loongson.cn> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <assert.h> +#include <errno.h> +#include <getopt.h> +#include <inttypes.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <linux/elf-em.h> +#include <elf.h> +#include <elf_info.h> + +#include "kexec.h" +#include "kexec-loongarch.h" +#include "crashdump-loongarch.h" +#include "iomem.h" +#include "kexec-syscall.h" +#include "mem_regions.h" +#include "arch/options.h" + +#define CMDLINE_PREFIX "kexec " +static char cmdline[COMMAND_LINE_SIZE] = CMDLINE_PREFIX; + +/* Adds "initrd=start,size" parameters to command line. */ +static int cmdline_add_initrd(char *cmdline, unsigned long addr, + unsigned long size) +{ + int cmdlen, len; + char str[50], *ptr; + + ptr = str; + strcpy(str, " initrd="); + ptr += strlen(str); + ultoa(addr, ptr); + strcat(str, ","); + ptr = str + strlen(str); + ultoa(size, ptr); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + return 0; +} + +/* Adds the appropriate "mem=size@start" options to command line, indicating the + * memory region the new kernel can use to boot into. */ +static int cmdline_add_mem(char *cmdline, unsigned long addr, + unsigned long size) +{ + int cmdlen, len; + char str[50], *ptr; + + addr = addr/1024; + size = size/1024; + ptr = str; + strcpy(str, " mem="); + ptr += strlen(str); + ultoa(size, ptr); + strcat(str, "K@"); + ptr = str + strlen(str); + ultoa(addr, ptr); + strcat(str, "K"); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + return 0; +} + +/* Adds the "elfcorehdr=size@start" command line parameter to command line. */ +static int cmdline_add_elfcorehdr(char *cmdline, unsigned long addr, + unsigned long size) +{ + int cmdlen, len; + char str[50], *ptr; + + addr = addr/1024; + size = size/1024; + ptr = str; + strcpy(str, " elfcorehdr="); + ptr += strlen(str); + ultoa(size, ptr); + strcat(str, "K@"); + ptr = str + strlen(str); + ultoa(addr, ptr); + strcat(str, "K"); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + return 0; +} + +/* Return a sorted list of memory ranges. */ +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + int memory_ranges = 0; + + const char *iomem = proc_iomem(); + char line[MAX_LINE]; + FILE *fp; + unsigned long long start, end; + char *str; + int type, consumed, count; + + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", iomem, strerror(errno)); + return -1; + } + + while (fgets(line, sizeof(line), fp) != 0) { + if (memory_ranges >= MAX_MEMORY_RANGES) + break; + count = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + end = end + 1; + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) + type = RANGE_RAM; + else if (!strncmp(str, IOMEM_RESERVED, strlen(IOMEM_RESERVED))) + type = RANGE_RESERVED; + else + continue; + + if (memory_ranges > 0 && + memory_range[memory_ranges - 1].end == start && + memory_range[memory_ranges - 1].type == type) { + memory_range[memory_ranges - 1].end = end; + } else { + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + memory_ranges++; + } + } + fclose(fp); + *range = memory_range; + *ranges = memory_ranges; + + dbgprint_mem_range("MEMORY RANGES:", *range, *ranges); + return 0; +} + +struct file_type file_type[] = { + {"elf-loongarch", elf_loongarch_probe, elf_loongarch_load, elf_loongarch_usage}, + {"pei-loongarch", pei_loongarch_probe, pei_loongarch_load, pei_loongarch_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +/* loongarch global varables. */ + +struct loongarch_mem loongarch_mem; + +/** + * loongarch_process_image_header - Process the loongarch image header. + */ + +int loongarch_process_image_header(const struct loongarch_image_header *h) +{ + + if (!loongarch_header_check_pe_sig(h)) + return EFAILED; + + if (h->image_size) { + loongarch_mem.text_offset = loongarch_header_text_offset(h); + loongarch_mem.image_size = loongarch_header_image_size(h); + } + + return 0; +} + +void arch_usage(void) +{ + printf(loongarch_opts_usage); +} + +struct arch_options_t arch_options = { + .core_header_type = CORE_TYPE_ELF64, +}; + +int arch_process_options(int argc, char **argv) +{ + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { 0 }, + }; + int opt; + char *cmdline = NULL; + const char *append = NULL; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + cmdline = get_command_line(); + remove_parameter(cmdline, "kexec"); + remove_parameter(cmdline, "initrd"); + break; + case OPT_INITRD: + arch_options.initrd_file = optarg; + break; + default: + break; + } + } + + arch_options.command_line = concat_cmdline(cmdline, append); + + dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__, + arch_options.command_line); + dbgprintf("%s:%d: initrd: %s\n", __func__, __LINE__, + arch_options.initrd_file); + + return 0; +} + +const struct arch_map_entry arches[] = { + { "loongarch64", KEXEC_ARCH_LOONGARCH }, + { NULL, 0 }, +}; + +unsigned long loongarch_locate_kernel_segment(struct kexec_info *info) +{ + unsigned long hole; + + if (info->kexec_flags & KEXEC_ON_CRASH) { + unsigned long hole_end; + + hole = (crash_reserved_mem[usablemem_rgns.size - 1].start < mem_min ? + mem_min : crash_reserved_mem[usablemem_rgns.size - 1].start) + + loongarch_mem.text_offset; + hole = _ALIGN_UP(hole, MiB(1)); + hole_end = hole + loongarch_mem.text_offset + loongarch_mem.image_size; + + if ((hole_end > mem_max) || + (hole_end > crash_reserved_mem[usablemem_rgns.size - 1].end)) { + dbgprintf("%s: Crash kernel out of range\n", __func__); + hole = ULONG_MAX; + } + } else { + hole = locate_hole(info, + loongarch_mem.text_offset + loongarch_mem.image_size, + MiB(1), 0, ULONG_MAX, 1); + + if (hole == ULONG_MAX) + dbgprintf("%s: locate_hole failed\n", __func__); + } + + return hole; +} + +/* + * loongarch_load_other_segments - Prepare the initrd and cmdline segments. + */ + +int loongarch_load_other_segments(struct kexec_info *info, unsigned long hole_min) +{ + unsigned long initrd_min, hole_max; + char *initrd_buf = NULL; + unsigned long pagesize = getpagesize(); + + if (arch_options.command_line) { + if (strlen(arch_options.command_line) > + sizeof(cmdline) - 1) { + fprintf(stderr, + "Kernel command line too long for kernel!\n"); + return EFAILED; + } + + strncat(cmdline, arch_options.command_line, sizeof(cmdline) - 1); + } + + /* Put the other segments after the image. */ + + initrd_min = hole_min; + if (info->kexec_flags & KEXEC_ON_CRASH) + hole_max = crash_reserved_mem[usablemem_rgns.size - 1].end; + else + hole_max = ULONG_MAX; + + if (arch_options.initrd_file) { + + initrd_buf = slurp_decompress_file(arch_options.initrd_file, &initrd_size); + + initrd_base = add_buffer(info, initrd_buf, initrd_size, + initrd_size, sizeof(void *), + _ALIGN_UP(initrd_min, + pagesize), hole_max, 1); + dbgprintf("initrd_base: %lx, initrd_size: %lx\n", initrd_base, initrd_size); + + cmdline_add_initrd(cmdline, initrd_base, initrd_size); + } + + if (info->kexec_flags & KEXEC_ON_CRASH) { + cmdline_add_elfcorehdr(cmdline, elfcorehdr_mem.start, + elfcorehdr_mem.end - elfcorehdr_mem.start + 1); + + cmdline_add_mem(cmdline, crash_reserved_mem[usablemem_rgns.size - 1].start, + crash_reserved_mem[usablemem_rgns.size - 1].end - + crash_reserved_mem[usablemem_rgns.size - 1].start + 1); + } + + cmdline[sizeof(cmdline) - 1] = 0; + add_buffer(info, cmdline, sizeof(cmdline), sizeof(cmdline), + sizeof(void *), _ALIGN_UP(hole_min, getpagesize()), + 0xffffffff, 1); + + dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__, cmdline); + + return 0; + +} + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +unsigned long virt_to_phys(unsigned long addr) +{ + return addr & ((1ULL << 48) - 1); +} + +/* + * add_segment() should convert base to a physical address on loongarch, + * while the default is just to work with base as is + */ +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, virt_to_phys(base), memsz, 1); +} + +/* + * add_buffer() should convert base to a physical address on loongarch, + * while the default is just to work with base as is + */ +unsigned long add_buffer(struct kexec_info *info, const void *buf, + unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, + unsigned long buf_max, int buf_end) +{ + return add_buffer_phys_virt(info, buf, bufsz, memsz, buf_align, + buf_min, buf_max, buf_end, 1); +} diff --git a/kexec/arch/loongarch/kexec-loongarch.h b/kexec/arch/loongarch/kexec-loongarch.h new file mode 100644 index 0000000..5120a26 --- /dev/null +++ b/kexec/arch/loongarch/kexec-loongarch.h @@ -0,0 +1,60 @@ +#ifndef KEXEC_LOONGARCH_H +#define KEXEC_LOONGARCH_H + +#include <sys/types.h> + +#include "image-header.h" + +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 16 + +#define MAX_MEMORY_RANGES 64 +#define MAX_LINE 160 + +#define CORE_TYPE_ELF64 1 + +#define COMMAND_LINE_SIZE 512 + +#define KiB(x) ((x) * 1024UL) +#define MiB(x) (KiB(x) * 1024UL) + +int elf_loongarch_probe(const char *kernel_buf, off_t kernel_size); +int elf_loongarch_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_loongarch_usage(void); + +int pei_loongarch_probe(const char *buf, off_t len); +int pei_loongarch_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void pei_loongarch_usage(void); + +int loongarch_process_image_header(const struct loongarch_image_header *h); + +unsigned long loongarch_locate_kernel_segment(struct kexec_info *info); +int loongarch_load_other_segments(struct kexec_info *info, + unsigned long hole_min); + +struct arch_options_t { + char *command_line; + char *initrd_file; + char *dtb; + int core_header_type; +}; + +/** + * struct loongarch_mem - Memory layout info. + */ + +struct loongarch_mem { + uint64_t phys_offset; + uint64_t text_offset; + uint64_t image_size; +}; + +extern struct loongarch_mem loongarch_mem; + +extern struct memory_ranges usablemem_rgns; +extern struct arch_options_t arch_options; +extern off_t initrd_base, initrd_size; + +#endif /* KEXEC_LOONGARCH_H */ diff --git a/kexec/arch/loongarch/kexec-pei-loongarch.c b/kexec/arch/loongarch/kexec-pei-loongarch.c new file mode 100644 index 0000000..1a11103 --- /dev/null +++ b/kexec/arch/loongarch/kexec-pei-loongarch.c @@ -0,0 +1,124 @@ +/* + * LoongArch kexec PE format binary image support. + * + * Copyright (C) 2022 Loongson Technology Corporation Limited. + * Youling Tang <tangyouling@loongson.cn> + * + * derived from kexec-image-arm64.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE + +#include <limits.h> +#include <errno.h> +#include <elf.h> + +#include "kexec.h" +#include "kexec-elf.h" +#include "image-header.h" +#include "kexec-syscall.h" +#include "crashdump-loongarch.h" +#include "kexec-loongarch.h" +#include "arch/options.h" + +int pei_loongarch_probe(const char *kernel_buf, off_t kernel_size) +{ + const struct loongarch_image_header *h; + + if (kernel_size < sizeof(struct loongarch_image_header)) { + dbgprintf("%s: No loongarch image header.\n", __func__); + return -1; + } + + h = (const struct loongarch_image_header *)(kernel_buf); + + if (!loongarch_header_check_pe_sig(h)) { + dbgprintf("%s: Bad loongarch PE image header.\n", __func__); + return -1; + } + + return 0; +} + +int pei_loongarch_load(int argc, char **argv, const char *buf, + off_t len, struct kexec_info *info) +{ + int result; + unsigned long hole_min = 0; + unsigned long kernel_segment, kernel_entry; + const struct loongarch_image_header *header; + + header = (const struct loongarch_image_header *)(buf); + + if (loongarch_process_image_header(header)) + return EFAILED; + + kernel_segment = loongarch_locate_kernel_segment(info); + + if (kernel_segment == ULONG_MAX) { + dbgprintf("%s: Kernel segment is not allocated\n", __func__); + result = EFAILED; + goto exit; + } + + kernel_entry = virt_to_phys(loongarch_header_kernel_entry(header)); + + if (info->kexec_flags & KEXEC_ON_CRASH) + /* + * offset addresses in order to load vmlinux.efi into + * crash kernel's memory. + */ + kernel_entry += crash_reserved_mem[usablemem_rgns.size - 1].start; + + dbgprintf("%s: kernel_segment: %016lx\n", __func__, kernel_segment); + dbgprintf("%s: kernel_entry: %016lx\n", __func__, kernel_entry); + dbgprintf("%s: image_size: %016lx\n", __func__, + loongarch_mem.image_size); + dbgprintf("%s: text_offset: %016lx\n", __func__, + loongarch_mem.text_offset); + dbgprintf("%s: phys_offset: %016lx\n", __func__, + loongarch_mem.phys_offset); + dbgprintf("%s: PE format: %s\n", __func__, + (loongarch_header_check_pe_sig(header) ? "yes" : "no")); + + /* Get kernel entry point */ + info->entry = (void *)kernel_entry; + + hole_min = kernel_segment + loongarch_mem.image_size; + + /* Create and initialize elf core header segment */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info); + if (result) { + dbgprintf("%s: Creating eflcorehdr failed.\n", + __func__); + goto exit; + } + } + + /* Load the kernel */ + add_segment(info, buf, len, kernel_segment, loongarch_mem.image_size); + + /* Prepare and load dtb and initrd data */ + result = loongarch_load_other_segments(info, hole_min); + if (result) { + fprintf(stderr, "kexec: Load dtb and initrd segments failed.\n"); + goto exit; + } + +exit: + if (result) + fprintf(stderr, "kexec: load failed.\n"); + + return result; +} + +void pei_loongarch_usage(void) +{ + printf( +" An LoongArch PE format binary image, uncompressed, little endian.\n" +" Typically a vmlinux.efi file.\n\n"); +} diff --git a/kexec/arch/m68k/Makefile b/kexec/arch/m68k/Makefile new file mode 100644 index 0000000..eeaacbd --- /dev/null +++ b/kexec/arch/m68k/Makefile @@ -0,0 +1,15 @@ +# +# kexec m68k (linux booting linux) +# +m68k_KEXEC_SRCS = kexec/arch/m68k/kexec-m68k.c +m68k_KEXEC_SRCS += kexec/arch/m68k/kexec-elf-m68k.c +m68k_KEXEC_SRCS += kexec/arch/m68k/kexec-elf-rel-m68k.c +m68k_KEXEC_SRCS += kexec/arch/m68k/bootinfo.c + +m68k_ADD_SEGMENT = +m68k_VIRT_TO_PHYS = + +dist += kexec/arch/m68k/Makefile $(m68k_KEXEC_SRCS) \ + kexec/arch/m68k/bootinfo.h \ + kexec/arch/m68k/kexec-m68k.h \ + kexec/arch/m68k/include/arch/options.h diff --git a/kexec/arch/m68k/bootinfo.c b/kexec/arch/m68k/bootinfo.c new file mode 100644 index 0000000..086a34b --- /dev/null +++ b/kexec/arch/m68k/bootinfo.c @@ -0,0 +1,285 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/random.h> + +#include "../../kexec.h" + +#include "bootinfo.h" + +const char *bootinfo_file = DEFAULT_BOOTINFO_FILE; +static struct bi_rec *bootinfo; +static off_t bootinfo_size; + +static unsigned int num_memchunks; + +static struct bi_rec *bi_next(struct bi_rec *bi, uint16_t size) +{ + return (void *)((unsigned long)bi + size); +} + +static struct bi_rec *bi_find(struct bi_rec *prev, uint16_t tag) +{ + struct bi_rec *bi = prev ? bi_next(prev, prev->size) : bootinfo; + + for (bi = prev ? bi_next(prev, prev->size) : bootinfo; + bi->tag != BI_LAST; bi = bi_next(bi, bi->size)) + if (bi->tag == tag) + return bi; + return NULL; +} + +static void bi_remove(uint16_t tag) +{ + struct bi_rec *bi; + off_t rem; + uint16_t size; + + bi = bootinfo; + rem = bootinfo_size; + while (1) { + if (bi->tag == BI_LAST) + break; + + size = bi->size; + if (bi->tag == tag) { + memmove(bi, bi_next(bi, size), rem - size); + bootinfo_size -= size; + rem -= size; + continue; + } + + bi = bi_next(bi, size); + rem -= size; + } +} + +static struct bi_rec *bi_add(uint16_t tag, uint16_t size) +{ + struct bi_rec *bi; + + /* Add 4-byte header and round up to multiple of 4 bytes */ + size = _ALIGN_UP(4 + size, 4); + + bootinfo = xrealloc(bootinfo, bootinfo_size + size); + + /* Replace old sentinel by new record */ + bi = bi_next(bootinfo, bootinfo_size - 2); + bootinfo_size += size; + memset(bi, 0, size); + bi->tag = tag; + bi->size = size; + + /* Re-add sentinel */ + bi_next(bi, size)->tag = BI_LAST; + + return bi; +} + +void bootinfo_load(void) +{ + struct bi_rec *bi; + off_t rem; + uint16_t tag, size; + + dbgprintf("Loading bootinfo from %s\n", bootinfo_file); + bootinfo = (void *)slurp_file_len(bootinfo_file, MAX_BOOTINFO_SIZE, + &bootinfo_size); + if (!bootinfo) + die("No bootinfo\n"); + + bi = bootinfo; + rem = bootinfo_size; + while (1) { + if (rem < 2) + die("Unexpected end of bootinfo\n"); + + tag = bi->tag; + if (tag == BI_LAST) { + rem -= 2; + break; + } + + if (rem < 4) + die("Unexpected end of bootinfo\n"); + + size = bi->size; + if (size < 4 || size % 4) + die("Invalid tag size\n"); + if (rem < size) + die("Unexpected end of bootinfo\n"); + + if (tag == BI_MEMCHUNK) + num_memchunks++; + + bi = bi_next(bi, size); + rem -= size; + } + + if (rem) + die("Trailing data at end of bootinfo\n"); +} + +void bootinfo_print(void) +{ + struct bi_rec *bi = bootinfo; + uint16_t tag, size; + + while (1) { + tag = bi->tag; + if (tag == BI_LAST) { + puts("BI_LAST"); + break; + } + + size = bi->size; + switch (tag) { + case BI_MACHTYPE: + printf("BI_MACHTYPE: 0x%08x\n", bi->machtype); + break; + + case BI_MEMCHUNK: + printf("BI_MEMCHUNK: 0x%08x bytes at 0x%08x\n", + bi->mem_info.size, bi->mem_info.addr); + break; + + case BI_RAMDISK: + printf("BI_RAMDISK: 0x%08x bytes at 0x%08x\n", + bi->mem_info.size, bi->mem_info.addr); + break; + + case BI_COMMAND_LINE: + printf("BI_COMMAND_LINE: %s\n", bi->string); + break; + + case BI_RNG_SEED: + /* These are secret, so never print them to the console */ + printf("BI_RNG_SEED: 0x%08x bytes\n", be16_to_cpu(bi->rng_seed.len)); + break; + + default: + printf("BI tag 0x%04x size %u\n", tag, size); + break; + } + bi = bi_next(bi, size); + } +} + +int bootinfo_get_memory_ranges(struct memory_range **range) +{ + struct memory_range *ranges; + unsigned int i; + struct bi_rec *bi; + + ranges = xmalloc(num_memchunks * sizeof(struct memory_range)); + for (i = 0, bi = NULL; + i < num_memchunks && (bi = bi_find(bi, BI_MEMCHUNK)); i++) { + ranges[i].start = bi->mem_info.addr; + ranges[i].end = bi->mem_info.addr + bi->mem_info.size - 1; + ranges[i].type = RANGE_RAM; + } + + *range = ranges; + return i; +} + +void bootinfo_set_cmdline(const char *cmdline) +{ + struct bi_rec *bi; + uint16_t size; + + /* Remove existing command line records */ + bi_remove(BI_COMMAND_LINE); + + if (!cmdline) + return; + + /* Add new command line record */ + size = strlen(cmdline) + 1; + bi = bi_add(BI_COMMAND_LINE, size); + memcpy(bi->string, cmdline, size); +} + +void bootinfo_set_ramdisk(unsigned long ramdisk_addr, + unsigned long ramdisk_size) +{ + struct bi_rec *bi; + + /* Remove existing ramdisk records */ + bi_remove(BI_RAMDISK); + + if (!ramdisk_size) + return; + + /* Add new ramdisk record */ + bi = bi_add(BI_RAMDISK, sizeof(bi->mem_info)); + bi->mem_info.addr = ramdisk_addr; + bi->mem_info.size = ramdisk_size; +} + +void bootinfo_add_rng_seed(void) +{ + enum { RNG_SEED_LEN = 32 }; + struct bi_rec *bi; + + /* Remove existing rng seed records */ + bi_remove(BI_RNG_SEED); + + /* Add new rng seed record */ + bi = bi_add(BI_RNG_SEED, sizeof(bi->rng_seed) + RNG_SEED_LEN); + if (getrandom(bi->rng_seed.data, RNG_SEED_LEN, GRND_NONBLOCK) != RNG_SEED_LEN) { + bi_remove(BI_RNG_SEED); + return; + } + bi->rng_seed.len = cpu_to_be16(RNG_SEED_LEN); +} + + + /* + * Check the bootinfo version in the kernel image + * All failures are non-fatal, as kexec may be used to load + * non-Linux images + */ + +void bootinfo_check_bootversion(const struct kexec_info *info) +{ + struct bi_rec *bi; + const struct bootversion *bv; + uint16_t major, minor; + unsigned int i; + + bv = info->segment[0].buf; + if (bv->magic != BOOTINFOV_MAGIC) { + printf("WARNING: No bootversion in kernel image\n"); + return; + } + + bi = bi_find(NULL, BI_MACHTYPE); + if (!bi) { + printf("WARNING: No machtype in bootinfo\n"); + return; + } + + for (i = 0; bv->machversions[i].machtype != bi->machtype; i++) + if (!bv->machversions[i].machtype) { + printf("WARNING: Machtype 0x%08x not in kernel bootversion\n", + bi->machtype); + return; + } + + major = BI_VERSION_MAJOR(bv->machversions[i].version); + minor = BI_VERSION_MINOR(bv->machversions[i].version); + dbgprintf("Kernel uses bootversion %u.%u\n", major, minor); + if (major != SUPPORTED_BOOTINFO_VERSION) + printf("WARNING: Kernel bootversion %u.%u is too %s for this kexec (expected %u.x)\n", + major, minor, + major < SUPPORTED_BOOTINFO_VERSION ? "old" : "new", + SUPPORTED_BOOTINFO_VERSION); +} + +void add_bootinfo(struct kexec_info *info, unsigned long addr) +{ + add_buffer(info, bootinfo, bootinfo_size, bootinfo_size, + sizeof(void *), addr, 0x0fffffff, 1); +} diff --git a/kexec/arch/m68k/bootinfo.h b/kexec/arch/m68k/bootinfo.h new file mode 100644 index 0000000..bb8a03a --- /dev/null +++ b/kexec/arch/m68k/bootinfo.h @@ -0,0 +1,53 @@ +#include <asm/bootinfo.h> + +#define DEFAULT_BOOTINFO_FILE "/proc/bootinfo" +#define MAX_BOOTINFO_SIZE 1536 + +/* New in 6.2's <asm/bootinfo.h>. Remove once 6.2 is baseline version. */ +#ifndef BI_RNG_SEED +#define BI_RNG_SEED 0x0008 +#endif + + + /* + * Convenience overlay of several struct bi_record variants + */ + +struct bi_rec { + __be16 tag; + __be16 size; + union { + __be32 data[0]; + /* shorthands for the types we use */ + __be32 machtype; + struct { + __be32 addr; + __be32 size; + } mem_info; + char string[0]; + struct { + __be16 len; + __u8 data[0]; + } rng_seed; + }; +}; + + + /* + * We only support the "new" tagged bootinfo (v2) + */ + +#define SUPPORTED_BOOTINFO_VERSION 2 + + +extern const char *bootinfo_file; + +extern void bootinfo_load(void); +extern void bootinfo_print(void); +extern int bootinfo_get_memory_ranges(struct memory_range **range); +extern void bootinfo_set_cmdline(const char *cmdline); +extern void bootinfo_set_ramdisk(unsigned long ramdisk_addr, + unsigned long ramdisk_size); +extern void bootinfo_add_rng_seed(void); +extern void bootinfo_check_bootversion(const struct kexec_info *info); +extern void add_bootinfo(struct kexec_info *info, unsigned long addr); diff --git a/kexec/arch/m68k/include/arch/options.h b/kexec/arch/m68k/include/arch/options.h new file mode 100644 index 0000000..f279d54 --- /dev/null +++ b/kexec/arch/m68k/include/arch/options.h @@ -0,0 +1,45 @@ +#ifndef KEXEC_ARCH_M68K_OPTIONS_H +#define KEXEC_ARCH_M68K_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) + +/* All 'local' loader options: */ +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+1) +#define OPT_RAMDISK (OPT_ARCH_MAX+2) +#define OPT_BOOTINFO (OPT_ARCH_MAX+3) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "append", 1, NULL, OPT_APPEND }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \ + { "ramdisk", 1, NULL, OPT_RAMDISK }, \ + { "initrd", 1, NULL, OPT_RAMDISK }, \ + { "bootinfo", 1, NULL, OPT_BOOTINFO }, + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_M68K_OPTIONS_H */ diff --git a/kexec/arch/m68k/kexec-elf-m68k.c b/kexec/arch/m68k/kexec-elf-m68k.c new file mode 100644 index 0000000..a2bf7ee --- /dev/null +++ b/kexec/arch/m68k/kexec-elf-m68k.c @@ -0,0 +1,183 @@ +/* + * kexec-elf-m68k.c - kexec Elf loader for m68k + * + * Copyright (C) 2013 Geert Uytterhoeven + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-m68k.h" +#include "bootinfo.h" +#include <arch/options.h> + +#define KiB * 1024 +#define MiB * 1024 KiB + +#define PAGE_SIZE 4 KiB + + +int elf_m68k_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) + goto out; + + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_68K) { + /* for a different architecture */ + fprintf(stderr, "Not for this architecture.\n"); + result = -1; + goto out; + } + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +void elf_m68k_usage(void) +{ + printf(" --command-line=STRING Set the kernel command line to STRING\n" + " --append=STRING Set the kernel command line to STRING\n" + " --reuse-cmdline Use kernel command line from running system.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --bootinfo=FILE Use FILE as the kernel's bootinfo\n" + ); +} + +static unsigned long segment_end(const struct kexec_info *info, int i) +{ + return (unsigned long)info->segment[i].mem + info->segment[i].memsz - 1; +} + +int elf_m68k_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + const char *cmdline = NULL, *ramdisk_file = NULL; + int opt, result, i; + unsigned long bootinfo_addr, ramdisk_addr = 0; + off_t ramdisk_size = 0; + + /* See options.h if adding any more options. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "bootinfo", 1, NULL, OPT_BOOTINFO }, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_ARCH_OPT_STR "d"; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != + -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + cmdline = optarg; + break; + case OPT_REUSE_CMDLINE: + cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk_file = optarg; + break; + case OPT_BOOTINFO: + break; + } + } + + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) + die("ELF exec parse failed\n"); + + /* Fixup PT_LOAD segments that include the ELF header (offset zero) */ + for (i = 0; i < ehdr.e_phnum; i++) { + struct mem_phdr *phdr; + phdr = &ehdr.e_phdr[i]; + if (phdr->p_type != PT_LOAD || phdr->p_offset) + continue; + + dbgprintf("Removing ELF header from segment %d\n", i); + phdr->p_paddr += PAGE_SIZE; + phdr->p_vaddr += PAGE_SIZE; + phdr->p_filesz -= PAGE_SIZE; + phdr->p_memsz -= PAGE_SIZE; + phdr->p_offset += PAGE_SIZE; + phdr->p_data += PAGE_SIZE; + } + + /* Load the ELF data */ + result = elf_exec_load(&ehdr, info); + if (result < 0) + die("ELF exec load failed\n"); + + info->entry = (void *)virt_to_phys(ehdr.e_entry); + + /* Bootinfo must be stored right after the kernel */ + bootinfo_addr = segment_end(info, info->nr_segments - 1) + 1; + + /* Load ramdisk */ + if (ramdisk_file) { + void *ramdisk = slurp_decompress_file(ramdisk_file, + &ramdisk_size); + /* Store ramdisk at top of first memory chunk */ + ramdisk_addr = _ALIGN_DOWN(info->memory_range[0].end - + ramdisk_size + 1, + PAGE_SIZE); + if (!buf) + die("Ramdisk load failed\n"); + add_buffer(info, ramdisk, ramdisk_size, ramdisk_size, + PAGE_SIZE, ramdisk_addr, info->memory_range[0].end, + 1); + } + + /* Update and add bootinfo */ + bootinfo_set_cmdline(cmdline); + bootinfo_set_ramdisk(ramdisk_addr, ramdisk_size); + bootinfo_add_rng_seed(); + if (kexec_debug) + bootinfo_print(); + add_bootinfo(info, bootinfo_addr); + + /* + * Check if the kernel (and bootinfo) exceed 4 MiB, as current kernels + * don't support that. + * As the segments are still unsorted, the bootinfo is located in the + * last segment. + */ + if (segment_end(info, info->nr_segments - 1) >= virt_to_phys(4 MiB - 1)) + printf("WARNING: Kernel is larger than 4 MiB\n"); + + /* Check struct bootversion at start of kernel */ + bootinfo_check_bootversion(info); + + return 0; +} diff --git a/kexec/arch/m68k/kexec-elf-rel-m68k.c b/kexec/arch/m68k/kexec-elf-rel-m68k.c new file mode 100644 index 0000000..0cc38cc --- /dev/null +++ b/kexec/arch/m68k/kexec-elf-rel-m68k.c @@ -0,0 +1,39 @@ +/* + * kexec-elf-rel-m68k.c - kexec Elf relocation routines + * + * Copyright (C) 2013 Geert Uytterhoeven + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) + return 0; + if (ehdr->ei_class != ELFCLASS32) + return 0; + if (ehdr->e_machine != EM_68K) + return 0; + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), + unsigned long r_type, + void *UNUSED(location), + unsigned long UNUSED(address), + unsigned long UNUSED(value)) +{ + switch (r_type) { + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } + return; +} diff --git a/kexec/arch/m68k/kexec-m68k.c b/kexec/arch/m68k/kexec-m68k.c new file mode 100644 index 0000000..cb54927 --- /dev/null +++ b/kexec/arch/m68k/kexec-m68k.c @@ -0,0 +1,110 @@ +/* + * kexec-m68k.c - kexec for m68k + * + * Copyright (C) 2013 Geert Uytterhoeven + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-m68k.h" +#include "bootinfo.h" +#include <arch/options.h> + + +static unsigned long m68k_memoffset; + + +/* Return a sorted list of memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + bootinfo_load(); + *ranges = bootinfo_get_memory_ranges(range); + m68k_memoffset = (*range)[0].start; + return 0; +} + + +struct file_type file_type[] = { + {"elf-m68k", elf_m68k_probe, elf_m68k_load, elf_m68k_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { "bootinfo", 1, NULL, OPT_BOOTINFO }, + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != + -1) { + switch (opt) { + default: + break; + case OPT_BOOTINFO: + bootinfo_file = optarg; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + { "m68k", KEXEC_ARCH_68K }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +int is_crashkernel_mem_reserved(void) +{ + return 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + /* Crash kernel region size is not exposed by the system */ + return -1; +} + +unsigned long virt_to_phys(unsigned long addr) +{ + return addr + m68k_memoffset; +} + +/* + * add_segment() should convert base to a physical address on m68k, + * while the default is just to work with base as is */ +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} diff --git a/kexec/arch/m68k/kexec-m68k.h b/kexec/arch/m68k/kexec-m68k.h new file mode 100644 index 0000000..99482c4 --- /dev/null +++ b/kexec/arch/m68k/kexec-m68k.h @@ -0,0 +1,9 @@ +#ifndef KEXEC_M68K_H +#define KEXEC_M68K_H + +int elf_m68k_probe(const char *buf, off_t len); +int elf_m68k_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_m68k_usage(void); + +#endif /* KEXEC_M68K_H */ diff --git a/kexec/arch/mips/Makefile b/kexec/arch/mips/Makefile new file mode 100644 index 0000000..1fe7886 --- /dev/null +++ b/kexec/arch/mips/Makefile @@ -0,0 +1,29 @@ +# +# kexec mips (linux booting linux) +# +mips_KEXEC_SRCS = kexec/arch/mips/kexec-mips.c +mips_KEXEC_SRCS += kexec/arch/mips/kexec-elf-mips.c +mips_KEXEC_SRCS += kexec/arch/mips/kexec-elf-rel-mips.c +mips_KEXEC_SRCS += kexec/arch/mips/crashdump-mips.c + +mips_FS2DT = kexec/fs2dt.c +mips_FS2DT_INCLUDE = \ + -include $(srcdir)/kexec/arch/mips/crashdump-mips.h \ + -include $(srcdir)/kexec/arch/mips/kexec-mips.h + +mips_DT_OPS += kexec/dt-ops.c + +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +libfdt_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) +mips_CPPFLAGS += -I$(srcdir)/kexec/libfdt +mips_KEXEC_SRCS += $(libfdt_SRCS) + +mips_ADD_BUFFER = +mips_ADD_SEGMENT = +mips_VIRT_TO_PHYS = + +dist += kexec/arch/mips/Makefile $(mips_KEXEC_SRCS) \ + kexec/arch/mips/kexec-mips.h \ + kexec/arch/mips/crashdump-mips.h \ + kexec/arch/mips/include/arch/options.h diff --git a/kexec/arch/mips/crashdump-mips.c b/kexec/arch/mips/crashdump-mips.c new file mode 100644 index 0000000..00f4335 --- /dev/null +++ b/kexec/arch/mips/crashdump-mips.c @@ -0,0 +1,408 @@ +/* + * kexec: Linux boots Linux + * + * 2005 (C) IBM Corporation. + * 2008 (C) MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <inttypes.h> +#include <elf.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../crashdump.h" +#include "kexec-mips.h" +#include "crashdump-mips.h" +#include "unused.h" + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region */ +static struct memory_range crash_memory_range[CRASH_MAX_MEMORY_RANGES]; + +/* Not used currently but required by generic fs2dt code */ +struct memory_ranges usablemem_rgns; + +/* Memory region reserved for storing panic kernel and other data. */ +static struct memory_range crash_reserved_mem; + +/* Read kernel physical load addr from the file returned by proc_iomem() + * (Kernel Code) and store in kexec_info */ +static int get_kernel_paddr(struct crash_elf_info *elf_info) +{ + uint64_t start; + + if (xen_present()) /* Kernel not entity mapped under Xen */ + return 0; + + if (parse_iomem_single("Kernel code\n", &start, NULL) == 0) { + elf_info->kern_paddr_start = start; + dbgprintf("kernel load physical addr start = 0x%" PRIu64 "\n", start); + return 0; + } + + fprintf(stderr, "Cannot determine kernel physical load addr\n"); + return -1; +} + +static int get_kernel_vaddr_and_size(struct crash_elf_info *elf_info, + unsigned long start_offset) +{ + uint64_t end; + + if (!elf_info->kern_paddr_start) + return -1; + + elf_info->kern_vaddr_start = elf_info->kern_paddr_start | + start_offset; + /* If "Kernel bss" exists, the kernel ends there, else fall + * through and say that it ends at "Kernel data" */ + if (parse_iomem_single("Kernel bss\n", NULL, &end) == 0 || + parse_iomem_single("Kernel data\n", NULL, &end) == 0) { + elf_info->kern_size = end - elf_info->kern_paddr_start; + dbgprintf("kernel_vaddr= 0x%llx paddr %llx\n", + elf_info->kern_vaddr_start, + elf_info->kern_paddr_start); + dbgprintf("kernel size = 0x%lx\n", elf_info->kern_size); + return 0; + } + fprintf(stderr, "Cannot determine kernel virtual load addr and size\n"); + return -1; +} + +/* Removes crash reserve region from list of memory chunks for whom elf program + * headers have to be created. Assuming crash reserve region to be a single + * continuous area fully contained inside one of the memory chunks */ +static int exclude_crash_reserve_region(int *nr_ranges) +{ + int i, j, tidx = -1; + unsigned long long cstart, cend; + struct memory_range temp_region = { + .start = 0, + .end = 0 + }; + + /* Crash reserved region. */ + cstart = crash_reserved_mem.start; + cend = crash_reserved_mem.end; + + for (i = 0; i < (*nr_ranges); i++) { + unsigned long long mstart, mend; + mstart = crash_memory_range[i].start; + mend = crash_memory_range[i].end; + if (cstart < mend && cend > mstart) { + if (cstart != mstart && cend != mend) { + /* Split memory region */ + crash_memory_range[i].end = cstart - 1; + temp_region.start = cend + 1; + temp_region.end = mend; + temp_region.type = RANGE_RAM; + tidx = i+1; + } else if (cstart != mstart) + crash_memory_range[i].end = cstart - 1; + else + crash_memory_range[i].start = cend + 1; + } + } + /* Insert split memory region, if any. */ + if (tidx >= 0) { + if (*nr_ranges == CRASH_MAX_MEMORY_RANGES) { + /* No space to insert another element. */ + fprintf(stderr, "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + return -1; + } + for (j = (*nr_ranges - 1); j >= tidx; j--) + crash_memory_range[j+1] = crash_memory_range[j]; + crash_memory_range[tidx].start = temp_region.start; + crash_memory_range[tidx].end = temp_region.end; + crash_memory_range[tidx].type = temp_region.type; + (*nr_ranges)++; + } + return 0; +} +/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to + * create Elf headers. Keeping it separate from get_memory_ranges() as + * requirements are different in the case of normal kexec and crashdumps. + * + * Normal kexec needs to look at all of available physical memory irrespective + * of the fact how much of it is being used by currently running kernel. + * Crashdumps need to have access to memory regions actually being used by + * running kernel. Expecting a different file/data structure than /proc/iomem + * to look into down the line. May be something like /proc/kernelmem or may + * be zone data structures exported from kernel. + */ +static int get_crash_memory_ranges(struct memory_range **range, int *ranges) +{ + const char iomem[] = "/proc/iomem"; + int memory_ranges = 0; + char line[MAX_LINE]; + FILE *fp; + unsigned long long start, end; + + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + /* Separate segment for backup region */ + crash_memory_range[0].start = BACKUP_SRC_START; + crash_memory_range[0].end = BACKUP_SRC_END; + crash_memory_range[0].type = RANGE_RAM; + memory_ranges++; + + while (fgets(line, sizeof(line), fp) != 0) { + char *str; + int type, consumed, count; + if (memory_ranges >= CRASH_MAX_MEMORY_RANGES) + break; + count = sscanf(line, "%llx-%llx : %n", + &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + + /* Only Dumping memory of type System RAM. */ + if (memcmp(str, "System RAM\n", 11) == 0) { + type = RANGE_RAM; + } else if (memcmp(str, "Crash kernel\n", 13) == 0) { + /* Reserved memory region. New kernel can + * use this region to boot into. */ + crash_reserved_mem.start = start; + crash_reserved_mem.end = end; + crash_reserved_mem.type = RANGE_RAM; + continue; + } else + continue; + + if (start == BACKUP_SRC_START && end >= (BACKUP_SRC_END + 1)) + start = BACKUP_SRC_END + 1; + + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = type; + memory_ranges++; + + /* Segregate linearly mapped region. */ + if (MAXMEM && (MAXMEM - 1) >= start && (MAXMEM - 1) <= end) { + crash_memory_range[memory_ranges - 1].end = MAXMEM - 1; + + /* Add segregated region. */ + crash_memory_range[memory_ranges].start = MAXMEM; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = type; + memory_ranges++; + } + } + fclose(fp); + + if (exclude_crash_reserve_region(&memory_ranges) < 0) + return -1; + + *range = crash_memory_range; + *ranges = memory_ranges; + return 0; +} + +/* Adds the appropriate mem= options to command line, indicating the + * memory region the new kernel can use to boot into. */ +static int cmdline_add_mem(char *cmdline, unsigned long addr, + unsigned long size) +{ + int cmdlen, len; + char str[50], *ptr; + + addr = addr/1024; + size = size/1024; + ptr = str; + strcpy(str, " mem="); + ptr += strlen(str); + ultoa(size, ptr); + strcat(str, "K@"); + ptr = str + strlen(str); + ultoa(addr, ptr); + strcat(str, "K"); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + return 0; +} + +/* Adds the elfcorehdr= command line parameter to command line. */ +static int cmdline_add_elfcorehdr(char *cmdline, unsigned long addr) +{ + int cmdlen, len, align = 1024; + char str[30], *ptr; + + /* Passing in elfcorehdr=xxxK format. Saves space required in cmdline. + * Ensure 1K alignment*/ + if (addr%align) + return -1; + addr = addr/align; + ptr = str; + strcpy(str, " elfcorehdr="); + ptr += strlen(str); + ultoa(addr, ptr); + strcat(str, "K"); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + return 0; +} + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ELFDATALOCAL ELFDATA2LSB +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ELFDATALOCAL ELFDATA2MSB +#else +# error Unknown byte order +#endif + +static struct crash_elf_info elf_info64 = { + class: ELFCLASS64, + data : ELFDATALOCAL, + machine : EM_MIPS, + page_offset : PAGE_OFFSET, + lowmem_limit : 0, /* 0 == no limit */ +}; + +static struct crash_elf_info elf_info32 = { + class: ELFCLASS32, + data : ELFDATALOCAL, + machine : EM_MIPS, + page_offset : PAGE_OFFSET, + lowmem_limit : MAXMEM, +}; + +static int patch_elf_info(void) +{ + const char cpuinfo[] = "/proc/cpuinfo"; + char line[MAX_LINE]; + FILE *fp; + + fp = fopen(cpuinfo, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + cpuinfo, strerror(errno)); + return -1; + } + while (fgets(line, sizeof(line), fp) != 0) { + if (strncmp(line, "cpu model", 9) == 0) { + /* OCTEON uses a different page_offset. */ + if (strstr(line, "Octeon")) + elf_info64.page_offset = OCTEON_PAGE_OFFSET; + /* LOONGSON uses a different page_offset. */ + else if (strstr(line, "Loongson")) + elf_info64.page_offset = LOONGSON_PAGE_OFFSET; + break; + } + } + fclose(fp); + return 0; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for backup region, another segment for storing elf headers + * for crash memory image. + */ +int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, + unsigned long UNUSED(max_addr), + unsigned long UNUSED(min_base)) +{ + void *tmp; + unsigned long sz, elfcorehdr; + int nr_ranges, align = 1024; + struct memory_range *mem_range; + crash_create_elf_headers_func crash_create = crash_create_elf32_headers; + struct crash_elf_info *elf_info = &elf_info32; + unsigned long start_offset = 0x80000000UL; + + if (patch_elf_info()) + return -1; + + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + elf_info = &elf_info64; + crash_create = crash_create_elf64_headers; + start_offset = (unsigned long)0xffffffff80000000UL; + } + + if (get_kernel_paddr(elf_info)) + return -1; + + if (get_kernel_vaddr_and_size(elf_info, start_offset)) + return -1; + + if (get_crash_memory_ranges(&mem_range, &nr_ranges) < 0) + return -1; + + info->backup_src_start = BACKUP_SRC_START; + info->backup_src_size = BACKUP_SRC_SIZE; + /* Create a backup region segment to store backup data*/ + sz = _ALIGN(BACKUP_SRC_SIZE, align); + tmp = xmalloc(sz); + memset(tmp, 0, sz); + info->backup_start = add_buffer(info, tmp, sz, sz, align, + crash_reserved_mem.start, + crash_reserved_mem.end, -1); + + if (crash_create(info, elf_info, crash_memory_range, nr_ranges, + &tmp, &sz, ELF_CORE_HEADER_ALIGN) < 0) { + free(tmp); + return -1; + } + + elfcorehdr = add_buffer(info, tmp, sz, sz, align, + crash_reserved_mem.start, + crash_reserved_mem.end, -1); + + /* + * backup segment is after elfcorehdr, so use elfcorehdr as top of + * kernel's available memory + */ + cmdline_add_mem(mod_cmdline, crash_reserved_mem.start, + crash_reserved_mem.end - crash_reserved_mem.start + 1); + cmdline_add_elfcorehdr(mod_cmdline, elfcorehdr); + + dbgprintf("CRASH MEMORY RANGES:\n"); + dbgprintf("%016Lx-%016Lx\n", crash_reserved_mem.start, + crash_reserved_mem.end); + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + uint64_t start, end; + + return parse_iomem_single("Crash kernel\n", &start, &end) == 0 ? + (start != end) : 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + return parse_iomem_single("Crash kernel\n", start, end); +} diff --git a/kexec/arch/mips/crashdump-mips.h b/kexec/arch/mips/crashdump-mips.h new file mode 100644 index 0000000..55c9925 --- /dev/null +++ b/kexec/arch/mips/crashdump-mips.h @@ -0,0 +1,30 @@ +#ifndef CRASHDUMP_MIPS_H +#define CRASHDUMP_MIPS_H + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + unsigned long max_addr, unsigned long min_base); +#ifdef __mips64 +#define PAGE_OFFSET 0xa800000000000000ULL +#define MAXMEM 0 +#else +#define PAGE_OFFSET 0x80000000 +#define MAXMEM 0x80000000 +#endif +#define __pa(x) ((unsigned long)(X) & 0x7fffffff) + +#define LOONGSON_PAGE_OFFSET 0xffffffff80000000ULL +#define OCTEON_PAGE_OFFSET 0x8000000000000000ULL + +#define CRASH_MAX_MEMMAP_NR (KEXEC_MAX_SEGMENTS + 1) +#define CRASH_MAX_MEMORY_RANGES (MAX_MEMORY_RANGES + 2) + +#define COMMAND_LINE_SIZE 512 + +/* Backup Region, First 1M of System RAM. */ +#define BACKUP_SRC_START 0x00000000 +#define BACKUP_SRC_END 0x000fffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) + +extern struct arch_options_t arch_options; +#endif /* CRASHDUMP_MIPS_H */ diff --git a/kexec/arch/mips/include/arch/options.h b/kexec/arch/mips/include/arch/options.h new file mode 100644 index 0000000..ba2f346 --- /dev/null +++ b/kexec/arch/mips/include/arch/options.h @@ -0,0 +1,43 @@ +#ifndef KEXEC_ARCH_MIPS_OPTIONS_H +#define KEXEC_ARCH_MIPS_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_DTB (OPT_ARCH_MAX+1) +#define OPT_RAMDISK (OPT_ARCH_MAX+2) +#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+3) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + {"command-line", 1, 0, OPT_APPEND}, \ + {"append", 1, 0, OPT_APPEND}, \ + {"dtb", 1, 0, OPT_DTB }, \ + {"initrd", 1, 0, OPT_RAMDISK }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_MIPS_OPTIONS_H */ diff --git a/kexec/arch/mips/kexec-elf-mips.c b/kexec/arch/mips/kexec-elf-mips.c new file mode 100644 index 0000000..230d806 --- /dev/null +++ b/kexec/arch/mips/kexec-elf-mips.c @@ -0,0 +1,242 @@ +/* + * kexec-elf-mips.c - kexec Elf loader for mips + * Copyright (C) 2007 Francesco Chiechi, Alessandro Rubini + * Copyright (C) 2007 Tvblob s.r.l. + * + * derived from ../ppc/kexec-elf-ppc.c + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-mips.h" +#include "crashdump-mips.h" +#include <arch/options.h> +#include "../../fs2dt.h" +#include "../../dt-ops.h" + +static const int probe_debug = 0; + +#define BOOTLOADER "kexec" +#define UPSZ(X) _ALIGN_UP(sizeof(X), 4) + +#define CMDLINE_PREFIX "kexec " +static char cmdline_buf[COMMAND_LINE_SIZE] = CMDLINE_PREFIX; + +/* Adds initrd parameters to command line. */ +static int cmdline_add_initrd(char *cmdline, unsigned long addr, char *new_para) +{ + int cmdlen, len; + char str[30], *ptr; + + ptr = str; + strcpy(str, new_para); + ptr += strlen(str); + ultoa(addr, ptr); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + return 0; +} + +/* add initrd to cmdline to compatible with previous platforms. */ +static int patch_initrd_info(char *cmdline, unsigned long base, + unsigned long size) +{ + const char cpuinfo[] = "/proc/cpuinfo"; + char line[MAX_LINE]; + FILE *fp; + unsigned long page_offset = PAGE_OFFSET; + + fp = fopen(cpuinfo, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + cpuinfo, strerror(errno)); + return -1; + } + while (fgets(line, sizeof(line), fp) != 0) { + if (strncmp(line, "cpu model", 9) == 0) { + if (strstr(line, "Loongson")) { + /* LOONGSON64 uses a different page_offset. */ + if (arch_options.core_header_type == + CORE_TYPE_ELF64) + page_offset = LOONGSON_PAGE_OFFSET; + cmdline_add_initrd(cmdline, + page_offset + base, " rd_start="); + cmdline_add_initrd(cmdline, size, " rd_size="); + break; + } + } + } + fclose(fp); + return 0; +} + +int elf_mips_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + goto out; + } + + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_MIPS) { + /* for a different architecture */ + if (probe_debug) { + fprintf(stderr, "Not for this architecture.\n"); + } + result = -1; + goto out; + } + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +void elf_mips_usage(void) +{ +} + +int elf_mips_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + int command_line_len = 0; + char *crash_cmdline; + int result; + unsigned long cmdline_addr; + size_t i; + off_t dtb_length; + char *dtb_buf; + char *initrd_buf = NULL; + unsigned long long kernel_addr = 0, kernel_size = 0; + unsigned long pagesize = getpagesize(); + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + crash_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)crash_cmdline, 0, COMMAND_LINE_SIZE); + } else + crash_cmdline = NULL; + + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) + die("ELF exec parse failed\n"); + + /* Read in the PT_LOAD segments and remove CKSEG0 mask from address*/ + for (i = 0; i < ehdr.e_phnum; i++) { + struct mem_phdr *phdr; + phdr = &ehdr.e_phdr[i]; + if (phdr->p_type == PT_LOAD) { + phdr->p_paddr = virt_to_phys(phdr->p_paddr); + kernel_addr = phdr->p_paddr; + kernel_size = phdr->p_memsz; + } + } + + /* Load the Elf data */ + result = elf_exec_load(&ehdr, info); + if (result < 0) + die("ELF exec load failed\n"); + + info->entry = (void *)virt_to_phys(ehdr.e_entry); + + if (arch_options.command_line) + command_line_len = strlen(arch_options.command_line) + 1; + + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info, crash_cmdline, + 0, 0); + if (result < 0) { + free(crash_cmdline); + return -1; + } + } + + if (arch_options.command_line) + strncat(cmdline_buf, arch_options.command_line, command_line_len); + if (crash_cmdline) + { + strncat(cmdline_buf, crash_cmdline, + sizeof(crash_cmdline) - + strlen(crash_cmdline) - 1); + free(crash_cmdline); + } + + if (info->kexec_flags & KEXEC_ON_CRASH) + /* In case of crashdump segment[0] is kernel. + * Put cmdline just after it. */ + cmdline_addr = (unsigned long)info->segment[0].mem + + info->segment[0].memsz; + else + cmdline_addr = 0; + + /* MIPS systems that have been converted to use device tree + * passed through UHI will use commandline in the DTB and + * the DTB passed as a separate buffer. Note that + * CMDLINE_PREFIX is skipped here intentionally, as it is + * used only in the legacy method */ + + if (arch_options.dtb_file) { + dtb_buf = slurp_file(arch_options.dtb_file, &dtb_length); + } else { + create_flatten_tree(&dtb_buf, &dtb_length, cmdline_buf + strlen(CMDLINE_PREFIX)); + } + + if (arch_options.initrd_file) { + initrd_buf = slurp_file(arch_options.initrd_file, &initrd_size); + + /* Create initrd entries in dtb - although at this time + * they would not point to the correct location */ + dtb_set_initrd(&dtb_buf, &dtb_length, (off_t)initrd_buf, (off_t)initrd_buf + initrd_size); + + initrd_base = add_buffer(info, initrd_buf, initrd_size, + initrd_size, sizeof(void *), + _ALIGN_UP(kernel_addr + kernel_size + dtb_length, + pagesize), 0x0fffffff, 1); + + /* Now that the buffer for initrd is prepared, update the dtb + * with an appropriate location */ + dtb_set_initrd(&dtb_buf, &dtb_length, initrd_base, initrd_base + initrd_size); + + /* Add the initrd parameters to cmdline */ + patch_initrd_info(cmdline_buf, initrd_base, initrd_size); + } + /* This is a legacy method for commandline passing used + * currently by Octeon CPUs only */ + add_buffer(info, cmdline_buf, sizeof(cmdline_buf), + sizeof(cmdline_buf), sizeof(void *), + cmdline_addr, 0x0fffffff, 1); + + add_buffer(info, dtb_buf, dtb_length, dtb_length, 0, + _ALIGN_UP(kernel_addr + kernel_size, pagesize), + 0x0fffffff, 1); + + return 0; +} + diff --git a/kexec/arch/mips/kexec-elf-rel-mips.c b/kexec/arch/mips/kexec-elf-rel-mips.c new file mode 100644 index 0000000..5bc22d5 --- /dev/null +++ b/kexec/arch/mips/kexec-elf-rel-mips.c @@ -0,0 +1,46 @@ +/* + * kexec-elf-rel-mips.c - kexec Elf relocation routines + * Copyright (C) 2007 Francesco Chiechi, Alessandro Rubini + * Copyright (C) 2007 Tvblob s.r.l. + * + * derived from ../ppc/kexec-elf-rel-ppc.c + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS32) { + return 0; + } + if (ehdr->e_machine != EM_MIPS) { + return 0; + } + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), + unsigned long r_type, + void *UNUSED(location), + unsigned long UNUSED(address), + unsigned long UNUSED(value)) +{ + switch(r_type) { + + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } + return; +} diff --git a/kexec/arch/mips/kexec-mips.c b/kexec/arch/mips/kexec-mips.c new file mode 100644 index 0000000..d8cbea8 --- /dev/null +++ b/kexec/arch/mips/kexec-mips.c @@ -0,0 +1,191 @@ +/* + * kexec-mips.c - kexec for mips + * Copyright (C) 2007 Francesco Chiechi, Alessandro Rubini + * Copyright (C) 2007 Tvblob s.r.l. + * + * derived from ../ppc/kexec-ppc.c + * Copyright (C) 2004, 2005 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-mips.h" +#include <arch/options.h> + +/* Currently not used but required by top-level fs2dt code */ +off_t initrd_base = 0; +off_t initrd_size = 0; + +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +/* Return a sorted list of memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + int memory_ranges = 0; + + const char iomem[] = "/proc/iomem"; + char line[MAX_LINE]; + FILE *fp; + unsigned long long start, end; + char *str; + int type, consumed, count; + + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", iomem, strerror(errno)); + return -1; + } + while (fgets(line, sizeof(line), fp) != 0) { + if (memory_ranges >= MAX_MEMORY_RANGES) + break; + count = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + end = end + 1; + if (memcmp(str, "System RAM\n", 11) == 0) { + type = RANGE_RAM; + } else if (memcmp(str, "reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } else { + continue; + } + if (memory_ranges > 0 && + memory_range[memory_ranges - 1].end == start && + memory_range[memory_ranges - 1].type == type) { + memory_range[memory_ranges - 1].end = end; + } else { + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + memory_ranges++; + } + } + fclose(fp); + *range = memory_range; + *ranges = memory_ranges; + return 0; +} + +struct file_type file_type[] = { + {"elf-mips", elf_mips_probe, elf_mips_load, elf_mips_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ + printf( + " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --dtb=FILE Use FILE as the device tree blob.\n" + " --initrd=FILE Use FILE as initial ramdisk.\n" + " --reuse-cmdline Use kernel command line from running system.\n" + ); +} + +struct arch_options_t arch_options = { +#ifdef __mips64 + .core_header_type = CORE_TYPE_ELF64, +#else + .core_header_type = CORE_TYPE_ELF32, +#endif +}; + +int arch_process_options(int argc, char **argv) +{ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR; + int opt; + char *cmdline = NULL; + const char *append = NULL; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + cmdline = get_command_line(); + break; + case OPT_DTB: + arch_options.dtb_file = optarg; + break; + case OPT_RAMDISK: + arch_options.initrd_file = optarg; + break; + default: + break; + } + } + + arch_options.command_line = concat_cmdline(cmdline, append); + + dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__, + arch_options.command_line); + dbgprintf("%s:%d: initrd: %s\n", __func__, __LINE__, + arch_options.initrd_file); + dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__, + arch_options.dtb_file); + + return 0; +} + +const struct arch_map_entry arches[] = { + /* For compatibility with older patches + * use KEXEC_ARCH_DEFAULT instead of KEXEC_ARCH_MIPS here. + */ + { "mips", KEXEC_ARCH_MIPS }, + { "mips64", KEXEC_ARCH_MIPS }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +unsigned long virt_to_phys(unsigned long addr) +{ + return addr & 0x7fffffff; +} + +/* + * add_segment() should convert base to a physical address on mips, + * while the default is just to work with base as is */ +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, virt_to_phys(base), memsz, 1); +} + +/* + * add_buffer() should convert base to a physical address on mips, + * while the default is just to work with base as is */ +unsigned long add_buffer(struct kexec_info *info, const void *buf, + unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, + unsigned long buf_max, int buf_end) +{ + return add_buffer_phys_virt(info, buf, bufsz, memsz, buf_align, + buf_min, buf_max, buf_end, 1); +} + diff --git a/kexec/arch/mips/kexec-mips.h b/kexec/arch/mips/kexec-mips.h new file mode 100644 index 0000000..222c815 --- /dev/null +++ b/kexec/arch/mips/kexec-mips.h @@ -0,0 +1,30 @@ +#ifndef KEXEC_MIPS_H +#define KEXEC_MIPS_H + +#include <sys/types.h> + +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 16 + +#define MAX_MEMORY_RANGES 64 +#define MAX_LINE 160 + +#define CORE_TYPE_ELF32 1 +#define CORE_TYPE_ELF64 2 + +int elf_mips_probe(const char *buf, off_t len); +int elf_mips_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_mips_usage(void); + +struct arch_options_t { + char *command_line; + char *dtb_file; + char *initrd_file; + int core_header_type; +}; + +extern struct memory_ranges usablemem_rgns; +extern off_t initrd_base, initrd_size; + +#endif /* KEXEC_MIPS_H */ diff --git a/kexec/arch/ppc/Makefile b/kexec/arch/ppc/Makefile new file mode 100644 index 0000000..71871f1 --- /dev/null +++ b/kexec/arch/ppc/Makefile @@ -0,0 +1,34 @@ +# +# kexec ppc (linux booting linux) +# +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +ppc_KEXEC_SRCS = kexec/arch/ppc/kexec-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-elf-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-elf-rel-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-dol-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-uImage-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/ppc-setup-simple.S +ppc_KEXEC_SRCS += kexec/arch/ppc/ppc-setup-dol.S +ppc_KEXEC_SRCS += kexec/arch/ppc/fixup_dtb.c +ppc_KEXEC_SRCS += kexec/arch/ppc/fs2dt.c +ppc_KEXEC_SRCS += kexec/arch/ppc/crashdump-powerpc.c + +ppc_UIMAGE = kexec/kexec-uImage.c + +ppc_libfdt_SRCS = kexec/arch/ppc/libfdt-wrapper.c +libfdt_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) +ppc_ARCH_REUSE_INITRD = + +ppc_CPPFLAGS = -I$(srcdir)/kexec/libfdt + +ppc_KEXEC_SRCS += $(libfdt_SRCS) $(ppc_libfdt_SRCS) + +ppc_ASFLAGS = -Wa,--noexecstack + +dist += kexec/arch/ppc/Makefile $(ppc_KEXEC_SRCS) \ + kexec/arch/ppc/crashdump-powerpc.h kexec/arch/ppc/fixup_dtb.h \ + kexec/arch/ppc/kexec-ppc.h kexec/arch/ppc/ops.h \ + kexec/arch/ppc/ppc_asm.h \ + kexec/arch/ppc/include/page.h kexec/arch/ppc/include/types.h \ + kexec/arch/ppc/include/arch/options.h diff --git a/kexec/arch/ppc/crashdump-powerpc.c b/kexec/arch/ppc/crashdump-powerpc.c new file mode 100644 index 0000000..15e8531 --- /dev/null +++ b/kexec/arch/ppc/crashdump-powerpc.c @@ -0,0 +1,433 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include <dirent.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../crashdump.h" +#include "kexec-ppc.h" +#include "crashdump-powerpc.h" + +#define DEVTREE_CRASHKERNEL_BASE "/proc/device-tree/chosen/linux,crashkernel-base" +#define DEVTREE_CRASHKERNEL_SIZE "/proc/device-tree/chosen/linux,crashkernel-size" + +#ifdef CONFIG_PPC64 +static struct crash_elf_info elf_info64 = { +class: ELFCLASS64, +data: ELFDATA2MSB, +machine: EM_PPC64, +page_offset: PAGE_OFFSET, +lowmem_limit: MAXMEM, +}; +#endif +static struct crash_elf_info elf_info32 = { +class: ELFCLASS32, +data: ELFDATA2MSB, +#ifdef CONFIG_PPC64 +machine: EM_PPC64, +#else +machine: EM_PPC, +#endif +page_offset: PAGE_OFFSET, +lowmem_limit: MAXMEM, +}; + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region + */ +static struct memory_range *crash_memory_range; +static int crash_nr_memory_ranges; + +/* Define a variable to replace the CRASH_MAX_MEMORY_RANGES macro */ +static int crash_max_memory_ranges; + +/* + * Used to save various memory ranges/regions needed for the captured + * kernel to boot. (lime memmap= option in other archs) + */ +mem_rgns_t usablemem_rgns = {0, NULL}; + +/* Append a segment to crash_memory_range, splitting it into two if + * it contains both lowmem and highmem */ +static void add_crash_memory_range(unsigned long long start, + unsigned long long end) +{ +#ifndef CONFIG_PPC64 + if (start < elf_info32.lowmem_limit && end > elf_info32.lowmem_limit) { + add_crash_memory_range(start, elf_info32.lowmem_limit); + add_crash_memory_range(elf_info32.lowmem_limit, end); + return; + } +#endif + + if (crash_nr_memory_ranges < crash_max_memory_ranges) { + crash_memory_range[crash_nr_memory_ranges].start = start; + crash_memory_range[crash_nr_memory_ranges].end = end; + crash_memory_range[crash_nr_memory_ranges].type = RANGE_RAM; + } + + crash_nr_memory_ranges++; +} + + +/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to + * create Elf headers. Keeping it separate from get_memory_ranges() as + * requirements are different in the case of normal kexec and crashdumps. + * + * Normal kexec needs to look at all of available physical memory irrespective + * of the fact how much of it is being used by currently running kernel. + * Crashdumps need to have access to memory regions actually being used by + * running kernel. Expecting a different file/data structure than /proc/iomem + * to look into down the line. May be something like /proc/kernelmem or may + * be zone data structures exported from kernel. + */ +static int get_crash_memory_ranges(struct memory_range **range, int *ranges) +{ + + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + DIR *dir, *dmem; + int fd; + struct dirent *dentry, *mentry; + int n, crash_rng_len = 0; + unsigned long long start, end, cstart, cend; + + crash_max_memory_ranges = max_memory_ranges + 6; + crash_rng_len = sizeof(struct memory_range) * crash_max_memory_ranges; + + crash_memory_range = (struct memory_range *) malloc(crash_rng_len); + if (!crash_memory_range) { + fprintf(stderr, "Allocation for crash memory range failed\n"); + return -1; + } + memset(crash_memory_range, 0, crash_rng_len); + crash_nr_memory_ranges = 0; + +#ifndef CONFIG_BOOKE + /* create a separate program header for the backup region */ + add_crash_memory_range(BACKUP_SRC_START, BACKUP_SRC_END + 1); +#endif + + dir = opendir(device_tree); + if (!dir) { + perror(device_tree); + goto err; + } + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "memory@", 7) + && strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + dmem = opendir(fname); + if (!dmem) { + perror(fname); + closedir(dir); + goto err; + } + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + fd = open(fname, O_RDONLY); + if (fd < 0) { + perror(fname); + closedir(dmem); + closedir(dir); + goto err; + } + n = read_memory_region_limits(fd, &start, &end); + /* We are done with fd, close it. */ + close(fd); + if (n != 0) { + closedir(dmem); + closedir(dir); + goto err; + } +#ifndef CONFIG_BOOKE + if (start == 0 && end >= (BACKUP_SRC_END + 1)) + start = BACKUP_SRC_END + 1; +#endif + + /* + * Exclude the region that lies within crashkernel. + * If memory limit is set then exclude memory region + * above it. + */ + + if (memory_limit) { + if (start >= memory_limit) + continue; + if (end > memory_limit) + end = memory_limit; + } + + /* + * Exclure region used by crash kernel + */ + cstart = crash_base; + cend = crash_base + crash_size; + + if (cstart >= end || cend <= start) + add_crash_memory_range(start, end); + else { + if (start < cstart) + add_crash_memory_range(start, cstart); + if (cend < end) + add_crash_memory_range(cend, end); + } + } + closedir(dmem); + } + closedir(dir); + + /* + * If RTAS region is overlapped with crashkernel, need to create ELF + * Program header for the overlapped memory. + */ + if (crash_base < rtas_base + rtas_size && + rtas_base < crash_base + crash_size) { + cstart = rtas_base; + cend = rtas_base + rtas_size; + if (cstart < crash_base) + cstart = crash_base; + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + add_crash_memory_range(cstart, cend); + } + + if (crash_nr_memory_ranges >= crash_max_memory_ranges) { + fprintf(stderr, + "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + goto err; + } + + *range = crash_memory_range; + *ranges = crash_nr_memory_ranges; + + int j; + dbgprintf("CRASH MEMORY RANGES\n"); + for (j = 0; j < *ranges; j++) { + start = crash_memory_range[j].start; + end = crash_memory_range[j].end; + dbgprintf("%016Lx-%016Lx\n", start, end); + } + + return 0; + +err: + if (crash_memory_range) + free(crash_memory_range); + return -1; +} + +/* Converts unsigned long to ascii string. */ +static void ulltoa(unsigned long long i, char *str) +{ + int j = 0, k; + char tmp; + + do { + str[j++] = i % 10 + '0'; + } while ((i /= 10) > 0); + str[j] = '\0'; + + /* Reverse the string. */ + for (j = 0, k = strlen(str) - 1; j < k; j++, k--) { + tmp = str[k]; + str[k] = str[j]; + str[j] = tmp; + } +} + +/* Append str to cmdline */ +static void add_cmdline(char *cmdline, char *str) +{ + int cmdline_size; + int cmdlen = strlen(cmdline) + strlen(str); + + cmdline_size = COMMAND_LINE_SIZE; + if (cmdlen > (cmdline_size - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); +} + +static int add_cmdline_param(char *cmdline, unsigned long long addr, + char *cmdstr, char *byte) +{ + int align = 1024; + char str[COMMAND_LINE_SIZE], *ptr; + + /* Passing in =xxxK / =xxxM format. Saves space required in cmdline.*/ + switch (byte[0]) { + case 'K': + if (addr%align) + return -1; + addr = addr/align; + break; + case 'M': + addr = addr/(align *align); + break; + } + ptr = str; + strcpy(str, cmdstr); + ptr += strlen(str); + ulltoa(addr, ptr); + strcat(str, byte); + + add_cmdline(cmdline, str); + + dbgprintf("Command line after adding elfcorehdr: %s\n", cmdline); + + return 0; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for backup region, another segment for storing elf headers + * for crash memory image. + */ +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + unsigned long max_addr, unsigned long min_base) +{ + void *tmp; + unsigned long sz, elfcorehdr; + int nr_ranges, align = 1024, i; + unsigned long long end; + struct memory_range *mem_range; + + if (get_crash_memory_ranges(&mem_range, &nr_ranges) < 0) + return -1; + + info->backup_src_start = BACKUP_SRC_START; + info->backup_src_size = BACKUP_SRC_SIZE; +#ifndef CONFIG_BOOKE + /* Create a backup region segment to store backup data*/ + sz = _ALIGN(BACKUP_SRC_SIZE, align); + tmp = xmalloc(sz); + memset(tmp, 0, sz); + info->backup_start = add_buffer(info, tmp, sz, sz, align, + 0, max_addr, 1); + reserve(info->backup_start, sz); +#endif + + /* On powerpc memory ranges in device-tree is denoted as start + * and size rather than start and end, as is the case with + * other architectures like i386 . Because of this when loading + * the memory ranges in crashdump-elf.c the filesz calculation + * [ end - start + 1 ] goes for a toss. + * + * To be in sync with other archs adjust the end value for + * every crash memory range before calling the generic function + */ + + for (i = 0; i < nr_ranges; i++) { + end = crash_memory_range[i].end - 1; + crash_memory_range[i].end = end; + } + + +#ifdef CONFIG_PPC64 + /* Create elf header segment and store crash image data. */ + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + if (crash_create_elf64_headers(info, &elf_info64, + crash_memory_range, nr_ranges, &tmp, + &sz, ELF_CORE_HEADER_ALIGN) < 0) + return -1; + } else if (crash_create_elf32_headers(info, &elf_info32, + crash_memory_range, nr_ranges, &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) + return -1; +#else + if (crash_create_elf32_headers(info, &elf_info32, crash_memory_range, + nr_ranges, &tmp, &sz, ELF_CORE_HEADER_ALIGN) + < 0) + return -1; +#endif + + elfcorehdr = add_buffer(info, tmp, sz, sz, align, + min_base, max_addr, 1); + reserve(elfcorehdr, sz); + /* modify and store the cmdline in a global array. This is later + * read by flatten_device_tree and modified if required + */ + add_cmdline_param(mod_cmdline, elfcorehdr, " elfcorehdr=", "K"); + add_cmdline(mod_cmdline, " maxcpus=1"); + return 0; +} + +/* + * Used to save various memory regions needed for the captured kernel. + */ + +void add_usable_mem_rgns(unsigned long long base, unsigned long long size) +{ + int i; + unsigned long long end = base + size; + unsigned long long ustart, uend; + + base = _ALIGN_DOWN(base, getpagesize()); + end = _ALIGN_UP(end, getpagesize()); + + for (i = 0; i < usablemem_rgns.size; i++) { + ustart = usablemem_rgns.ranges[i].start; + uend = usablemem_rgns.ranges[i].end; + if (base < uend && end > ustart) { + if ((base >= ustart) && (end <= uend)) + return; + if (base < ustart && end > uend) { + usablemem_rgns.ranges[i].start = base; + usablemem_rgns.ranges[i].end = end; + return; + } else if (base < ustart) { + usablemem_rgns.ranges[i].start = base; + return; + } else if (end > uend) { + usablemem_rgns.ranges[i].end = end; + return; + } + } + } + usablemem_rgns.ranges[usablemem_rgns.size].start = base; + usablemem_rgns.ranges[usablemem_rgns.size++].end = end; + + dbgprintf("usable memory rgns size:%u base:%llx size:%llx\n", + usablemem_rgns.size, base, size); +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + unsigned long long value; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_BASE, &value)) + *start = value; + else + return -1; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_SIZE, &value)) + *end = *start + value - 1; + else + return -1; + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + int fd; + + fd = open(DEVTREE_CRASHKERNEL_BASE, O_RDONLY); + if (fd < 0) + return 0; + close(fd); + return 1; +} + diff --git a/kexec/arch/ppc/crashdump-powerpc.h b/kexec/arch/ppc/crashdump-powerpc.h new file mode 100644 index 0000000..97b5095 --- /dev/null +++ b/kexec/arch/ppc/crashdump-powerpc.h @@ -0,0 +1,45 @@ +#ifndef CRASHDUMP_POWERPC_H +#define CRASHDUMP_POWERPC_H + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + unsigned long max_addr, unsigned long min_base); +void add_usable_mem_rgns(unsigned long long base, unsigned long long size); + +extern struct arch_options_t arch_options; + +#ifdef CONFIG_PPC64 +#define PAGE_OFFSET 0xC000000000000000UL +#define VMALLOCBASE 0xD000000000000000UL +#define MAXMEM (-KERNELBASE-VMALLOCBASE) +#else +#define PAGE_OFFSET 0xC0000000 +#define MAXMEM 0x30000000 /* Use CONFIG_LOWMEM_SIZE from kernel */ +#endif + +#define KERNELBASE PAGE_OFFSET +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) + +#ifdef CONFIG_BOOKE +/* We don't need backup region in Book E */ +#define BACKUP_SRC_START 0x0000 +#define BACKUP_SRC_END 0x0000 +#define BACKUP_SRC_SIZE 0x0000 +#else +/* Backup Region, First 64K of System RAM. */ +#define BACKUP_SRC_START 0x0000 +#define BACKUP_SRC_END 0xffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) +#endif + +#define KDUMP_BACKUP_LIMIT BACKUP_SRC_SIZE + +extern unsigned long long crash_base; +extern unsigned long long crash_size; +extern unsigned int rtas_base; +extern unsigned int rtas_size; +extern uint64_t opal_base; +extern uint64_t opal_size; +extern uint64_t memory_limit; + +#endif /* CRASHDUMP_POWERPC_H */ diff --git a/kexec/arch/ppc/fixup_dtb.c b/kexec/arch/ppc/fixup_dtb.c new file mode 100644 index 0000000..92a0bfd --- /dev/null +++ b/kexec/arch/ppc/fixup_dtb.c @@ -0,0 +1,408 @@ +#define _GNU_SOURCE +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include <libfdt.h> +#include "ops.h" +#include "page.h" +#include "fixup_dtb.h" +#include "kexec-ppc.h" + +const char proc_dts[] = "/proc/device-tree"; + +static void print_fdt_reserve_regions(char *blob_buf) +{ + int i, num; + + if (!kexec_debug) + return; + /* Print out a summary of the final reserve regions */ + num = fdt_num_mem_rsv(blob_buf); + dbgprintf ("reserve regions: %d\n", num); + for (i = 0; i < num; i++) { + uint64_t offset, size; + + if (fdt_get_mem_rsv(blob_buf, i, &offset, &size) == 0) { + dbgprintf("%d: offset: %llx, size: %llx\n", i, offset, size); + } else { + dbgprintf("Error retreiving reserved region\n"); + } + } +} + + +static void fixup_nodes(char *nodes[]) +{ + int index = 0; + char *fname; + char *prop_name; + char *node_name; + void *node; + int len; + char *content; + off_t content_size; + int ret; + + while (nodes[index]) { + + len = asprintf(&fname, "%s%s", proc_dts, nodes[index]); + if (len < 0) + die("asprintf() failed\n"); + + content = slurp_file(fname, &content_size); + if (!content) { + die("Can't open %s: %s\n", fname, strerror(errno)); + } + + prop_name = fname + len; + while (*prop_name != '/') + prop_name--; + + *prop_name = '\0'; + prop_name++; + + node_name = fname + sizeof(proc_dts) - 1; + + node = finddevice(node_name); + if (!node) + node = create_node(NULL, node_name + 1); + + ret = setprop(node, prop_name, content, content_size); + if (ret < 0) + die("setprop of %s/%s size: %ld failed: %s\n", + node_name, prop_name, content_size, + fdt_strerror(ret)); + + free(content); + free(fname); + index++; + }; +} + +/* + * command line priority: + * - use the supplied command line + * - if none available use the command line from .dtb + * - if not available use the current command line + */ +static void fixup_cmdline(const char *cmdline) +{ + void *chosen; + char *fixup_cmd_node[] = { + "/chosen/bootargs", + NULL, + }; + + chosen = finddevice("/chosen"); + + if (!cmdline) { + if (!chosen) + fixup_nodes(fixup_cmd_node); + } else { + if (!chosen) + chosen = create_node(NULL, "chosen"); + setprop_str(chosen, "bootargs", cmdline); + } + return; +} + +#define EXPAND_GRANULARITY 1024 + +static char *expand_buf(int minexpand, char *blob_buf, off_t *blob_size) +{ + int size = fdt_totalsize(blob_buf); + int rc; + + size = _ALIGN(size + minexpand, EXPAND_GRANULARITY); + blob_buf = realloc(blob_buf, size); + if (!blob_buf) + die("Couldn't find %d bytes to expand device tree\n\r", size); + rc = fdt_open_into(blob_buf, blob_buf, size); + if (rc != 0) + die("Couldn't expand fdt into new buffer: %s\n\r", + fdt_strerror(rc)); + + *blob_size = fdt_totalsize(blob_buf); + + return blob_buf; +} + +static void fixup_reserve_regions(struct kexec_info *info, char *blob_buf) +{ + int ret, i; + int nodeoffset; + u64 val = 0; + + /* If this is a KEXEC kernel we add all regions since they will + * all need to be saved */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + for (i = 0; i < info->nr_segments; i++) { + uint64_t address = (unsigned long)info->segment[i].mem; + uint64_t size = info->segment[i].memsz; + + while ((i+1) < info->nr_segments && + (address + size == (unsigned long)info->segment[i+1].mem)) { + size += info->segment[++i].memsz; + } + + ret = fdt_add_mem_rsv(blob_buf, address, size); + if (ret) { + printf("%s: Error adding memory range to memreserve!\n", + fdt_strerror(ret)); + goto out; + } + } + } else if (ramdisk || reuse_initrd) { + /* Otherwise we just add back the ramdisk and the device tree + * is already in the list */ + ret = fdt_add_mem_rsv(blob_buf, ramdisk_base, ramdisk_size); + if (ret) { + printf("%s: Unable to add new reserved memory for initrd flat device tree\n", + fdt_strerror(ret)); + goto out; + } + } + +#if 0 + /* XXX: Do not reserve spin-table for CPUs. */ + + /* Add reserve regions for cpu-release-addr */ + nodeoffset = fdt_node_offset_by_prop_value(blob_buf, -1, "device_type", "cpu", 4); + while (nodeoffset != -FDT_ERR_NOTFOUND) { + const void *buf; + int sz, ret; + u64 tmp; + + buf = fdt_getprop(blob_buf, nodeoffset, "cpu-release-addr", &sz); + + if (buf) { + if (sz == 4) { + tmp = *(u32 *)buf; + } else if (sz == 8) { + tmp = *(u64 *)buf; + } + + /* crude check to see if last value is repeated */ + if (_ALIGN_DOWN(tmp, PAGE_SIZE) != _ALIGN_DOWN(val, PAGE_SIZE)) { + val = tmp; + ret = fdt_add_mem_rsv(blob_buf, _ALIGN_DOWN(val, PAGE_SIZE), PAGE_SIZE); + if (ret) + printf("%s: Unable to add reserve for cpu-release-addr!\n", + fdt_strerror(ret)); + } + } + + nodeoffset = fdt_node_offset_by_prop_value(blob_buf, nodeoffset, + "device_type", "cpu", 4); + } +#endif + +out: + print_fdt_reserve_regions(blob_buf); +} + +static void fixup_memory(struct kexec_info *info, char *blob_buf) +{ + if (info->kexec_flags & KEXEC_ON_CRASH) { + int nodeoffset, len = 0; + u8 tmp[16]; + const unsigned long *addrcell, *sizecell; + + nodeoffset = fdt_path_offset(blob_buf, "/memory"); + + if (nodeoffset < 0) { + printf("Error searching for memory node!\n"); + return; + } + + addrcell = fdt_getprop(blob_buf, 0, "#address-cells", NULL); + /* use shifts and mask to ensure endianness */ + if ((addrcell) && (*addrcell == 2)) { + tmp[0] = (crash_base >> 56) & 0xff; + tmp[1] = (crash_base >> 48) & 0xff; + tmp[2] = (crash_base >> 40) & 0xff; + tmp[3] = (crash_base >> 32) & 0xff; + tmp[4] = (crash_base >> 24) & 0xff; + tmp[5] = (crash_base >> 16) & 0xff; + tmp[6] = (crash_base >> 8) & 0xff; + tmp[7] = (crash_base ) & 0xff; + len = 8; + } else { + tmp[0] = (crash_base >> 24) & 0xff; + tmp[1] = (crash_base >> 16) & 0xff; + tmp[2] = (crash_base >> 8) & 0xff; + tmp[3] = (crash_base ) & 0xff; + len = 4; + } + + sizecell = fdt_getprop(blob_buf, 0, "#size-cells", NULL); + /* use shifts and mask to ensure endianness */ + if ((sizecell) && (*sizecell == 2)) { + tmp[0+len] = (crash_size >> 56) & 0xff; + tmp[1+len] = (crash_size >> 48) & 0xff; + tmp[2+len] = (crash_size >> 40) & 0xff; + tmp[3+len] = (crash_size >> 32) & 0xff; + tmp[4+len] = (crash_size >> 24) & 0xff; + tmp[5+len] = (crash_size >> 16) & 0xff; + tmp[6+len] = (crash_size >> 8) & 0xff; + tmp[7+len] = (crash_size ) & 0xff; + len += 8; + } else { + tmp[0+len] = (crash_size >> 24) & 0xff; + tmp[1+len] = (crash_size >> 16) & 0xff; + tmp[2+len] = (crash_size >> 8) & 0xff; + tmp[3+len] = (crash_size ) & 0xff; + len += 4; + } + + if (fdt_setprop(blob_buf, nodeoffset, "reg", tmp, len) != 0) { + printf ("Error setting memory node!\n"); + } + + fdt_delprop(blob_buf, nodeoffset, "linux,usable-memory"); + } +} + +/* removes crashkernel nodes if they exist and we are *rebooting* + * into a crashkernel. These nodes should not exist after we + * crash and reboot into a new kernel + */ +static void fixup_crashkernel(struct kexec_info *info, char *blob_buf) +{ + int nodeoffset; + + nodeoffset = fdt_path_offset(blob_buf, "/chosen"); + + if (info->kexec_flags & KEXEC_ON_CRASH) { + if (nodeoffset < 0) { + printf("fdt_crashkernel: %s\n", fdt_strerror(nodeoffset)); + return; + } + + fdt_delprop(blob_buf, nodeoffset, "linux,crashkernel-base"); + fdt_delprop(blob_buf, nodeoffset, "linux,crashkernel-size"); + } +} +/* remove the old chosen nodes if they exist and add correct chosen + * nodes if we have an initd + */ +static void fixup_initrd(char *blob_buf) +{ + int err, nodeoffset; + unsigned long tmp; + + nodeoffset = fdt_path_offset(blob_buf, "/chosen"); + + if (nodeoffset < 0) { + printf("fdt_initrd: %s\n", fdt_strerror(nodeoffset)); + return; + } + + fdt_delprop(blob_buf, nodeoffset, "linux,initrd-start"); + fdt_delprop(blob_buf, nodeoffset, "linux,initrd-end"); + + if ((reuse_initrd || ramdisk) && + ((ramdisk_base != 0) && (ramdisk_size != 0))) { + tmp = ramdisk_base; + err = fdt_setprop(blob_buf, nodeoffset, + "linux,initrd-start", &tmp, sizeof(tmp)); + if (err < 0) { + printf("WARNING: " + "could not set linux,initrd-start %s.\n", + fdt_strerror(err)); + return; + } + + tmp = ramdisk_base + ramdisk_size; + err = fdt_setprop(blob_buf, nodeoffset, + "linux,initrd-end", &tmp, sizeof(tmp)); + if (err < 0) { + printf("WARNING: could not set linux,initrd-end %s.\n", + fdt_strerror(err)); + return; + } + } +} + +char *fixup_dtb_init(struct kexec_info *info, char *blob_buf, off_t *blob_size, + unsigned long hole_addr, unsigned long *dtb_addr) +{ + int ret, i, num = fdt_num_mem_rsv(blob_buf); + + fdt_init(blob_buf); + + /* Remove the existing reserve regions as they will no longer + * be valid after we reboot */ + for (i = num - 1; i >= 0; i--) { + ret = fdt_del_mem_rsv(blob_buf, i); + if (ret) { + printf("%s: Error deleting memory reserve region %d from device tree!\n", + fdt_strerror(ret), i); + } + } + + /* Pack the FDT first, so we don't grow excessively if there is already free space */ + ret = fdt_pack(blob_buf); + if (ret) + printf("%s: Unable to pack flat device tree\n", fdt_strerror(ret)); + + /* info->nr_segments just a guide, will grow by at least EXPAND_GRANULARITY */ + blob_buf = expand_buf(info->nr_segments * sizeof(struct fdt_reserve_entry), + blob_buf, blob_size); + + /* add reserve region for *THIS* fdt */ + *dtb_addr = locate_hole(info, *blob_size, 0, + hole_addr, hole_addr+KERNEL_ACCESS_TOP, -1); + ret = fdt_add_mem_rsv(blob_buf, *dtb_addr, PAGE_ALIGN(*blob_size)); + if (ret) { + printf("%s: Unable to add new reserved memory for the flat device tree\n", + fdt_strerror(ret)); + } + + return blob_buf; +} + +static void save_fixed_up_dtb(char *blob_buf, off_t blob_size) +{ + FILE *fp; + + if (!kexec_debug) + return; + fp = fopen("debug.dtb", "w"); + if (fp) { + if ( blob_size == fwrite(blob_buf, sizeof(char), blob_size, fp)) { + dbgprintf("debug.dtb written\n"); + } else { + dbgprintf("Unable to write debug.dtb\n"); + } + + fclose(fp); + } else { + dbgprintf("Unable to dump flat device tree to debug.dtb\n"); + } +} + +char *fixup_dtb_finalize(struct kexec_info *info, char *blob_buf, off_t *blob_size, + char *nodes[], char *cmdline) +{ + fixup_nodes(nodes); + fixup_cmdline(cmdline); + fixup_reserve_regions(info, blob_buf); + fixup_memory(info, blob_buf); + fixup_initrd(blob_buf); + fixup_crashkernel(info, blob_buf); + + blob_buf = (char *)dt_ops.finalize(); + *blob_size = fdt_totalsize(blob_buf); + + save_fixed_up_dtb(blob_buf, *blob_size); + + return blob_buf; +} diff --git a/kexec/arch/ppc/fixup_dtb.h b/kexec/arch/ppc/fixup_dtb.h new file mode 100644 index 0000000..b706a5a --- /dev/null +++ b/kexec/arch/ppc/fixup_dtb.h @@ -0,0 +1,10 @@ +#ifndef __FIXUP_DTB_H +#define __FIXUP_DTB_H + +char *fixup_dtb_init(struct kexec_info *info, char *blob_buf, off_t *blob_size, + unsigned long hole_addr, unsigned long *dtb_addr); + +char *fixup_dtb_finalize(struct kexec_info *info, char *blob_buf, off_t *blob_size, + char *nodes[], char *cmdline); + +#endif diff --git a/kexec/arch/ppc/fs2dt.c b/kexec/arch/ppc/fs2dt.c new file mode 100644 index 0000000..fed499b --- /dev/null +++ b/kexec/arch/ppc/fs2dt.c @@ -0,0 +1,471 @@ +/* + * fs2dt: creates a flattened device-tree + * + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com), IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <sys/types.h> +#include <sys/stat.h> + +#include <fcntl.h> +#include <dirent.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include "../../kexec.h" +#include "kexec-ppc.h" +#include "types.h" + +#define MAXPATH 1024 /* max path name length */ +#define NAMESPACE 16384 /* max bytes for property names */ +#define TREEWORDS 65536 /* max 32 bit words for properties */ +#define MEMRESERVE 256 /* max number of reserved memory blks */ +#define MAX_MEMORY_RANGES 1024 + +static char pathname[MAXPATH]; +static char propnames[NAMESPACE] = { 0 }; +static unsigned dtstruct[TREEWORDS], *dt; +static unsigned long long mem_rsrv[2*MEMRESERVE] = { 0, 0 }; + +static int crash_param; +static char local_cmdline[COMMAND_LINE_SIZE] = { "" }; +static unsigned *dt_len; /* changed len of modified cmdline + in flat device-tree */ +static struct bootblock bb[1]; + +void reserve(unsigned long long where, unsigned long long length) +{ + size_t offset; + + for (offset = 0; mem_rsrv[offset + 1]; offset += 2) + ; + + if (offset + 4 >= 2 * MEMRESERVE) + die("unrecoverable error: exhasuted reservation meta data\n"); + + mem_rsrv[offset] = where; + mem_rsrv[offset + 1] = length; + mem_rsrv[offset + 3] = 0; /* N.B: don't care about offset + 2 */ +} + +/* look for properties we need to reserve memory space for */ +static void checkprop(char *name, unsigned *data, int len) +{ + static unsigned long long base, size, end; + + if ((data == NULL) && (base || size || end)) + die("unrecoverable error: no property data"); + else if (!strcmp(name, "linux,rtas-base")) + base = *data; + else if (!strcmp(name, "linux,tce-base")) + base = *(unsigned long long *) data; + else if (!strcmp(name, "rtas-size") || + !strcmp(name, "linux,tce-size")) + size = *data; + else if (reuse_initrd && !strcmp(name, "linux,initrd-start")) + if (len == 8) + base = *(unsigned long long *) data; + else + base = *data; + else if (reuse_initrd && !strcmp(name, "linux,initrd-end")) + end = *(unsigned long long *) data; + + if (size && end) + die("unrecoverable error: size and end set at same time\n"); + if (base && size) { + reserve(base, size); + base = 0; + size = 0; + } + if (base && end) { + reserve(base, end-base); + base = 0; + end = 0; + } +} + +/* + * return the property index for a property name, creating a new one + * if needed. + */ +static unsigned propnum(const char *name) +{ + unsigned offset = 0; + + while (propnames[offset]) + if (strcmp(name, propnames+offset)) + offset += strlen(propnames+offset)+1; + else + return offset; + + if (NAMESPACE - offset < strlen(name) + 1) + die("unrecoverable error: propnames overrun\n"); + + strcpy(propnames+offset, name); + + return offset; +} + +static void add_usable_mem_property(int fd, int len) +{ + char fname[MAXPATH], *bname; + unsigned long buf[2]; + unsigned long ranges[2*MAX_MEMORY_RANGES]; + unsigned long long base, end, loc_base, loc_end; + int range, rlen = 0; + + strcpy(fname, pathname); + bname = strrchr(fname, '/'); + bname[0] = '\0'; + bname = strrchr(fname, '/'); + if (strncmp(bname, "/memory@", 8) && strcmp(bname, "/memory")) + return; + + if (lseek(fd, 0, SEEK_SET) < 0) + die("unrecoverable error: error seeking in \"%s\": %s\n", + pathname, strerror(errno)); + if (read_memory_region_limits(fd, &base, &end) != 0) + die("unrecoverable error: error parsing memory/reg limits\n"); + + for (range = 0; range < usablemem_rgns.size; range++) { + loc_base = usablemem_rgns.ranges[range].start; + loc_end = usablemem_rgns.ranges[range].end; + if (loc_base >= base && loc_end <= end) { + ranges[rlen++] = loc_base; + ranges[rlen++] = loc_end - loc_base; + } else if (base < loc_end && end > loc_base) { + if (loc_base < base) + loc_base = base; + if (loc_end > end) + loc_end = end; + ranges[rlen++] = loc_base; + ranges[rlen++] = loc_end - loc_base; + } + } + + if (!rlen) { + /* + * User did not pass any ranges for thsi region. Hence, write + * (0,0) duple in linux,usable-memory property such that + * this region will be ignored. + */ + ranges[rlen++] = 0; + ranges[rlen++] = 0; + } + + rlen = rlen * sizeof(unsigned long); + /* + * No add linux,usable-memory property. + */ + *dt++ = 3; + *dt++ = rlen; + *dt++ = propnum("linux,usable-memory"); + memcpy(dt, &ranges, rlen); + dt += (rlen + 3)/4; +} + +/* put all properties (files) in the property structure */ +static void putprops(char *fn, struct dirent **nlist, int numlist) +{ + struct dirent *dp; + int i = 0, fd, len; + struct stat statbuf; + + for (i = 0; i < numlist; i++) { + dp = nlist[i]; + strcpy(fn, dp->d_name); + + if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) + continue; + + if (lstat(pathname, &statbuf)) + die("unrecoverable error: could not stat \"%s\": %s\n", + pathname, strerror(errno)); + + if (!crash_param && !strcmp(fn, "linux,crashkernel-base")) + continue; + + if (!crash_param && !strcmp(fn, "linux,crashkernel-size")) + continue; + + /* + * This property will be created for each node during kexec + * boot. So, ignore it. + */ + if (!strcmp(dp->d_name, "linux,pci-domain") || + !strcmp(dp->d_name, "linux,htab-base") || + !strcmp(dp->d_name, "linux,htab-size") || + !strcmp(dp->d_name, "linux,kernel-end") || + !strcmp(dp->d_name, "linux,usable-memory")) + continue; + + /* This property will be created/modified later in putnode() + * So ignore it, unless we are reusing the initrd. + */ + if ((!strcmp(dp->d_name, "linux,initrd-start") || + !strcmp(dp->d_name, "linux,initrd-end")) && + !reuse_initrd) + continue; + + if (!S_ISREG(statbuf.st_mode)) + continue; + + len = statbuf.st_size; + + *dt++ = 3; + dt_len = dt; + *dt++ = len; + *dt++ = propnum(fn); + + fd = open(pathname, O_RDONLY); + if (fd == -1) + die("unrecoverable error: could not open \"%s\": %s\n", + pathname, strerror(errno)); + + if (read(fd, dt, len) != len) + die("unrecoverable error: could not read \"%s\": %s\n", + pathname, strerror(errno)); + + checkprop(fn, dt, len); + + /* Get the cmdline from the device-tree and modify it */ + if (!strcmp(dp->d_name, "bootargs")) { + int cmd_len; + char temp_cmdline[COMMAND_LINE_SIZE] = { "" }; + char *param = NULL; + cmd_len = strlen(local_cmdline); + if (cmd_len != 0) { + param = strstr(local_cmdline, "crashkernel="); + if (param) + crash_param = 1; + param = strstr(local_cmdline, "root="); + } + if (!param) { + char *old_param; + memcpy(temp_cmdline, dt, len); + param = strstr(temp_cmdline, "root="); + if (param) { + old_param = strtok(param, " "); + if (cmd_len != 0) + strcat(local_cmdline, " "); + strcat(local_cmdline, old_param); + } + } + strcat(local_cmdline, " "); + cmd_len = strlen(local_cmdline); + cmd_len = cmd_len + 1; + memcpy(dt, local_cmdline, cmd_len); + len = cmd_len; + *dt_len = cmd_len; + + dbgprintf("Modified cmdline:%s\n", local_cmdline); + + } + + dt += (len + 3)/4; + if (!strcmp(dp->d_name, "reg") && usablemem_rgns.size) + add_usable_mem_property(fd, len); + close(fd); + } + + fn[0] = '\0'; + checkprop(pathname, NULL, 0); +} + +/* + * Compare function used to sort the device-tree directories + * This function will be passed to scandir. + */ +static int comparefunc(const void *dentry1, const void *dentry2) +{ + char *str1 = (*(struct dirent **)dentry1)->d_name; + char *str2 = (*(struct dirent **)dentry2)->d_name; + char *p1, *p2; + int res = 0, max_len; + + /* + * strcmp scans from left to right and fails to idetify for some + * strings such as memory@10000000 and memory@f000000. + * Therefore, we get the wrong sorted order like memory@10000000 and + * memory@f000000. + */ + if ((p1 = strchr(str1, '@')) && (p2 = strchr(str2, '@'))) { + max_len = max(p1 - str1, p2 - str2); + if ((res = strncmp(str1, str2, max_len)) == 0) { + /* prefix is equal - compare part after '@' by length */ + p1++; p2++; + res = strlen(p1) - strlen(p2); + if (res == 0) + /* equal length, compare by strcmp() */ + res = strcmp(p1,p2); + } + } else { + res = strcmp(str1, str2); + } + + return res; +} + +/* + * put a node (directory) in the property structure. first properties + * then children. + */ +static void putnode(void) +{ + char *dn; + struct dirent *dp; + char *basename; + struct dirent **namelist; + int numlist, i; + struct stat statbuf; + + numlist = scandir(pathname, &namelist, 0, comparefunc); + if (numlist < 0) + die("unrecoverable error: could not scan \"%s\": %s\n", + pathname, strerror(errno)); + if (numlist == 0) + die("unrecoverable error: no directory entries in \"%s\"", + pathname); + + basename = strrchr(pathname, '/') + 1; + + *dt++ = 1; + strcpy((void *)dt, *basename ? basename : ""); + dt += strlen((void *)dt) / sizeof(unsigned) + 1; + + strcat(pathname, "/"); + dn = pathname + strlen(pathname); + + putprops(dn, namelist, numlist); + + /* + * Add initrd entries to the second kernel + * if + * a) a ramdisk is specified in cmdline + * OR + * b) reuseinitrd is specified and a initrd is + * used by the kernel. + * + */ + if ((ramdisk || (initrd_base && reuse_initrd)) + && !strcmp(basename, "chosen/")) { + int len = 8; + unsigned long long initrd_end; + *dt++ = 3; + *dt++ = len; + *dt++ = propnum("linux,initrd-start"); + + memcpy(dt, &initrd_base, len); + dt += (len + 3)/4; + + len = 8; + *dt++ = 3; + *dt++ = len; + *dt++ = propnum("linux,initrd-end"); + + initrd_end = initrd_base + initrd_size; + + memcpy(dt, &initrd_end, len); + dt += (len + 3)/4; + /* reserve the existing initrd image in case of reuse_initrd */ + if (initrd_base && initrd_size && reuse_initrd) + reserve(initrd_base, initrd_size); + } + + for (i = 0; i < numlist; i++) { + dp = namelist[i]; + strcpy(dn, dp->d_name); + free(namelist[i]); + + if (!strcmp(dn, ".") || !strcmp(dn, "..")) + continue; + + if (lstat(pathname, &statbuf)) + die("unrecoverable error: could not stat \"%s\": %s\n", + pathname, strerror(errno)); + + if (S_ISDIR(statbuf.st_mode)) + putnode(); + } + + *dt++ = 2; + dn[-1] = '\0'; + free(namelist); +} + +int create_flatten_tree(struct kexec_info *info, unsigned char **bufp, + unsigned long *sizep, char *cmdline) +{ + unsigned long len; + unsigned long tlen; + unsigned char *buf; + unsigned long me; + + me = 0; + + strcpy(pathname, "/proc/device-tree/"); + + dt = dtstruct; + + if (cmdline) + strcpy(local_cmdline, cmdline); + + putnode(); + *dt++ = 9; + + len = _ALIGN(sizeof(bb[0]), 8); + + bb->off_mem_rsvmap = len; + + for (len = 1; mem_rsrv[len]; len += 2) + ; + len += 3; + len *= sizeof(mem_rsrv[0]); + + bb->off_dt_struct = bb->off_mem_rsvmap + len; + + len = dt - dtstruct; + len *= sizeof(unsigned); + bb->dt_struct_size = len; + bb->off_dt_strings = bb->off_dt_struct + len; + + len = propnum(""); + bb->dt_strings_size = len; + len = _ALIGN(len, 4); + bb->totalsize = bb->off_dt_strings + len; + + bb->magic = 0xd00dfeed; + bb->version = 17; + bb->last_comp_version = 16; + + reserve(me, bb->totalsize); /* patched later in kexec_load */ + + buf = (unsigned char *) malloc(bb->totalsize); + *bufp = buf; + memcpy(buf, bb, bb->off_mem_rsvmap); + tlen = bb->off_mem_rsvmap; + memcpy(buf+tlen, mem_rsrv, bb->off_dt_struct - bb->off_mem_rsvmap); + tlen = tlen + (bb->off_dt_struct - bb->off_mem_rsvmap); + memcpy(buf+tlen, dtstruct, bb->off_dt_strings - bb->off_dt_struct); + tlen = tlen + (bb->off_dt_strings - bb->off_dt_struct); + memcpy(buf+tlen, propnames, bb->totalsize - bb->off_dt_strings); + tlen = tlen + bb->totalsize - bb->off_dt_strings; + *sizep = tlen; + return 0; +} diff --git a/kexec/arch/ppc/include/arch/options.h b/kexec/arch/ppc/include/arch/options.h new file mode 100644 index 0000000..b2176ab --- /dev/null +++ b/kexec/arch/ppc/include/arch/options.h @@ -0,0 +1,47 @@ +#ifndef KEXEC_ARCH_PPC_OPTIONS_H +#define KEXEC_ARCH_PPC_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) + +/* All 'local' loader options: */ +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_GAMECUBE (OPT_ARCH_MAX+1) +#define OPT_DTB (OPT_ARCH_MAX+2) +#define OPT_NODES (OPT_ARCH_MAX+3) +#define OPT_RAMDISK (OPT_ARCH_MAX+4) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + {"command-line", 1, 0, OPT_APPEND},\ + {"append", 1, 0, OPT_APPEND},\ + {"ramdisk", 1, 0, OPT_APPEND},\ + {"initrd", 1, 0, OPT_APPEND},\ + {"gamecube", 1, 0, OPT_GAMECUBE},\ + {"dtb", 1, 0, OPT_DTB},\ + {"reuse-node", 1, 0, OPT_NODES}, + +#define KEXEC_ALL_OPT_STR KEXEC_OPT_STR + +#endif /* KEXEC_ARCH_PPC_OPTIONS_H */ diff --git a/kexec/arch/ppc/include/page.h b/kexec/arch/ppc/include/page.h new file mode 100644 index 0000000..65877bc --- /dev/null +++ b/kexec/arch/ppc/include/page.h @@ -0,0 +1,27 @@ +#ifndef _PPC_BOOT_PAGE_H +#define _PPC_BOOT_PAGE_H +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifdef __ASSEMBLY__ +#define ASM_CONST(x) x +#else +#define __ASM_CONST(x) x##UL +#define ASM_CONST(x) __ASM_CONST(x) +#endif + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1))) + +#endif /* _PPC_BOOT_PAGE_H */ diff --git a/kexec/arch/ppc/include/types.h b/kexec/arch/ppc/include/types.h new file mode 100644 index 0000000..31393d1 --- /dev/null +++ b/kexec/arch/ppc/include/types.h @@ -0,0 +1,27 @@ +#ifndef _TYPES_H_ +#define _TYPES_H_ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +typedef signed char s8; +typedef short s16; +typedef int s32; +typedef long long s64; + +#define min(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#define max(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) + +#endif /* _TYPES_H_ */ diff --git a/kexec/arch/ppc/kexec-dol-ppc.c b/kexec/arch/ppc/kexec-dol-ppc.c new file mode 100644 index 0000000..800c072 --- /dev/null +++ b/kexec/arch/ppc/kexec-dol-ppc.c @@ -0,0 +1,468 @@ +/* + * kexec-dol-ppc.c - kexec DOL executable loader for the PowerPC + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "kexec-ppc.h" +#include <arch/options.h> + +static int debug = 0; + +/* + * I've found out there DOLs with unaligned and/or overlapping sections. + * I assume that sizes of sections can be wrong on these DOLs so I trust + * better start of sections. + * In order to load DOLs, I first extend sections to page aligned boundaries + * and then merge overlapping sections starting from lower addresses. + * -- Albert Herranz + */ + +/* DOL related stuff */ + +#define DOL_HEADER_SIZE 0x100 + +#define DOL_SECT_MAX_TEXT 7 /* text sections */ +#define DOL_SECT_MAX_DATA 11 /* data sections */ +#define DOL_MAX_SECT (DOL_SECT_MAX_TEXT+DOL_SECT_MAX_DATA) + +/* this is the DOL executable header */ +typedef struct { + uint32_t offset_text[DOL_SECT_MAX_TEXT]; /* in the file */ + uint32_t offset_data[DOL_SECT_MAX_DATA]; + uint32_t address_text[DOL_SECT_MAX_TEXT]; /* in memory */ + uint32_t address_data[DOL_SECT_MAX_DATA]; + uint32_t size_text[DOL_SECT_MAX_TEXT]; + uint32_t size_data[DOL_SECT_MAX_DATA]; + uint32_t address_bss; + uint32_t size_bss; + uint32_t entry_point; +} dol_header; + +#define dol_sect_offset(hptr, index) \ + ((index >= DOL_SECT_MAX_TEXT)? \ + hptr->offset_data[index - DOL_SECT_MAX_TEXT] \ + :hptr->offset_text[index]) +#define dol_sect_address(hptr, index) \ + ((index >= DOL_SECT_MAX_TEXT)? \ + hptr->address_data[index - DOL_SECT_MAX_TEXT] \ + :hptr->address_text[index]) +#define dol_sect_size(hptr, index) \ + ((index >= DOL_SECT_MAX_TEXT)? \ + hptr->size_data[index - DOL_SECT_MAX_TEXT] \ + :hptr->size_text[index]) +#define dol_sect_type(index) \ + ((index >= DOL_SECT_MAX_TEXT) ? "data" : "text") + +typedef struct { + uint32_t sects_bitmap; + uint32_t start; + uint32_t size; +} dol_segment; + +#define dol_seg_end(s1) \ + (s1->start + s1->size) +#define dol_seg_after_sect(s1, s2) \ + (s1->start >= dol_seg_end(s2)) +#define dol_seg_overlaps(s1, s2) \ + (!(dol_seg_after_sect(s1,s2) || dol_seg_after_sect(s2,s1))) + +/* same as in asm/page.h */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#define MAX_COMMAND_LINE 256 + +#define UPSZ(X) _ALIGN_UP(sizeof(X), 4) +static struct boot_notes { + Elf_Bhdr hdr; + Elf_Nhdr bl_hdr; + unsigned char bl_desc[UPSZ(BOOTLOADER)]; + Elf_Nhdr blv_hdr; + unsigned char blv_desc[UPSZ(BOOTLOADER_VERSION)]; + Elf_Nhdr cmd_hdr; + unsigned char command_line[0]; +} elf_boot_notes = { + .hdr = { + .b_signature = 0x0E1FB007, + .b_size = sizeof(elf_boot_notes), + .b_checksum = 0, + .b_records = 3, + }, + .bl_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER), + .n_type = EBN_BOOTLOADER_NAME, + }, + .bl_desc = BOOTLOADER, + .blv_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER_VERSION), + .n_type = EBN_BOOTLOADER_VERSION, + }, + .blv_desc = BOOTLOADER_VERSION, + .cmd_hdr = { + .n_namesz = 0, + .n_descsz = 0, + .n_type = EBN_COMMAND_LINE, + }, +}; + +void print_sects_bitmap(dol_segment * seg) +{ + int i, first_seen; + + printf("\t" "sects_bitmap"); + first_seen = 0; + for (i = 0; i < DOL_MAX_SECT; i++) { + if ((seg->sects_bitmap & (1 << i)) == 0) + continue; + printf("%c%d", (first_seen ? ',' : '='), i); + first_seen = 1; + } + printf("\n"); +} + +void print_dol_segment(dol_segment * seg) +{ + printf("dol segment:\n"); + printf("\t" "start=%08lx, size=%ld (%08lx)\n", + (unsigned long)seg->start, (unsigned long)seg->size, + (unsigned long)seg->size); + printf("\t" "end=%08lx\n", (unsigned long)dol_seg_end(seg)); + print_sects_bitmap(seg); +} + +int load_dol_segments(dol_segment * seg, int max_segs, dol_header * h) +{ + int i, n, remaining; + unsigned int start, size; + unsigned long adj1, adj2, end1; + + n = 0; + remaining = max_segs; + for (i = 0; i < DOL_MAX_SECT && remaining > 0; i++) { + /* zero here means the section is not in use */ + if (dol_sect_size(h, i) == 0) + continue; + + /* we initially map 1 seg to 1 sect */ + seg->sects_bitmap = (1 << i); + + start = dol_sect_address(h, i); + size = dol_sect_size(h, i); + + /* page align the segment */ + seg->start = start & PAGE_MASK; + end1 = start + size; + adj1 = start - seg->start; + adj2 = PAGE_ALIGN(end1) - end1; + seg->size = adj1 + size + adj2; + + //print_dol_segment(seg); + + seg++; + remaining--; + n++; + } + return n; +} + +void fix_dol_segments_overlaps(dol_segment * seg, int max_segs) +{ + int i, j; + dol_segment *p, *pp; + long extra_length; + + /* look for overlapping segments and fix them */ + for (i = 0; i < max_segs; i++) { + p = seg + i; /* segment p */ + + /* not really a segment */ + if (p->size == 0) + continue; + + /* check if overlaps any previous segments */ + for (j = 0; j < i; j++) { + pp = seg + j; /* segment pp */ + + /* not a segment or no overlap */ + if (pp->size == 0 || !dol_seg_overlaps(p, pp)) + continue; + + /* merge the two segments */ + if (pp->start < p->start) { + /* extend pp to include p and delete p */ + extra_length = dol_seg_end(p) - dol_seg_end(pp); + if (extra_length > 0) { + pp->size += extra_length; + } + pp->sects_bitmap |= p->sects_bitmap; + p->size = p->start = p->sects_bitmap = 0; + + /* restart the loop because p was deleted */ + i = 0; + break; + } else { + /* extend p to include pp and delete pp */ + extra_length = dol_seg_end(pp) - dol_seg_end(p); + if (extra_length > 0) { + p->size += extra_length; + } + p->sects_bitmap |= pp->sects_bitmap; + pp->size = pp->start = pp->sects_bitmap = 0; + } + } + } +} + +int dol_ppc_probe(const char *buf, off_t dol_length) +{ + dol_header header, *h; + int i, valid = 0; + + /* the DOL file should be at least as long as the DOL header */ + if (dol_length < DOL_HEADER_SIZE) { + if (debug) { + fprintf(stderr, "Not a DOL file, too short.\n"); + } + return -1; + } + + /* read the DOL header */ + memcpy(&header, buf, sizeof(header)); + h = &header; + + /* now perform some sanity checks */ + for (i = 0; i < DOL_MAX_SECT; i++) { + /* DOL segment MAY NOT be physically stored in the header */ + if ((dol_sect_offset(h, i) != 0) + && (dol_sect_offset(h, i) < DOL_HEADER_SIZE)) { + if (debug) { + fprintf(stderr, + "%s segment offset within DOL header\n", + dol_sect_type(i)); + } + return -1; + } + + /* end of physical storage must be within file */ + if ((uintmax_t)(dol_sect_offset(h, i) + dol_sect_size(h, i)) > + (uintmax_t)dol_length) { + if (debug) { + fprintf(stderr, + "%s segment past DOL file size\n", + dol_sect_type(i)); + } + return -1; + } + + /* we only should accept DOLs with segments above 2GB */ + if (dol_sect_address(h, i) != 0 + && !(dol_sect_address(h, i) & 0x80000000)) { + fprintf(stderr, "warning, %s segment below 2GB\n", + dol_sect_type(i)); + } + + if (i < DOL_SECT_MAX_TEXT) { + /* remember that entrypoint was in a code segment */ + if (h->entry_point >= dol_sect_address(h, i) + && h->entry_point < dol_sect_address(h, i) + + dol_sect_size(h, i)) + valid = 1; + } + } + + /* if there is a BSS segment it must^H^H^H^Hshould be above 2GB, too */ + if (h->address_bss != 0 && !(h->address_bss & 0x80000000)) { + fprintf(stderr, "warning, BSS segment below 2GB\n"); + } + + /* if entrypoint is not within a code segment reject this file */ + if (!valid) { + if (debug) { + fprintf(stderr, "Entry point out of text segment\n"); + } + return -1; + } + + /* I've got a dol */ + return 0; +} + +void dol_ppc_usage(void) +{ + printf + (" --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n"); + +} + +int dol_ppc_load(int argc, char **argv, const char *buf, off_t UNUSED(len), + struct kexec_info *info) +{ + dol_header header, *h; + unsigned long entry; + char *arg_buf; + size_t arg_bytes; + unsigned long arg_base; + struct boot_notes *notes; + size_t note_bytes; + const char *command_line; + int command_line_len; + unsigned long mstart; + dol_segment dol_segs[DOL_MAX_SECT]; + unsigned int sects_bitmap; + unsigned long lowest_start; + int i, j, k; + int opt; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"command-line", 1, 0, OPT_APPEND}, + {"append", 1, 0, OPT_APPEND}, + {0, 0, 0, 0}, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR; + + /* + * Parse the command line arguments + */ + command_line = 0; + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + command_line = optarg; + break; + } + } + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) + 1; + } + + /* read the DOL header */ + memcpy(&header, buf, sizeof(header)); + h = &header; + + /* set entry point */ + entry = h->entry_point; + + /* convert the DOL sections into suitable page aligned segments */ + memset(dol_segs, 0, sizeof(dol_segs)); + + load_dol_segments(dol_segs, DOL_MAX_SECT, h); + fix_dol_segments_overlaps(dol_segs, DOL_MAX_SECT); + + /* load rest of segments */ + for (i = 0; i < DOL_MAX_SECT; i++) { + unsigned char *seg_buf; + /* not a segment */ + if (dol_segs[i].size == 0) + continue; + + //print_dol_segment(&dol_segs[i]); + + /* prepare segment */ + seg_buf = xmalloc(dol_segs[i].size); + mstart = dol_segs[i].start; + if (mstart & 0xf0000000) { + /* + * GameCube DOLs expect memory mapped this way: + * + * 80000000 - 817fffff 24MB RAM, cached + * c0000000 - c17fffff 24MB RAM, not cached + * + * kexec, instead, needs physical memory layout, so + * we clear the upper bits of the address. + * (2 bits should be enough, indeed) + */ + mstart &= ~0xf0000000; /* clear bits 0-3, ibm syntax */ + } + add_segment(info, + seg_buf, dol_segs[i].size, + mstart, dol_segs[i].size); + + + /* load sections into segment memory, according to bitmap */ + sects_bitmap = 0; + while (sects_bitmap != dol_segs[i].sects_bitmap) { + unsigned char *sec_buf; + /* find lowest start address for section */ + lowest_start = 0xffffffff; + for (j = -1, k = 0; k < DOL_MAX_SECT; k++) { + /* continue if section is already done */ + if ((sects_bitmap & (1 << k)) != 0) + continue; + /* do nothing for non sections */ + if ((dol_segs[i].sects_bitmap & (1 << k)) == 0) + continue; + /* found new candidate */ + if (dol_sect_address(h, k) < lowest_start) { + lowest_start = dol_sect_address(h, k); + j = k; + } + } + /* mark section as being loaded */ + sects_bitmap |= (1 << j); + + /* read it from file to the right place */ + sec_buf = seg_buf + + (dol_sect_address(h, j) - dol_segs[i].start); + memcpy(sec_buf, buf + dol_sect_offset(h, j), + dol_sect_size(h, j)); + } + } + + /* build the setup glue and argument segment (segment 0) */ + note_bytes = sizeof(elf_boot_notes) + _ALIGN(command_line_len, 4); + arg_bytes = note_bytes + _ALIGN(setup_dol_size, 4); + + arg_buf = xmalloc(arg_bytes); + arg_base = add_buffer(info, + arg_buf, arg_bytes, arg_bytes, 4, 0, 0xFFFFFFFFUL, 1); + + notes = (struct boot_notes *)(arg_buf + _ALIGN(setup_dol_size, 4)); + + notes->hdr.b_size = note_bytes; + notes->cmd_hdr.n_descsz = command_line_len; + notes->hdr.b_checksum = compute_ip_checksum(notes, note_bytes); + + setup_dol_regs.spr8 = entry; /* Link Register */ + + memcpy(arg_buf, setup_dol_start, setup_dol_size); + memcpy(notes, &elf_boot_notes, sizeof(elf_boot_notes)); + memcpy(notes->command_line, command_line, command_line_len); + + if (debug) { + fprintf(stdout, "entry = %p\n", (void *)arg_base); + print_segments(stdout, info); + } + + info->entry = (void *)arg_base; + return 0; +} diff --git a/kexec/arch/ppc/kexec-elf-ppc.c b/kexec/arch/ppc/kexec-elf-ppc.c new file mode 100644 index 0000000..4a4886e --- /dev/null +++ b/kexec/arch/ppc/kexec-elf-ppc.c @@ -0,0 +1,458 @@ +/* + * kexec-elf-ppc.c - kexec Elf loader for the PowerPC + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-ppc.h" +#include <arch/options.h> +#include "../../kexec-syscall.h" +#include "crashdump-powerpc.h" + +#include "config.h" +#include "fixup_dtb.h" + +static const int probe_debug = 0; + +unsigned char reuse_initrd; +int create_flatten_tree(struct kexec_info *, unsigned char **, unsigned long *, + char *); + +#define UPSZ(X) _ALIGN_UP(sizeof(X), 4); +#ifdef WITH_GAMECUBE +static struct boot_notes { + Elf_Bhdr hdr; + Elf_Nhdr bl_hdr; + unsigned char bl_desc[UPSZ(BOOTLOADER)]; + Elf_Nhdr blv_hdr; + unsigned char blv_desc[UPSZ(BOOTLOADER_VERSION)]; + Elf_Nhdr cmd_hdr; + unsigned char command_line[0]; +} elf_boot_notes = { + .hdr = { + .b_signature = 0x0E1FB007, + .b_size = sizeof(elf_boot_notes), + .b_checksum = 0, + .b_records = 3, + }, + .bl_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER), + .n_type = EBN_BOOTLOADER_NAME, + }, + .bl_desc = BOOTLOADER, + .blv_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER_VERSION), + .n_type = EBN_BOOTLOADER_VERSION, + }, + .blv_desc = BOOTLOADER_VERSION, + .cmd_hdr = { + .n_namesz = 0, + .n_descsz = 0, + .n_type = EBN_COMMAND_LINE, + }, +}; +#endif + +int elf_ppc_probe(const char *buf, off_t len) +{ + + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + goto out; + } + + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_PPC) { + /* for a different architecture */ + if (probe_debug) { + fprintf(stderr, "Not for this architecture.\n"); + } + result = -1; + goto out; + } + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +#ifdef WITH_GAMECUBE +static void gamecube_hack_addresses(struct mem_ehdr *ehdr) +{ + struct mem_phdr *phdr, *phdr_end; + phdr_end = ehdr->e_phdr + ehdr->e_phnum; + for(phdr = ehdr->e_phdr; phdr != phdr_end; phdr++) { + /* + * GameCube ELF kernel is linked with memory mapped + * this way (to easily transform it into a DOL + * suitable for being loaded with psoload): + * + * 80000000 - 817fffff 24MB RAM, cached + * c0000000 - c17fffff 24MB RAM, not cached + * + * kexec, instead, needs physical memory layout, so + * we clear the upper bits of the address. + * (2 bits should be enough, indeed) + */ + phdr->p_paddr &= ~0xf0000000; /* clear bits 0-3, ibm syntax */ + } +} +#endif + +/* See options.h -- add any more there, too. */ +static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"command-line", 1, 0, OPT_APPEND}, + {"append", 1, 0, OPT_APPEND}, + {"ramdisk", 1, 0, OPT_RAMDISK}, + {"initrd", 1, 0, OPT_RAMDISK}, + {"gamecube", 1, 0, OPT_GAMECUBE}, + {"dtb", 1, 0, OPT_DTB}, + {"reuse-node", 1, 0, OPT_NODES}, + {0, 0, 0, 0}, +}; +static const char short_options[] = KEXEC_ARCH_OPT_STR; + +void elf_ppc_usage(void) +{ + printf( + " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --ramdisk=<filename> Initial RAM disk.\n" + " --initrd=<filename> same as --ramdisk\n" + " --gamecube=1|0 Enable/disable support for ELFs with changed\n" + " addresses suitable for the GameCube.\n" + " --dtb=<filename> Specify device tree blob file.\n" + " --reuse-node=node Specify nodes which should be taken from /proc/device-tree.\n" + " Can be set multiple times.\n" + ); +} + +int elf_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *command_line, *crash_cmdline, *cmdline_buf; + char *tmp_cmdline; + int command_line_len, crash_cmdline_len; + char *dtb; + int result; + char *error_msg; + unsigned long max_addr, hole_addr; + struct mem_phdr *phdr; + size_t size; +#ifdef CONFIG_PPC64 + unsigned long toc_addr; +#endif +#ifdef WITH_GAMECUBE + int target_is_gamecube = 1; + char *arg_buf; + size_t arg_bytes; + unsigned long arg_base; + struct boot_notes *notes; + size_t note_bytes; + unsigned char *setup_start; + uint32_t setup_size; +#else + char *seg_buf = NULL; + off_t seg_size = 0; + int target_is_gamecube = 0; + unsigned int addr; + unsigned long dtb_addr; + unsigned long dtb_addr_actual; +#endif + unsigned long kernel_addr; +#define FIXUP_ENTRYS (20) + char *fixup_nodes[FIXUP_ENTRYS + 1]; + int cur_fixup = 0; + int opt; + char *blob_buf = NULL; + off_t blob_size = 0; + + command_line = tmp_cmdline = NULL; + dtb = NULL; + max_addr = LONG_MAX; + hole_addr = 0; + kernel_addr = 0; + ramdisk = 0; + result = 0; + error_msg = NULL; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + tmp_cmdline = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_GAMECUBE: + target_is_gamecube = atoi(optarg); + break; + + case OPT_DTB: + dtb = optarg; + break; + + case OPT_NODES: + if (cur_fixup >= FIXUP_ENTRYS) { + die("The number of entries for the fixup is too large\n"); + } + fixup_nodes[cur_fixup] = optarg; + cur_fixup++; + break; + } + } + + if (ramdisk && reuse_initrd) + die("Can't specify --ramdisk or --initrd with --reuseinitrd\n"); + + command_line_len = 0; + if (tmp_cmdline) { + command_line = tmp_cmdline; + } else { + command_line = get_command_line(); + } + command_line_len = strlen(command_line); + + fixup_nodes[cur_fixup] = NULL; + + /* Parse the Elf file */ + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + goto out; + } + +#ifdef WITH_GAMECUBE + if (target_is_gamecube) { + gamecube_hack_addresses(&ehdr); + } +#endif + + /* Load the Elf data. Physical load addresses in elf64 header do not + * show up correctly. Use user supplied address for now to patch the + * elf header + */ + + phdr = &ehdr.e_phdr[0]; + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kernel_addr = locate_hole(info, size, 0, 0, max_addr, 1); +#ifdef CONFIG_PPC64 + ehdr.e_phdr[0].p_paddr = (Elf64_Addr)kernel_addr; +#else + ehdr.e_phdr[0].p_paddr = kernel_addr; +#endif + + /* Load the Elf data */ + result = elf_exec_load(&ehdr, info); + if (result < 0) { + goto out; + } + + /* + * Need to append some command line parameters internally in case of + * taking crash dumps. Additional segments need to be created. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + crash_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)crash_cmdline, 0, COMMAND_LINE_SIZE); + result = load_crashdump_segments(info, crash_cmdline, + max_addr, 0); + if (result < 0) { + result = -1; + goto out; + } + crash_cmdline_len = strlen(crash_cmdline); + } else { + crash_cmdline = NULL; + crash_cmdline_len = 0; + } + + /* + * In case of a toy we take the hardcoded things and an easy setup via + * one of the assembly startups. Every thing else should be grown up + * and go through the purgatory. + */ +#ifdef WITH_GAMECUBE + if (target_is_gamecube) { + setup_start = setup_dol_start; + setup_size = setup_dol_size; + setup_dol_regs.spr8 = ehdr.e_entry; /* Link Register */ + } else { + setup_start = setup_simple_start; + setup_size = setup_simple_size; + setup_simple_regs.spr8 = ehdr.e_entry; /* Link Register */ + } + note_bytes = sizeof(elf_boot_notes) + _ALIGN(command_line_len, 4); + arg_bytes = note_bytes + _ALIGN(setup_size, 4); + + arg_buf = xmalloc(arg_bytes); + arg_base = add_buffer(info, + arg_buf, arg_bytes, arg_bytes, 4, 0, elf_max_addr(&ehdr), 1); + + notes = (struct boot_notes *)(arg_buf + _ALIGN(setup_size, 4)); + + memcpy(arg_buf, setup_start, setup_size); + memcpy(notes, &elf_boot_notes, sizeof(elf_boot_notes)); + memcpy(notes->command_line, command_line, command_line_len); + notes->hdr.b_size = note_bytes; + notes->cmd_hdr.n_descsz = command_line_len; + notes->hdr.b_checksum = compute_ip_checksum(notes, note_bytes); + + info->entry = (void *)arg_base; +#else + if (crash_cmdline_len + command_line_len + 1 > COMMAND_LINE_SIZE) { + printf("Kernel command line exceeds size\n"); + return -1; + } + + cmdline_buf = xmalloc(COMMAND_LINE_SIZE); + memset((void *)cmdline_buf, 0, COMMAND_LINE_SIZE); + if (command_line) + strncat(cmdline_buf, command_line, command_line_len); + if (crash_cmdline) + strncat(cmdline_buf, crash_cmdline, + sizeof(crash_cmdline) - + strlen(crash_cmdline) - 1); + + elf_rel_build_load(info, &info->rhdr, (const char *)purgatory, + purgatory_size, 0, elf_max_addr(&ehdr), 1, 0); + + /* Here we need to initialize the device tree, and find out where + * it is going to live so we can place it directly after the + * kernel image */ + if (dtb) { + /* Grab device tree from buffer */ + blob_buf = slurp_file(dtb, &blob_size); + } else { + create_flatten_tree(info, (unsigned char **)&blob_buf, + (unsigned long *)&blob_size, cmdline_buf); + } + if (!blob_buf || !blob_size) { + error_msg = "Device tree seems to be an empty file.\n"; + goto out2; + } + + /* initial fixup for device tree */ + blob_buf = fixup_dtb_init(info, blob_buf, &blob_size, kernel_addr, &dtb_addr); + + if (ramdisk) { + seg_buf = slurp_ramdisk_ppc(ramdisk, &seg_size); + /* load the ramdisk *above* the device tree */ + hole_addr = add_buffer(info, seg_buf, seg_size, seg_size, + 0, dtb_addr + blob_size + 1, max_addr, -1); + ramdisk_base = hole_addr; + ramdisk_size = seg_size; + } + if (reuse_initrd) { + ramdisk_base = initrd_base; + ramdisk_size = initrd_size; + } + + if (info->kexec_flags & KEXEC_ON_CRASH && ramdisk_base != 0) { + if ( (ramdisk_base < crash_base) || + (ramdisk_base > crash_base + crash_size) ) { + printf("WARNING: ramdisk is above crashkernel region!\n"); + } + else if (ramdisk_base + ramdisk_size > crash_base + crash_size) { + printf("WARNING: ramdisk overflows crashkernel region!\n"); + } + } + + /* Perform final fixup on devie tree, i.e. everything beside what + * was done above */ + fixup_dtb_finalize(info, blob_buf, &blob_size, fixup_nodes, + cmdline_buf); + dtb_addr_actual = add_buffer(info, blob_buf, blob_size, blob_size, 0, dtb_addr, + kernel_addr + KERNEL_ACCESS_TOP, 1); + if (dtb_addr_actual != dtb_addr) { + error_msg = "Error device tree not loadded to address it was expecting to be loaded too!\n"; + goto out2; + } + + /* + * set various variables for the purgatory. + * ehdr.e_entry is a virtual address. we know physical start + * address of the kernel (kernel_addr). Find the offset of + * e_entry from the virtual start address(e_phdr[0].p_vaddr) + * and calculate the actual physical address of the 'kernel entry'. + */ + addr = kernel_addr + (ehdr.e_entry - ehdr.e_phdr[0].p_vaddr); + elf_rel_set_symbol(&info->rhdr, "kernel", &addr, sizeof(addr)); + + addr = dtb_addr; + elf_rel_set_symbol(&info->rhdr, "dt_offset", + &addr, sizeof(addr)); + +#define PUL_STACK_SIZE (16 * 1024) + addr = locate_hole(info, PUL_STACK_SIZE, 0, 0, + elf_max_addr(&ehdr), 1); + addr += PUL_STACK_SIZE; + elf_rel_set_symbol(&info->rhdr, "stack", &addr, sizeof(addr)); +#undef PUL_STACK_SIZE + + /* + * Fixup ThreadPointer(r2) for purgatory. + * PPC32 ELF ABI expects : + * ThreadPointer (TP) = TCB + 0x7000 + * We manually allocate a TCB space and set the TP + * accordingly. + */ +#define TCB_SIZE 1024 +#define TCB_TP_OFFSET 0x7000 /* PPC32 ELF ABI */ + + addr = locate_hole(info, TCB_SIZE, 0, 0, + ((unsigned long)elf_max_addr(&ehdr) - TCB_TP_OFFSET), + 1); + addr += TCB_SIZE + TCB_TP_OFFSET; + elf_rel_set_symbol(&info->rhdr, "my_thread_ptr", &addr, sizeof(addr)); + +#undef TCB_SIZE +#undef TCB_TP_OFFSET + + addr = elf_rel_get_addr(&info->rhdr, "purgatory_start"); + info->entry = (void *)addr; + +out2: + free(cmdline_buf); +#endif +out: + free_elf_info(&ehdr); + free(crash_cmdline); + if (!tmp_cmdline) + free(command_line); + if (error_msg) + die("%s", error_msg); + + return result; +} diff --git a/kexec/arch/ppc/kexec-elf-rel-ppc.c b/kexec/arch/ppc/kexec-elf-rel-ppc.c new file mode 100644 index 0000000..1acbd86 --- /dev/null +++ b/kexec/arch/ppc/kexec-elf-rel-ppc.c @@ -0,0 +1,69 @@ +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS32) { + return 0; + } + if (ehdr->e_machine != EM_PPC) { + return 0; + } + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + switch(r_type) { + case R_PPC_ADDR32: + /* Simply set it */ + *(uint32_t *)location = value; + break; + + case R_PPC_ADDR16_LO: + /* Low half of the symbol */ + *(uint16_t *)location = value; + break; + + case R_PPC_ADDR16_HI: + *(uint16_t *)location = (value>>16) & 0xffff; + break; + + case R_PPC_ADDR16_HA: + /* Sign-adjusted lower 16 bits: PPC ELF ABI says: + (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF. + This is the same, only sane. + */ + *(uint16_t *)location = (value + 0x8000) >> 16; + break; + + case R_PPC_REL24: + if ((int)(value - address) < -0x02000000 + || (int)(value - address) >= 0x02000000) + { + die("Symbol more than 16MiB away"); + } + /* Only replace bits 2 through 26 */ + *(uint32_t *)location + = (*(uint32_t *)location & ~0x03fffffc) + | ((value - address) + & 0x03fffffc); + break; + + case R_PPC_REL32: + /* 32-bit relative jump. */ + *(uint32_t *)location = value - address; + break; + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } + return; +} diff --git a/kexec/arch/ppc/kexec-ppc.c b/kexec/arch/ppc/kexec-ppc.c new file mode 100644 index 0000000..03bec36 --- /dev/null +++ b/kexec/arch/ppc/kexec-ppc.c @@ -0,0 +1,968 @@ +/* + * kexec-ppc.c - kexec for the PowerPC + * Copyright (C) 2004, 2005 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <sys/types.h> +#include <dirent.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc.h" +#include "crashdump-powerpc.h" +#include <arch/options.h> + +#include "config.h" + +unsigned long dt_address_cells = 0, dt_size_cells = 0; +uint64_t rmo_top; +uint64_t memory_limit; +unsigned long long crash_base = 0, crash_size = 0; +unsigned long long initrd_base = 0, initrd_size = 0; +unsigned long long ramdisk_base = 0, ramdisk_size = 0; +unsigned int rtas_base, rtas_size; +int max_memory_ranges; +const char *ramdisk; + +/* + * Reads the #address-cells and #size-cells on this platform. + * This is used to parse the memory/reg info from the device-tree + */ +int init_memory_region_info() +{ + size_t res = 0; + int fd; + char *file; + + file = "/proc/device-tree/#address-cells"; + fd = open(file, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Unable to open %s\n", file); + return -1; + } + + res = read(fd, &dt_address_cells, sizeof(dt_address_cells)); + if (res != sizeof(dt_address_cells)) { + fprintf(stderr, "Error reading %s\n", file); + return -1; + } + close(fd); + + file = "/proc/device-tree/#size-cells"; + fd = open(file, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Unable to open %s\n", file); + return -1; + } + + res = read(fd, &dt_size_cells, sizeof(dt_size_cells)); + if (res != sizeof(dt_size_cells)) { + fprintf(stderr, "Error reading %s\n", file); + return -1; + } + close(fd); + + /* Convert the sizes into bytes */ + dt_size_cells *= sizeof(unsigned long); + dt_address_cells *= sizeof(unsigned long); + + return 0; +} + +#define MAXBYTES 128 +/* + * Reads the memory region info from the device-tree node pointed + * by @fd and fills the *start, *end with the boundaries of the region + */ +int read_memory_region_limits(int fd, unsigned long long *start, + unsigned long long *end) +{ + char buf[MAXBYTES]; + unsigned long *p; + unsigned long nbytes = dt_address_cells + dt_size_cells; + + if (lseek(fd, 0, SEEK_SET) == -1) { + fprintf(stderr, "Error in file seek\n"); + return -1; + } + if (read(fd, buf, nbytes) != nbytes) { + fprintf(stderr, "Error reading the memory region info\n"); + return -1; + } + + p = (unsigned long*)buf; + if (dt_address_cells == sizeof(unsigned long)) { + *start = p[0]; + p++; + } else if (dt_address_cells == sizeof(unsigned long long)) { + *start = ((unsigned long long *)p)[0]; + p = (unsigned long long *)p + 1; + } else { + fprintf(stderr, "Unsupported value for #address-cells : %ld\n", + dt_address_cells); + return -1; + } + + if (dt_size_cells == sizeof(unsigned long)) + *end = *start + p[0]; + else if (dt_size_cells == sizeof(unsigned long long)) + *end = *start + ((unsigned long long *)p)[0]; + else { + fprintf(stderr, "Unsupported value for #size-cells : %ld\n", + dt_size_cells); + return -1; + } + + return 0; +} + +void arch_reuse_initrd(void) +{ + reuse_initrd = 1; +} + +#ifdef WITH_GAMECUBE +#define MAX_MEMORY_RANGES 64 +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +static int get_memory_ranges_gc(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + int memory_ranges = 0; + + /* RAM - lowmem used by DOLs - framebuffer */ + memory_range[memory_ranges].start = 0x00003000; + memory_range[memory_ranges].end = 0x0174bfff; + memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + *range = memory_range; + *ranges = memory_ranges; + return 0; +} +#else +static int use_new_dtb; +static int nr_memory_ranges, nr_exclude_ranges; +static struct memory_range *exclude_range; +static struct memory_range *memory_range; +static struct memory_range *base_memory_range; +static uint64_t memory_max; + +/* + * Count the memory nodes under /proc/device-tree and populate the + * max_memory_ranges variable. This variable replaces MAX_MEMORY_RANGES + * macro used earlier. + */ +static int count_memory_ranges(void) +{ + char device_tree[256] = "/proc/device-tree/"; + struct dirent *dentry; + DIR *dir; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + max_memory_ranges++; + } + + /* need to add extra region for retained initrd */ + if (use_new_dtb) { + max_memory_ranges++; + } + + closedir(dir); + return 0; + +} + + static void cleanup_memory_ranges(void) + { + free(memory_range); + free(base_memory_range); + free(exclude_range); + } + +/* + * Allocate memory for various data structures used to hold + * values of different memory ranges + */ +static int alloc_memory_ranges(void) +{ + int memory_range_len; + + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = malloc(memory_range_len); + if (!memory_range) + return -1; + + base_memory_range = malloc(memory_range_len); + if (!base_memory_range) + goto err1; + + exclude_range = malloc(memory_range_len); + if (!exclude_range) + goto err1; + + memset(memory_range, 0, memory_range_len); + memset(base_memory_range, 0, memory_range_len); + memset(exclude_range, 0, memory_range_len); + return 0; + +err1: + fprintf(stderr, "memory range structure allocation failure\n"); + cleanup_memory_ranges(); + return -1; +} + +/* Sort the exclude ranges in memory */ +static int sort_ranges(void) +{ + int i, j; + uint64_t tstart, tend; + for (i = 0; i < nr_exclude_ranges - 1; i++) { + for (j = 0; j < nr_exclude_ranges - i - 1; j++) { + if (exclude_range[j].start > exclude_range[j+1].start) { + tstart = exclude_range[j].start; + tend = exclude_range[j].end; + exclude_range[j].start = exclude_range[j+1].start; + exclude_range[j].end = exclude_range[j+1].end; + exclude_range[j+1].start = tstart; + exclude_range[j+1].end = tend; + } + } + } + return 0; +} + +/* Sort the base ranges in memory - this is useful for ensuring that our + * ranges are in ascending order, even if device-tree read of memory nodes + * is done differently. Also, could be used for other range coalescing later + */ +static int sort_base_ranges(void) +{ + int i, j; + unsigned long long tstart, tend; + + for (i = 0; i < nr_memory_ranges - 1; i++) { + for (j = 0; j < nr_memory_ranges - i - 1; j++) { + if (base_memory_range[j].start > base_memory_range[j+1].start) { + tstart = base_memory_range[j].start; + tend = base_memory_range[j].end; + base_memory_range[j].start = base_memory_range[j+1].start; + base_memory_range[j].end = base_memory_range[j+1].end; + base_memory_range[j+1].start = tstart; + base_memory_range[j+1].end = tend; + } + } + } + return 0; +} + +static int realloc_memory_ranges(void) +{ + size_t memory_range_len; + + max_memory_ranges++; + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = (struct memory_range *) realloc(memory_range, + memory_range_len); + if (!memory_range) + goto err; + + base_memory_range = (struct memory_range *) realloc(base_memory_range, + memory_range_len); + if (!base_memory_range) + goto err; + + exclude_range = (struct memory_range *) realloc(exclude_range, + memory_range_len); + if (!exclude_range) + goto err; + + usablemem_rgns.ranges = (struct memory_range *) + realloc(usablemem_rgns.ranges, + memory_range_len); + if (!(usablemem_rgns.ranges)) + goto err; + + return 0; + +err: + fprintf(stderr, "memory range structure re-allocation failure\n"); + return -1; +} + +/* Get base memory ranges */ +static int get_base_ranges(void) +{ + int local_memory_ranges = 0; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + char buf[MAXBYTES]; + DIR *dir, *dmem; + struct dirent *dentry, *mentry; + int n, fd; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((dmem = opendir(fname)) == NULL) { + perror(fname); + closedir(dir); + return -1; + } + while ((mentry = readdir(dmem)) != NULL) { + unsigned long long start, end; + + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + if ((fd = open(fname, O_RDONLY)) < 0) { + perror(fname); + closedir(dmem); + closedir(dir); + return -1; + } + if (read_memory_region_limits(fd, &start, &end) != 0) { + close(fd); + closedir(dmem); + closedir(dir); + return -1; + } + if (local_memory_ranges >= max_memory_ranges) { + if (realloc_memory_ranges() < 0){ + close(fd); + break; + } + } + + base_memory_range[local_memory_ranges].start = start; + base_memory_range[local_memory_ranges].end = end; + base_memory_range[local_memory_ranges].type = RANGE_RAM; + local_memory_ranges++; + dbgprintf("%016llx-%016llx : %x\n", + base_memory_range[local_memory_ranges-1].start, + base_memory_range[local_memory_ranges-1].end, + base_memory_range[local_memory_ranges-1].type); + close(fd); + } + closedir(dmem); + } + closedir(dir); + nr_memory_ranges = local_memory_ranges; + sort_base_ranges(); + memory_max = base_memory_range[nr_memory_ranges - 1].end; + + dbgprintf("get base memory ranges:%d\n", nr_memory_ranges); + + return 0; +} + +static int read_kernel_memory_limit(char *fname, char *buf) +{ + FILE *file; + int n; + + if (!fname || !buf) + return -1; + + file = fopen(fname, "r"); + if (file == NULL) { + if (errno != ENOENT) { + perror(fname); + return -1; + } + errno = 0; + /* + * fall through. On older kernel this file + * is not present. Hence return success. + */ + } else { + /* Memory limit property is of u64 type. */ + if ((n = fread(&memory_limit, 1, sizeof(uint64_t), file)) < 0) { + perror(fname); + goto err_out; + } + if (n != sizeof(uint64_t)) { + fprintf(stderr, "%s node has invalid size: %d\n", + fname, n); + goto err_out; + } + fclose(file); + } + return 0; +err_out: + fclose(file); + return -1; +} + +/* Return 0 if fname/value valid, -1 otherwise */ +int get_devtree_value(const char *fname, unsigned long long *value) +{ + FILE *file; + char buf[MAXBYTES]; + int n = -1; + + if ((file = fopen(fname, "r"))) { + n = fread(buf, 1, MAXBYTES, file); + fclose(file); + } + + if (n == sizeof(uint32_t)) + *value = ((uint32_t *)buf)[0]; + else if (n == sizeof(uint64_t)) + *value = ((uint64_t *)buf)[0]; + else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + return -1; + } + + return 0; +} + +/* Get devtree details and create exclude_range array + * Also create usablemem_ranges for KEXEC_ON_CRASH + */ +static int get_devtree_details(unsigned long kexec_flags) +{ + uint64_t rmo_base; + unsigned long long tce_base; + unsigned int tce_size; + unsigned long long htab_base, htab_size; + unsigned long long kernel_end; + unsigned long long initrd_start, initrd_end; + char buf[MAXBYTES]; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + DIR *dir, *cdir; + FILE *file; + struct dirent *dentry; + int n, i = 0; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "chosen", 6) && + strncmp(dentry->d_name, "memory@", 7) && + strncmp(dentry->d_name, "memory", 6) && + strncmp(dentry->d_name, "pci@", 4) && + strncmp(dentry->d_name, "rtas", 4)) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((cdir = opendir(fname)) == NULL) { + perror(fname); + goto error_opendir; + } + + if (strncmp(dentry->d_name, "chosen", 6) == 0) { + /* only reserve kernel region if we are doing a crash kernel */ + if (kexec_flags & KEXEC_ON_CRASH) { + strcat(fname, "/linux,kernel-end"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + kernel_end = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + kernel_end = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + + /* Add kernel memory to exclude_range */ + exclude_range[i].start = 0x0UL; + exclude_range[i].end = kernel_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,crashkernel-base"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + crash_base = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + crash_base = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,crashkernel-size"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + crash_size = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + crash_size = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + + if (crash_base > mem_min) + mem_min = crash_base; + if (crash_base + crash_size < mem_max) + mem_max = crash_base + crash_size; + +#ifndef CONFIG_BOOKE + add_usable_mem_rgns(0, crash_base + crash_size); + /* Reserve the region (KDUMP_BACKUP_LIMIT,crash_base) */ + reserve(KDUMP_BACKUP_LIMIT, + crash_base-KDUMP_BACKUP_LIMIT); +#else + add_usable_mem_rgns(crash_base, crash_size); +#endif + } + /* + * Read the first kernel's memory limit. + * If the first kernel is booted with mem= option then + * it would export "linux,memory-limit" file + * reflecting value for the same. + */ + memset(fname, 0, sizeof(fname)); + snprintf(fname, sizeof(fname), "%s%s%s", device_tree, + dentry->d_name, "/linux,memory-limit"); + if (read_kernel_memory_limit(fname, buf) < 0) + goto error_opencdir; + + /* reserve the initrd_start and end locations. */ + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,initrd-start"); + file = fopen(fname, "r"); + if (!file) { + errno = 0; + initrd_start = 0; + } else { + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + initrd_start = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + initrd_start = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + } + + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,initrd-end"); + file = fopen(fname, "r"); + if (!file) { + errno = 0; + initrd_end = 0; + } else { + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + initrd_end = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + initrd_end = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + } + + if ((initrd_end - initrd_start) != 0 ) { + initrd_base = initrd_start; + initrd_size = initrd_end - initrd_start; + } + + if (reuse_initrd) { + /* Add initrd address to exclude_range */ + exclude_range[i].start = initrd_start; + exclude_range[i].end = initrd_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + } + + /* HTAB */ + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,htab-base"); + file = fopen(fname, "r"); + if (!file) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&htab_base, sizeof(unsigned long), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,htab-size"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if (fread(&htab_size, sizeof(unsigned long), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + /* Add htab address to exclude_range - NON-LPAR only */ + exclude_range[i].start = htab_base; + exclude_range[i].end = htab_base + htab_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + } /* chosen */ + if (strncmp(dentry->d_name, "rtas", 4) == 0) { + strcat(fname, "/linux,rtas-base"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_base, sizeof(unsigned int), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,rtas-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_size, sizeof(unsigned int), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + closedir(cdir); + /* Add rtas to exclude_range */ + exclude_range[i].start = rtas_base; + exclude_range[i].end = rtas_base + rtas_size; + i++; + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(rtas_base, rtas_size); + } /* rtas */ + + if (!strncmp(dentry->d_name, "memory@", 7) || + !strcmp(dentry->d_name, "memory")) { + int fd; + strcat(fname, "/reg"); + if ((fd = open(fname, O_RDONLY)) < 0) { + perror(fname); + goto error_opencdir; + } + if (read_memory_region_limits(fd, &rmo_base, &rmo_top) != 0) + goto error_openfile; + + if (rmo_top > 0x30000000UL) + rmo_top = 0x30000000UL; + + close(fd); + closedir(cdir); + } /* memory */ + + if (strncmp(dentry->d_name, "pci@", 4) == 0) { + strcat(fname, "/linux,tce-base"); + file = fopen(fname, "r"); + if (!file) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&tce_base, sizeof(unsigned long), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,tce-size"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if (fread(&tce_size, sizeof(unsigned int), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + /* Add tce to exclude_range - NON-LPAR only */ + exclude_range[i].start = tce_base; + exclude_range[i].end = tce_base + tce_size; + i++; + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(tce_base, tce_size); + closedir(cdir); + } /* pci */ + } + closedir(dir); + + nr_exclude_ranges = i; + + sort_ranges(); + + + int k; + for (k = 0; k < i; k++) + dbgprintf("exclude_range sorted exclude_range[%d] " + "start:%llx, end:%llx\n", k, exclude_range[k].start, + exclude_range[k].end); + + return 0; + +error_openfile: + fclose(file); +error_opencdir: + closedir(cdir); +error_opendir: + closedir(dir); + return -1; +} + + +/* Setup a sorted list of memory ranges. */ +static int setup_memory_ranges(unsigned long kexec_flags) +{ + int i, j = 0; + + /* Get the base list of memory ranges from /proc/device-tree/memory + * nodes. Build list of ranges to be excluded from valid memory + */ + + if (get_base_ranges()) + goto out; + if (get_devtree_details(kexec_flags)) + goto out; + + for (i = 0; i < nr_exclude_ranges; i++) { + /* If first exclude range does not start with 0, include the + * first hole of valid memory from 0 - exclude_range[0].start + */ + if (i == 0) { + if (exclude_range[i].start != 0) { + memory_range[j].start = 0; + memory_range[j].end = exclude_range[i].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + } + } /* i == 0 */ + /* If the last exclude range does not end at memory_max, include + * the last hole of valid memory from exclude_range[last].end - + * memory_max + */ + if (i == nr_exclude_ranges - 1) { + if (exclude_range[i].end < memory_max) { + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = memory_max; + memory_range[j].type = RANGE_RAM; + j++; + /* Limit the end to rmo_top */ + if (memory_range[j-1].start >= rmo_top) { + j--; + break; + } + if ((memory_range[j-1].start < rmo_top) && + (memory_range[j-1].end >= rmo_top)) { + memory_range[j-1].end = rmo_top; + break; + } + continue; + } + } /* i == nr_exclude_ranges - 1 */ + /* contiguous exclude ranges - skip */ + if (exclude_range[i+1].start == exclude_range[i].end + 1) + continue; + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = exclude_range[i+1].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + /* Limit range to rmo_top */ + if (memory_range[j-1].start >= rmo_top) { + j--; + break; + } + if ((memory_range[j-1].start < rmo_top) && + (memory_range[j-1].end >= rmo_top)) { + memory_range[j-1].end = rmo_top; + break; + } + } + + /* fixup in case we have no exclude regions */ + if (!j) { + memory_range[0].start = base_memory_range[0].start; + memory_range[0].end = rmo_top; + memory_range[0].type = RANGE_RAM; + nr_memory_ranges = 1; + } else + nr_memory_ranges = j; + + + int k; + for (k = 0; k < j; k++) + dbgprintf("setup_memory_ranges memory_range[%d] " + "start:%llx, end:%llx\n", k, memory_range[k].start, + memory_range[k].end); + return 0; + +out: + cleanup_memory_ranges(); + return -1; +} + + +/* Return a list of valid memory ranges */ +int get_memory_ranges_dt(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + if (count_memory_ranges()) + return -1; + if (alloc_memory_ranges()) + return -1; + if (setup_memory_ranges(kexec_flags)) + return -1; + + *range = memory_range; + *ranges = nr_memory_ranges; + return 0; +} +#endif + +/* Return a sorted list of memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + int res = 0; + + res = init_memory_region_info(); + if (res != 0) + return res; +#ifdef WITH_GAMECUBE + return get_memory_ranges_gc(range, ranges, kexec_flags); +#else + return get_memory_ranges_dt(range, ranges, kexec_flags); +#endif +} + +struct file_type file_type[] = { + {"elf-ppc", elf_ppc_probe, elf_ppc_load, elf_ppc_usage}, + {"dol-ppc", dol_ppc_probe, dol_ppc_load, dol_ppc_usage}, + {"uImage-ppc", uImage_ppc_probe, uImage_ppc_load, uImage_ppc_usage }, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + return 0; +} + +const struct arch_map_entry arches[] = { + /* For compatibility with older patches + * use KEXEC_ARCH_DEFAULT instead of KEXEC_ARCH_PPC here. + */ + { "ppc", KEXEC_ARCH_DEFAULT }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + diff --git a/kexec/arch/ppc/kexec-ppc.h b/kexec/arch/ppc/kexec-ppc.h new file mode 100644 index 0000000..04e728e --- /dev/null +++ b/kexec/arch/ppc/kexec-ppc.h @@ -0,0 +1,86 @@ +#ifndef KEXEC_PPC_H +#define KEXEC_PPC_H + +#define MAXBYTES 128 +#define MAX_LINE 160 +#define CORE_TYPE_ELF32 1 +#define CORE_TYPE_ELF64 2 + +#define COMMAND_LINE_SIZE 2048 /* from kernel */ + +extern unsigned char setup_simple_start[]; +extern uint32_t setup_simple_size; + +extern struct { + uint32_t spr8; +} setup_simple_regs; + +extern unsigned char setup_dol_start[]; +extern uint32_t setup_dol_size; +extern uint64_t rmo_top; + +extern struct { + uint32_t spr8; +} setup_dol_regs; + +#define SIZE_16M (16*1024*1024UL) + +int elf_ppc_probe(const char *buf, off_t len); +int elf_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_ppc_usage(void); + +int uImage_ppc_probe(const char *buf, off_t len); +int uImage_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void uImage_ppc_usage(void); + +int dol_ppc_probe(const char *buf, off_t len); +int dol_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void dol_ppc_usage(void); + +/* + * During inital setup the kernel does not map the whole memory but a part of + * it. On Book-E that is 64MiB, 601 24MiB or 256MiB (if possible). + */ +#define KERNEL_ACCESS_TOP (24 * 1024 * 1024) + +/* boot block version 17 as defined by the linux kernel */ +struct bootblock { + unsigned magic, + totalsize, + off_dt_struct, + off_dt_strings, + off_mem_rsvmap, + version, + last_comp_version, + boot_physid, + dt_strings_size, + dt_struct_size; +}; + +typedef struct mem_rgns { + unsigned int size; + struct memory_range *ranges; +} mem_rgns_t; +extern mem_rgns_t usablemem_rgns; +extern int max_memory_ranges; +extern unsigned long long crash_base, crash_size; +extern unsigned long long initrd_base, initrd_size; +extern unsigned long long ramdisk_base, ramdisk_size; +extern unsigned char reuse_initrd; +extern const char *ramdisk; + +/* Method to parse the memory/reg nodes in device-tree */ +extern unsigned long dt_address_cells, dt_size_cells; +extern int init_memory_region_info(void); +extern int read_memory_region_limits(int fd, unsigned long long *start, + unsigned long long *end); +extern int get_devtree_value(const char *fname, unsigned long long *pvalue); +/*fs2dt*/ +void reserve(unsigned long long where, unsigned long long length); + +/* Defined kexec-uImage-ppc.c */ +extern char* slurp_ramdisk_ppc(const char *filename, off_t *r_size); +#endif /* KEXEC_PPC_H */ diff --git a/kexec/arch/ppc/kexec-uImage-ppc.c b/kexec/arch/ppc/kexec-uImage-ppc.c new file mode 100644 index 0000000..e8f7adc --- /dev/null +++ b/kexec/arch/ppc/kexec-uImage-ppc.c @@ -0,0 +1,325 @@ +/* + * uImage support for PowerPC + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <image.h> +#include <getopt.h> +#include <arch/options.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc.h" +#include "fixup_dtb.h" +#include <kexec-uImage.h> +#include "crashdump-powerpc.h" +#include <limits.h> + +int create_flatten_tree(struct kexec_info *, unsigned char **, unsigned long *, + char *); + +/* See options.h -- add any more there, too. */ +static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"command-line", 1, 0, OPT_APPEND}, + {"append", 1, 0, OPT_APPEND}, + {"ramdisk", 1, 0, OPT_RAMDISK}, + {"initrd", 1, 0, OPT_RAMDISK}, + {"dtb", 1, 0, OPT_DTB}, + {"reuse-node", 1, 0, OPT_NODES}, + {0, 0, 0, 0}, +}; +static const char short_options[] = KEXEC_ARCH_OPT_STR; + +void uImage_ppc_usage(void) +{ + printf( + " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --ramdisk=<filename> Initial RAM disk.\n" + " --initrd=<filename> same as --ramdisk\n" + " --dtb=<filename> Specify device tree blob file.\n" + " --reuse-node=node Specify nodes which should be taken from /proc/device-tree.\n" + " Can be set multiple times.\n" + ); +} + +/* + * Load the ramdisk into buffer. + * If the supplied image is in uImage format use + * uImage_load() to read the payload from the image. + */ +char *slurp_ramdisk_ppc(const char *filename, off_t *r_size) +{ + struct Image_info img; + off_t size; + const char *buf = slurp_file(filename, &size); + int rc; + + /* Check if this is a uImage RAMDisk */ + if (!buf) + return buf; + rc = uImage_probe_ramdisk(buf, size, IH_ARCH_PPC); + if (rc < 0) + die("uImage: Corrupted ramdisk file %s\n", filename); + else if (rc == 0) { + if (uImage_load(buf, size, &img) != 0) + die("uImage: Reading %ld bytes from %s failed\n", + size, filename); + buf = img.buf; + size = img.len; + } + + *r_size = size; + return buf; +} + +int uImage_ppc_probe(const char *buf, off_t len) +{ + return uImage_probe_kernel(buf, len, IH_ARCH_PPC); +} + +static int ppc_load_bare_bits(int argc, char **argv, const char *buf, + off_t len, struct kexec_info *info, unsigned int load_addr, + unsigned int ep) +{ + char *command_line, *cmdline_buf, *crash_cmdline; + char *tmp_cmdline; + int command_line_len, crash_cmdline_len; + char *dtb; + unsigned int addr; + unsigned long dtb_addr; + unsigned long dtb_addr_actual; +#define FIXUP_ENTRYS (20) + char *fixup_nodes[FIXUP_ENTRYS + 1]; + int cur_fixup = 0; + int opt; + int ret = 0; + char *seg_buf = NULL; + off_t seg_size = 0; + unsigned long long hole_addr; + unsigned long max_addr; + char *blob_buf = NULL; + off_t blob_size = 0; + char *error_msg = NULL; + + cmdline_buf = NULL; + command_line = NULL; + tmp_cmdline = NULL; + dtb = NULL; + max_addr = LONG_MAX; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + tmp_cmdline = optarg; + break; + + case OPT_RAMDISK: + ramdisk = optarg; + break; + + case OPT_DTB: + dtb = optarg; + break; + + case OPT_NODES: + if (cur_fixup >= FIXUP_ENTRYS) { + die("The number of entries for the fixup is too large\n"); + } + fixup_nodes[cur_fixup] = optarg; + cur_fixup++; + break; + } + } + + if (ramdisk && reuse_initrd) + die("Can't specify --ramdisk or --initrd with --reuseinitrd\n"); + + command_line_len = 0; + if (tmp_cmdline) { + command_line = tmp_cmdline; + } else { + command_line = get_command_line(); + } + command_line_len = strlen(command_line) + 1; + + fixup_nodes[cur_fixup] = NULL; + + /* + * len contains the length of the whole kernel image except the bss + * section. The 1 MiB should cover it. The purgatory and the dtb are + * allocated from memtop down towards zero so we should never get too + * close to the bss :) + */ +#define _1MiB (1 * 1024 * 1024) + + /* + * If the provided load_addr cannot be allocated, find a new + * area. Rebase the entry point based on the new load_addr. + */ + if (!valid_memory_range(info, load_addr, load_addr + (len + _1MiB))) { + int ep_offset = ep - load_addr; + + load_addr = locate_hole(info, len + _1MiB, 0, 0, max_addr, 1); + if (load_addr == ULONG_MAX) { + printf("Can't allocate memory for kernel of len %ld\n", + len + _1MiB); + return -1; + } + + ep = load_addr + ep_offset; + } + + add_segment(info, buf, len, load_addr, len + _1MiB); + + + if (info->kexec_flags & KEXEC_ON_CRASH) { + crash_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)crash_cmdline, 0, COMMAND_LINE_SIZE); + ret = load_crashdump_segments(info, crash_cmdline, + max_addr, 0); + if (ret < 0) { + ret = -1; + goto out; + } + crash_cmdline_len = strlen(crash_cmdline); + } else { + crash_cmdline = NULL; + crash_cmdline_len = 0; + } + + if (crash_cmdline_len + command_line_len + 1 > COMMAND_LINE_SIZE) { + printf("Kernel command line exceeds maximum possible length\n"); + return -1; + } + + cmdline_buf = xmalloc(COMMAND_LINE_SIZE); + memset((void *)cmdline_buf, 0, COMMAND_LINE_SIZE); + + if (command_line) + strcpy(cmdline_buf, command_line); + if (crash_cmdline) + strncat(cmdline_buf, crash_cmdline, crash_cmdline_len); + + elf_rel_build_load(info, &info->rhdr, (const char *)purgatory, + purgatory_size, 0, -1, -1, 0); + + /* Here we need to initialize the device tree, and find out where + * it is going to live so we can place it directly after the + * kernel image */ + if (dtb) { + /* Grab device tree from buffer */ + blob_buf = slurp_file(dtb, &blob_size); + } else { + create_flatten_tree(info, (unsigned char **)&blob_buf, + (unsigned long *)&blob_size, cmdline_buf); + } + if (!blob_buf || !blob_size) { + error_msg = "Device tree seems to be an empty file.\n"; + goto out2; + } + + /* initial fixup for device tree */ + blob_buf = fixup_dtb_init(info, blob_buf, &blob_size, load_addr, &dtb_addr); + + if (ramdisk) { + seg_buf = slurp_ramdisk_ppc(ramdisk, &seg_size); + /* Load ramdisk at top of memory */ + hole_addr = add_buffer(info, seg_buf, seg_size, seg_size, + 0, dtb_addr + blob_size, max_addr, -1); + ramdisk_base = hole_addr; + ramdisk_size = seg_size; + } + if (reuse_initrd) { + ramdisk_base = initrd_base; + ramdisk_size = initrd_size; + } + + if (info->kexec_flags & KEXEC_ON_CRASH && ramdisk_base != 0) { + if ( (ramdisk_base < crash_base) || + (ramdisk_base > crash_base + crash_size) ) { + printf("WARNING: ramdisk is above crashkernel region!\n"); + } + else if (ramdisk_base + ramdisk_size > crash_base + crash_size) { + printf("WARNING: ramdisk overflows crashkernel region!\n"); + } + } + + /* Perform final fixup on devie tree, i.e. everything beside what + * was done above */ + fixup_dtb_finalize(info, blob_buf, &blob_size, fixup_nodes, + cmdline_buf); + dtb_addr_actual = add_buffer(info, blob_buf, blob_size, blob_size, 0, dtb_addr, + load_addr + KERNEL_ACCESS_TOP, 1); + if (dtb_addr_actual != dtb_addr) { + printf("dtb_addr_actual: %lx, dtb_addr: %lx\n", dtb_addr_actual, dtb_addr); + error_msg = "Error device tree not loadded to address it was expecting to be loaded too!\n"; + goto out2; + } + + /* set various variables for the purgatory */ + addr = ep; + elf_rel_set_symbol(&info->rhdr, "kernel", &addr, sizeof(addr)); + + addr = dtb_addr; + elf_rel_set_symbol(&info->rhdr, "dt_offset", &addr, sizeof(addr)); + +#define PUL_STACK_SIZE (16 * 1024) + addr = locate_hole(info, PUL_STACK_SIZE, 0, 0, -1, 1); + addr += PUL_STACK_SIZE; + elf_rel_set_symbol(&info->rhdr, "stack", &addr, sizeof(addr)); + /* No allocation past here in order not to overwrite the stack */ +#undef PUL_STACK_SIZE + + /* + * Fixup ThreadPointer(r2) for purgatory. + * PPC32 ELF ABI expects : + * ThreadPointer (TP) = TCB + 0x7000 + * We manually allocate a TCB space and set the TP + * accordingly. + */ +#define TCB_SIZE 1024 +#define TCB_TP_OFFSET 0x7000 /* PPC32 ELF ABI */ + addr = locate_hole(info, TCB_SIZE, 0, 0, + ((unsigned long)-1 - TCB_TP_OFFSET), + 1); + addr += TCB_SIZE + TCB_TP_OFFSET; + elf_rel_set_symbol(&info->rhdr, "my_thread_ptr", &addr, sizeof(addr)); +#undef TCB_TP_OFFSET +#undef TCB_SIZE + + addr = elf_rel_get_addr(&info->rhdr, "purgatory_start"); + info->entry = (void *)addr; + +out2: + free(cmdline_buf); +out: + free(crash_cmdline); + if (!tmp_cmdline) + free(command_line); + if (error_msg) + die("%s", error_msg); + return ret; +} + +int uImage_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct Image_info img; + int ret; + + ret = uImage_load(buf, len, &img); + if (ret) + return ret; + + return ppc_load_bare_bits(argc, argv, img.buf, img.len, info, + img.base, img.ep); +} diff --git a/kexec/arch/ppc/libfdt-wrapper.c b/kexec/arch/ppc/libfdt-wrapper.c new file mode 100644 index 0000000..ef355d0 --- /dev/null +++ b/kexec/arch/ppc/libfdt-wrapper.c @@ -0,0 +1,189 @@ +/* + * This file does the necessary interface mapping between the bootwrapper + * device tree operations and the interface provided by shared source + * files flatdevicetree.[ch]. + * + * Copyright 2007 David Gibson, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <stdlib.h> +#include <stddef.h> +#include <stdio.h> +#include <page.h> +#include <libfdt.h> +#include "ops.h" +#include "../../kexec.h" + +#define BAD_ERROR(err) (((err) < 0) \ + && ((err) != -FDT_ERR_NOTFOUND) \ + && ((err) != -FDT_ERR_EXISTS)) + +#define check_err(err) \ + ({ \ + if (BAD_ERROR(err) || ((err < 0) && kexec_debug)) \ + printf("%s():%d %s\n\r", __func__, __LINE__, \ + fdt_strerror(err)); \ + if (BAD_ERROR(err)) \ + exit(1); \ + (err < 0) ? -1 : 0; \ + }) + +#define offset_devp(off) \ + ({ \ + int _offset = (off); \ + check_err(_offset) ? NULL : (void *)(_offset+1); \ + }) + +#define devp_offset_find(devp) (((int)(devp))-1) +#define devp_offset(devp) (devp ? ((int)(devp))-1 : 0) + +static void *fdt; +static void *buf; /* = NULL */ +struct dt_ops dt_ops; + +#define EXPAND_GRANULARITY 1024 + +static void expand_buf(int minexpand) +{ + int size = fdt_totalsize(fdt); + int rc; + + size = _ALIGN(size + minexpand, EXPAND_GRANULARITY); + buf = realloc(buf, size); + if (!buf) + die("Couldn't find %d bytes to expand device tree\n\r", size); + rc = fdt_open_into(fdt, buf, size); + if (rc != 0) + die("Couldn't expand fdt into new buffer: %s\n\r", + fdt_strerror(rc)); + + fdt = buf; +} + +static void *fdt_wrapper_finddevice(const char *path) +{ + return offset_devp(fdt_path_offset(fdt, path)); +} + +static int fdt_wrapper_getprop(const void *devp, const char *name, + void *buf, const int buflen) +{ + const void *p; + int len; + + p = fdt_getprop(fdt, devp_offset(devp), name, &len); + if (!p) + return check_err(len); + memcpy(buf, p, min(len, buflen)); + return len; +} + +static int fdt_wrapper_setprop(const void *devp, const char *name, + const void *buf, const int len) +{ + int rc; + + rc = fdt_setprop(fdt, devp_offset(devp), name, buf, len); + if (rc == -FDT_ERR_NOSPACE) { + expand_buf(len + 16); + rc = fdt_setprop(fdt, devp_offset(devp), name, buf, len); + } + + return check_err(rc); +} + +static void *fdt_wrapper_get_parent(const void *devp) +{ + return offset_devp(fdt_parent_offset(fdt, devp_offset(devp))); +} + +static void *fdt_wrapper_create_node(const void *devp, const char *name) +{ + int offset; + + offset = fdt_add_subnode(fdt, devp_offset(devp), name); + if (offset == -FDT_ERR_NOSPACE) { + expand_buf(strlen(name) + 16); + offset = fdt_add_subnode(fdt, devp_offset(devp), name); + } + + return offset_devp(offset); +} + +static void *fdt_wrapper_find_node_by_prop_value(const void *prev, + const char *name, + const char *val, + int len) +{ + int offset = fdt_node_offset_by_prop_value(fdt, devp_offset_find(prev), + name, val, len); + return offset_devp(offset); +} + +static void *fdt_wrapper_find_node_by_compatible(const void *prev, + const char *val) +{ + int offset = fdt_node_offset_by_compatible(fdt, devp_offset_find(prev), + val); + return offset_devp(offset); +} + +static char *fdt_wrapper_get_path(const void *devp, char *buf, int len) +{ + int rc; + + rc = fdt_get_path(fdt, devp_offset(devp), buf, len); + if (check_err(rc)) + return NULL; + return buf; +} + +static unsigned long fdt_wrapper_finalize(void) +{ + int rc; + + rc = fdt_pack(fdt); + if (rc != 0) + die("Couldn't pack flat tree: %s\n\r", + fdt_strerror(rc)); + return (unsigned long)fdt; +} + +void fdt_init(void *blob) +{ + int err; + int bufsize; + + dt_ops.finddevice = fdt_wrapper_finddevice; + dt_ops.getprop = fdt_wrapper_getprop; + dt_ops.setprop = fdt_wrapper_setprop; + dt_ops.get_parent = fdt_wrapper_get_parent; + dt_ops.create_node = fdt_wrapper_create_node; + dt_ops.find_node_by_prop_value = fdt_wrapper_find_node_by_prop_value; + dt_ops.find_node_by_compatible = fdt_wrapper_find_node_by_compatible; + dt_ops.get_path = fdt_wrapper_get_path; + dt_ops.finalize = fdt_wrapper_finalize; + + /* Make sure the dt blob is the right version and so forth */ + fdt = blob; + bufsize = fdt_totalsize(fdt); + + err = fdt_open_into(fdt, fdt, bufsize); + if (err != 0) + die("fdt_init(): %s\n\r", fdt_strerror(err)); +} diff --git a/kexec/arch/ppc/ops.h b/kexec/arch/ppc/ops.h new file mode 100644 index 0000000..5e7a070 --- /dev/null +++ b/kexec/arch/ppc/ops.h @@ -0,0 +1,147 @@ +/* + * Global definition of all the bootwrapper operations. + * + * Author: Mark A. Greer <mgreer@mvista.com> + * + * 2006 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ +#ifndef _PPC_BOOT_OPS_H_ +#define _PPC_BOOT_OPS_H_ +#include "types.h" + +#define MAX_PATH_LEN 256 +#define MAX_PROP_LEN 256 /* What should this be? */ + +typedef void (*kernel_entry_t)(unsigned long r3, unsigned long r4, void *r5); + +/* Device Tree operations */ +struct dt_ops { + void * (*finddevice)(const char *name); + int (*getprop)(const void *phandle, const char *name, void *buf, + const int buflen); + int (*setprop)(const void *phandle, const char *name, + const void *buf, const int buflen); + void *(*get_parent)(const void *phandle); + /* The node must not already exist. */ + void *(*create_node)(const void *parent, const char *name); + void *(*find_node_by_prop_value)(const void *prev, + const char *propname, + const char *propval, int proplen); + void *(*find_node_by_compatible)(const void *prev, + const char *compat); + unsigned long (*finalize)(void); + char *(*get_path)(const void *phandle, char *buf, int len); +}; +extern struct dt_ops dt_ops; + +void fdt_init(void *blob); +extern void flush_cache(void *, unsigned long); +int dt_xlate_reg(void *node, int res, unsigned long *addr, unsigned long *size); +int dt_xlate_addr(void *node, u32 *buf, int buflen, unsigned long *xlated_addr); +int dt_is_compatible(void *node, const char *compat); +void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize); +int dt_get_virtual_reg(void *node, void **addr, int nres); + +static inline void *finddevice(const char *name) +{ + return (dt_ops.finddevice) ? dt_ops.finddevice(name) : NULL; +} + +static inline int getprop(void *devp, const char *name, void *buf, int buflen) +{ + return (dt_ops.getprop) ? dt_ops.getprop(devp, name, buf, buflen) : -1; +} + +static inline int setprop(void *devp, const char *name, + const void *buf, int buflen) +{ + return (dt_ops.setprop) ? dt_ops.setprop(devp, name, buf, buflen) : -1; +} +#define setprop_val(devp, name, val) \ + do { \ + typeof(val) x = (val); \ + setprop((devp), (name), &x, sizeof(x)); \ + } while (0) + +static inline int setprop_str(void *devp, const char *name, const char *buf) +{ + if (dt_ops.setprop) + return dt_ops.setprop(devp, name, buf, strlen(buf) + 1); + + return -1; +} + +static inline void *get_parent(const char *devp) +{ + return dt_ops.get_parent ? dt_ops.get_parent(devp) : NULL; +} + +static inline void *create_node(const void *parent, const char *name) +{ + return dt_ops.create_node ? dt_ops.create_node(parent, name) : NULL; +} + + +static inline void *find_node_by_prop_value(const void *prev, + const char *propname, + const char *propval, int proplen) +{ + if (dt_ops.find_node_by_prop_value) + return dt_ops.find_node_by_prop_value(prev, propname, + propval, proplen); + + return NULL; +} + +static inline void *find_node_by_prop_value_str(const void *prev, + const char *propname, + const char *propval) +{ + return find_node_by_prop_value(prev, propname, propval, + strlen(propval) + 1); +} + +static inline void *find_node_by_devtype(const void *prev, + const char *type) +{ + return find_node_by_prop_value_str(prev, "device_type", type); +} + +static inline void *find_node_by_alias(const char *alias) +{ + void *devp = finddevice("/aliases"); + + if (devp) { + char path[MAX_PATH_LEN]; + if (getprop(devp, alias, path, MAX_PATH_LEN) > 0) + return finddevice(path); + } + + return NULL; +} + +static inline void *find_node_by_compatible(const void *prev, + const char *compat) +{ + if (dt_ops.find_node_by_compatible) + return dt_ops.find_node_by_compatible(prev, compat); + + return NULL; +} + +#define dt_fixup_mac_addresses(...) \ + __dt_fixup_mac_addresses(0, __VA_ARGS__, NULL) + + +static inline char *get_path(const void *phandle, char *buf, int len) +{ + if (dt_ops.get_path) + return dt_ops.get_path(phandle, buf, len); + + return NULL; +} + +#endif /* _PPC_BOOT_OPS_H_ */ diff --git a/kexec/arch/ppc/ppc-setup-dol.S b/kexec/arch/ppc/ppc-setup-dol.S new file mode 100644 index 0000000..17169bd --- /dev/null +++ b/kexec/arch/ppc/ppc-setup-dol.S @@ -0,0 +1,174 @@ +/* + * ppc-setup-dol.S - setup glue for Nintendo's GameCube + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include "ppc_asm.h" + + .data + .globl setup_dol_start +setup_dol_start: + + /* Try to reproduce the GameCube "native" environment */ + + /* Setup BATs */ + isync + li r8, 0 + mtspr DBAT0U, r8 + mtspr DBAT0L, r8 + mtspr DBAT1U, r8 + mtspr DBAT1L, r8 + mtspr DBAT2U, r8 + mtspr DBAT2L, r8 + mtspr DBAT3U, r8 + mtspr DBAT3L, r8 + mtspr IBAT0U, r8 + mtspr IBAT0L, r8 + mtspr IBAT1U, r8 + mtspr IBAT1L, r8 + mtspr IBAT2U, r8 + mtspr IBAT2L, r8 + mtspr IBAT3U, r8 + mtspr IBAT3L, r8 + + /* + * Memory Map + * start end size description + * 0x80000000 0x817fffff 24MB RAM, uncached + * 0xc0000000 0xc17fffff 24MB RAM, cached + * 0xc8000000 0xc81fffff 2MB Embedded Framebuffer + * 0xcc000000 Hardware registers + * 0xe0000000 Layer 2 transfer cache ??? 256KB + * + */ + + isync + lis r8, 0x8000 /* IBAT0,DBAT0 for first 16Mbytes */ + ori r8, r8, 0x01ff /* 16MB */ + mtspr IBAT0U, r8 + mtspr DBAT0U, r8 + li r8, 0x0002 /* rw */ + mtspr IBAT0L, r8 + mtspr DBAT0L, r8 + + lis r8, 0xc000 /* DBAT1 for IO mem */ + ori r8, r8, 0x1fff /* 256MB */ + mtspr DBAT1U, r8 + li r8, 0x002a /* uncached, guarded ,rw */ + mtspr DBAT1L, r8 + + lis r8, 0x8100 /* IBAT2,DBAT2 for next 8Mbytes */ + ori r8, r8, 0x00ff /* 8MB */ + mtspr IBAT2U, r8 + mtspr DBAT2U, r8 + lis r8, 0x0100 + ori r8, r8, 0x0002 /* rw */ + mtspr IBAT2L, r8 + mtspr DBAT2L, r8 + + lis r8, 0xe000 /* DBAT3 for layer 2 transfer cache ??? */ + ori r8, r8, 0x01fe /* 16MB ??? */ + mtspr DBAT3U, r8 + lis r8, 0xe000 + ori r8, r8, 0x0002 /* rw */ + mtspr DBAT3L, r8 + + sync + isync + +/* AFAIK, this is not strictly needed, although seems sane */ +#if 1 + li r9, 0 + + /* page table pointer */ + sync + mtspr SDR1, r9 + + /* segment registers */ + li r8, 16 + mtctr r8 + li r8, 0 +1: mtsrin r9, r8 /* zero */ + sync + addis r8,r8,0x1000 /* next register */ + bdnz 1b +#endif + + /* switch MMU on and continue */ + RELOC_SYM(1f) + mfmsr r0 + ori r0, r0, MSR_RI|MSR_ME|MSR_DR|MSR_IR + mtspr SRR1, r0 + oris r3, r3, 0x8000 /* adjust text address */ + mtspr SRR0, r3 + oris r1, r1, 0x8000 /* adjust stack */ + sync + rfi + +1: + /* from now on we run in a DOL-like environment */ + + + /* first, sanitize the hardware a little bit */ + /* although seems to be not needed in the general case */ + +#if 1 + /* audio */ + lis r8, 0xcc00 /* io mem */ + li r9, 0 + sth r9, 0x5036(r8) /* stop audio sample */ + stw r9, 0x6c00(r8) /* stop streaming */ + stw r9, 0x6c04(r8) /* mute */ + + /* video */ + mfspr r8, 920 /* spr920 = HID2 */ + rlwinm r8, r8, 0, 4, 2 /* stop GX FIFO, and more */ + mtspr 920, r8 + + /* exi */ + lis r8, 0xcc00 /* io mem */ +1: lwz r9,0x680c(r8) /* wait for dma transfer to complete */ + andi. r9,r9,1 + bne+ 1b + stw r9,0x6800(r8) /* disable exi interrupts */ + addi r8,r8,0x14 /* next channel */ + andi. r9,r8,0x40 /* XXX 4 channels? */ + beq+ 1b + + /* pic */ + lis r8, 0xcc00 /* io mem */ + li r9, 0 + stw r9, 0x3004(r8) /* mask all interrupts */ + stw r9, 0x3000(r8) /* clear interrupt cause */ + + /* invalidate L1 data and instructions caches */ + mfspr r8, HID0 + ori r8, r8, HID0_ICFI|HID0_DCI + mtspr HID0, r8 +#endif + + /* jump to our entry point */ + RELOC_SYM(setup_dol_regs) + mr r9, r3 + lwz r5, spr8 - setup_dol_regs(r9) + + mtlr r5 + blr + + .balign 4 + .globl setup_dol_regs +setup_dol_regs: +spr8: .long 0x00000000 + + .balign 4 +//#include "isobel_reloc_debug_console.s" + +setup_dol_end: + + .globl setup_dol_size +setup_dol_size: + .long setup_dol_end - setup_dol_start + diff --git a/kexec/arch/ppc/ppc-setup-simple.S b/kexec/arch/ppc/ppc-setup-simple.S new file mode 100644 index 0000000..1317a8d --- /dev/null +++ b/kexec/arch/ppc/ppc-setup-simple.S @@ -0,0 +1,39 @@ +/* + * ppc-setup-simple.S - (hopefully) setup for simple embedded platforms + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +/* + * Only suitable for platforms booting with MMU turned off. + * -- Albert Herranz + */ + +#include "ppc_asm.h" + + .data + .globl setup_simple_start +setup_simple_start: + + /* should perform here any required setup */ + + RELOC_SYM(setup_simple_regs) + mr r9, r3 + lwz r5, spr8 - setup_simple_regs(r9) + + mtlr r5 + blr + + .balign 4 + .globl setup_simple_regs +setup_simple_regs: +spr8: .long 0x00000000 + +setup_simple_end: + + .globl setup_simple_size +setup_simple_size: + .long setup_simple_end - setup_simple_start + diff --git a/kexec/arch/ppc/ppc_asm.h b/kexec/arch/ppc/ppc_asm.h new file mode 100644 index 0000000..36503a9 --- /dev/null +++ b/kexec/arch/ppc/ppc_asm.h @@ -0,0 +1,506 @@ +/* + * ppc_asm.h - mainly bits stolen from Linux kernel asm/reg.h and asm/ppc_asm.h + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +/* Condition Register Bit Fields */ + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + + +/* General Purpose Registers (GPRs) */ + +#define r0 0 +#define r1 1 +#define r2 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +/* Machine State Register (MSR) Fields */ +#define MSR_SF (1<<63) +#define MSR_ISF (1<<61) +#define MSR_VEC (1<<25) /* Enable AltiVec */ +#define MSR_POW (1<<18) /* Enable Power Management */ +#define MSR_WE (1<<18) /* Wait State Enable */ +#define MSR_TGPR (1<<17) /* TLB Update registers in use */ +#define MSR_CE (1<<17) /* Critical Interrupt Enable */ +#define MSR_ILE (1<<16) /* Interrupt Little Endian */ +#define MSR_EE (1<<15) /* External Interrupt Enable */ +#define MSR_PR (1<<14) /* Problem State / Privilege Level */ +#define MSR_FP (1<<13) /* Floating Point enable */ +#define MSR_ME (1<<12) /* Machine Check Enable */ +#define MSR_FE0 (1<<11) /* Floating Exception mode 0 */ +#define MSR_SE (1<<10) /* Single Step */ +#define MSR_BE (1<<9) /* Branch Trace */ +#define MSR_DE (1<<9) /* Debug Exception Enable */ +#define MSR_FE1 (1<<8) /* Floating Exception mode 1 */ +#define MSR_IP (1<<6) /* Exception prefix 0x000/0xFFF */ +#define MSR_IR (1<<5) /* Instruction Relocate */ +#define MSR_DR (1<<4) /* Data Relocate */ +#define MSR_PE (1<<3) /* Protection Enable */ +#define MSR_PX (1<<2) /* Protection Exclusive Mode */ +#define MSR_RI (1<<1) /* Recoverable Exception */ +#define MSR_LE (1<<0) /* Little Endian */ + +/* Special Purpose Registers (SPRNs)*/ +#define SPRN_CTR 0x009 /* Count Register */ +#define SPRN_DABR 0x3F5 /* Data Address Breakpoint Register */ +#define SPRN_DAR 0x013 /* Data Address Register */ +#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ +#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ +#define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ +#define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */ +#define SPRN_HIOR 0x137 /* 970 Hypervisor interrupt offset */ +#define SPRN_DBAT0L 0x219 /* Data BAT 0 Lower Register */ +#define SPRN_DBAT0U 0x218 /* Data BAT 0 Upper Register */ +#define SPRN_DBAT1L 0x21B /* Data BAT 1 Lower Register */ +#define SPRN_DBAT1U 0x21A /* Data BAT 1 Upper Register */ +#define SPRN_DBAT2L 0x21D /* Data BAT 2 Lower Register */ +#define SPRN_DBAT2U 0x21C /* Data BAT 2 Upper Register */ +#define SPRN_DBAT3L 0x21F /* Data BAT 3 Lower Register */ +#define SPRN_DBAT3U 0x21E /* Data BAT 3 Upper Register */ +#define SPRN_DBAT4L 0x239 /* Data BAT 4 Lower Register */ +#define SPRN_DBAT4U 0x238 /* Data BAT 4 Upper Register */ +#define SPRN_DBAT5L 0x23B /* Data BAT 5 Lower Register */ +#define SPRN_DBAT5U 0x23A /* Data BAT 5 Upper Register */ +#define SPRN_DBAT6L 0x23D /* Data BAT 6 Lower Register */ +#define SPRN_DBAT6U 0x23C /* Data BAT 6 Upper Register */ +#define SPRN_DBAT7L 0x23F /* Data BAT 7 Lower Register */ +#define SPRN_DBAT7U 0x23E /* Data BAT 7 Upper Register */ + +#define SPRN_DEC 0x016 /* Decrement Register */ +#define SPRN_DER 0x095 /* Debug Enable Regsiter */ +#define DER_RSTE 0x40000000 /* Reset Interrupt */ +#define DER_CHSTPE 0x20000000 /* Check Stop */ +#define DER_MCIE 0x10000000 /* Machine Check Interrupt */ +#define DER_EXTIE 0x02000000 /* External Interrupt */ +#define DER_ALIE 0x01000000 /* Alignment Interrupt */ +#define DER_PRIE 0x00800000 /* Program Interrupt */ +#define DER_FPUVIE 0x00400000 /* FP Unavailable Interrupt */ +#define DER_DECIE 0x00200000 /* Decrementer Interrupt */ +#define DER_SYSIE 0x00040000 /* System Call Interrupt */ +#define DER_TRE 0x00020000 /* Trace Interrupt */ +#define DER_SEIE 0x00004000 /* FP SW Emulation Interrupt */ +#define DER_ITLBMSE 0x00002000 /* Imp. Spec. Instruction TLB Miss */ +#define DER_ITLBERE 0x00001000 /* Imp. Spec. Instruction TLB Error */ +#define DER_DTLBMSE 0x00000800 /* Imp. Spec. Data TLB Miss */ +#define DER_DTLBERE 0x00000400 /* Imp. Spec. Data TLB Error */ +#define DER_LBRKE 0x00000008 /* Load/Store Breakpoint Interrupt */ +#define DER_IBRKE 0x00000004 /* Instruction Breakpoint Interrupt */ +#define DER_EBRKE 0x00000002 /* External Breakpoint Interrupt */ +#define DER_DPIE 0x00000001 /* Dev. Port Nonmaskable Request */ +#define SPRN_DMISS 0x3D0 /* Data TLB Miss Register */ +#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ +#define SPRN_EAR 0x11A /* External Address Register */ +#define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */ +#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */ +#define SPRN_HID0 0x3F0 /* Hardware Implementation Register 0 */ +#define HID0_EMCP (1<<31) /* Enable Machine Check pin */ +#define HID0_EBA (1<<29) /* Enable Bus Address Parity */ +#define HID0_EBD (1<<28) /* Enable Bus Data Parity */ +#define HID0_SBCLK (1<<27) +#define HID0_EICE (1<<26) +#define HID0_TBEN (1<<26) /* Timebase enable - 745x */ +#define HID0_ECLK (1<<25) +#define HID0_PAR (1<<24) +#define HID0_STEN (1<<24) /* Software table search enable - 745x */ +#define HID0_HIGH_BAT (1<<23) /* Enable high BATs - 7455 */ +#define HID0_DOZE (1<<23) +#define HID0_NAP (1<<22) +#define HID0_SLEEP (1<<21) +#define HID0_DPM (1<<20) +#define HID0_BHTCLR (1<<18) /* Clear branch history table - 7450 */ +#define HID0_XAEN (1<<17) /* Extended addressing enable - 7450 */ +#define HID0_NHR (1<<16) /* Not hard reset (software bit-7450)*/ +#define HID0_ICE (1<<15) /* Instruction Cache Enable */ +#define HID0_DCE (1<<14) /* Data Cache Enable */ +#define HID0_ILOCK (1<<13) /* Instruction Cache Lock */ +#define HID0_DLOCK (1<<12) /* Data Cache Lock */ +#define HID0_ICFI (1<<11) /* Instr. Cache Flash Invalidate */ +#define HID0_DCI (1<<10) /* Data Cache Invalidate */ +#define HID0_SPD (1<<9) /* Speculative disable */ +#define HID0_SGE (1<<7) /* Store Gathering Enable */ +#define HID0_SIED (1<<7) /* Serial Instr. Execution [Disable] */ +#define HID0_DFCA (1<<6) /* Data Cache Flush Assist */ +#define HID0_LRSTK (1<<4) /* Link register stack - 745x */ +#define HID0_BTIC (1<<5) /* Branch Target Instr Cache Enable */ +#define HID0_ABE (1<<3) /* Address Broadcast Enable */ +#define HID0_FOLD (1<<3) /* Branch Folding enable - 745x */ +#define HID0_BHTE (1<<2) /* Branch History Table Enable */ +#define HID0_BTCD (1<<1) /* Branch target cache disable */ +#define HID0_NOPDST (1<<1) /* No-op dst, dstt, etc. instr. */ +#define HID0_NOPTI (1<<0) /* No-op dcbt and dcbst instr. */ + +#define SPRN_HID1 0x3F1 /* Hardware Implementation Register 1 */ +#define HID1_EMCP (1<<31) /* 7450 Machine Check Pin Enable */ +#define HID1_PC0 (1<<16) /* 7450 PLL_CFG[0] */ +#define HID1_PC1 (1<<15) /* 7450 PLL_CFG[1] */ +#define HID1_PC2 (1<<14) /* 7450 PLL_CFG[2] */ +#define HID1_PC3 (1<<13) /* 7450 PLL_CFG[3] */ +#define HID1_SYNCBE (1<<11) /* 7450 ABE for sync, eieio */ +#define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ +#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ +#define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ +#define SPRN_HID4 0x3F4 /* 970 HID4 */ +#define SPRN_HID5 0x3F6 /* 970 HID5 */ +#if !defined(SPRN_IAC1) && !defined(SPRN_IAC2) +#define SPRN_IAC1 0x3F4 /* Instruction Address Compare 1 */ +#define SPRN_IAC2 0x3F5 /* Instruction Address Compare 2 */ +#endif +#define SPRN_IBAT0L 0x211 /* Instruction BAT 0 Lower Register */ +#define SPRN_IBAT0U 0x210 /* Instruction BAT 0 Upper Register */ +#define SPRN_IBAT1L 0x213 /* Instruction BAT 1 Lower Register */ +#define SPRN_IBAT1U 0x212 /* Instruction BAT 1 Upper Register */ +#define SPRN_IBAT2L 0x215 /* Instruction BAT 2 Lower Register */ +#define SPRN_IBAT2U 0x214 /* Instruction BAT 2 Upper Register */ +#define SPRN_IBAT3L 0x217 /* Instruction BAT 3 Lower Register */ +#define SPRN_IBAT3U 0x216 /* Instruction BAT 3 Upper Register */ +#define SPRN_IBAT4L 0x231 /* Instruction BAT 4 Lower Register */ +#define SPRN_IBAT4U 0x230 /* Instruction BAT 4 Upper Register */ +#define SPRN_IBAT5L 0x233 /* Instruction BAT 5 Lower Register */ +#define SPRN_IBAT5U 0x232 /* Instruction BAT 5 Upper Register */ +#define SPRN_IBAT6L 0x235 /* Instruction BAT 6 Lower Register */ +#define SPRN_IBAT6U 0x234 /* Instruction BAT 6 Upper Register */ +#define SPRN_IBAT7L 0x237 /* Instruction BAT 7 Lower Register */ +#define SPRN_IBAT7U 0x236 /* Instruction BAT 7 Upper Register */ +#define SPRN_ICMP 0x3D5 /* Instruction TLB Compare Register */ +#define SPRN_ICTC 0x3FB /* Instruction Cache Throttling Control Reg */ +#define SPRN_ICTRL 0x3F3 /* 1011 7450 icache and interrupt ctrl */ +#define ICTRL_EICE 0x08000000 /* enable icache parity errs */ +#define ICTRL_EDC 0x04000000 /* enable dcache parity errs */ +#define ICTRL_EICP 0x00000100 /* enable icache par. check */ +#define SPRN_IMISS 0x3D4 /* Instruction TLB Miss Register */ +#define SPRN_IMMR 0x27E /* Internal Memory Map Register */ +#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Regsiter */ +#define SPRN_L2CR2 0x3f8 +#define L2CR_L2E 0x80000000 /* L2 enable */ +#define L2CR_L2PE 0x40000000 /* L2 parity enable */ +#define L2CR_L2SIZ_MASK 0x30000000 /* L2 size mask */ +#define L2CR_L2SIZ_256KB 0x10000000 /* L2 size 256KB */ +#define L2CR_L2SIZ_512KB 0x20000000 /* L2 size 512KB */ +#define L2CR_L2SIZ_1MB 0x30000000 /* L2 size 1MB */ +#define L2CR_L2CLK_MASK 0x0e000000 /* L2 clock mask */ +#define L2CR_L2CLK_DISABLED 0x00000000 /* L2 clock disabled */ +#define L2CR_L2CLK_DIV1 0x02000000 /* L2 clock / 1 */ +#define L2CR_L2CLK_DIV1_5 0x04000000 /* L2 clock / 1.5 */ +#define L2CR_L2CLK_DIV2 0x08000000 /* L2 clock / 2 */ +#define L2CR_L2CLK_DIV2_5 0x0a000000 /* L2 clock / 2.5 */ +#define L2CR_L2CLK_DIV3 0x0c000000 /* L2 clock / 3 */ +#define L2CR_L2RAM_MASK 0x01800000 /* L2 RAM type mask */ +#define L2CR_L2RAM_FLOW 0x00000000 /* L2 RAM flow through */ +#define L2CR_L2RAM_PIPE 0x01000000 /* L2 RAM pipelined */ +#define L2CR_L2RAM_PIPE_LW 0x01800000 /* L2 RAM pipelined latewr */ +#define L2CR_L2DO 0x00400000 /* L2 data only */ +#define L2CR_L2I 0x00200000 /* L2 global invalidate */ +#define L2CR_L2CTL 0x00100000 /* L2 RAM control */ +#define L2CR_L2WT 0x00080000 /* L2 write-through */ +#define L2CR_L2TS 0x00040000 /* L2 test support */ +#define L2CR_L2OH_MASK 0x00030000 /* L2 output hold mask */ +#define L2CR_L2OH_0_5 0x00000000 /* L2 output hold 0.5 ns */ +#define L2CR_L2OH_1_0 0x00010000 /* L2 output hold 1.0 ns */ +#define L2CR_L2SL 0x00008000 /* L2 DLL slow */ +#define L2CR_L2DF 0x00004000 /* L2 differential clock */ +#define L2CR_L2BYP 0x00002000 /* L2 DLL bypass */ +#define L2CR_L2IP 0x00000001 /* L2 GI in progress */ +#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Regsiter */ +#define L3CR_L3E 0x80000000 /* L3 enable */ +#define L3CR_L3PE 0x40000000 /* L3 data parity enable */ +#define L3CR_L3APE 0x20000000 /* L3 addr parity enable */ +#define L3CR_L3SIZ 0x10000000 /* L3 size */ +#define L3CR_L3CLKEN 0x08000000 /* L3 clock enable */ +#define L3CR_L3RES 0x04000000 /* L3 special reserved bit */ +#define L3CR_L3CLKDIV 0x03800000 /* L3 clock divisor */ +#define L3CR_L3IO 0x00400000 /* L3 instruction only */ +#define L3CR_L3SPO 0x00040000 /* L3 sample point override */ +#define L3CR_L3CKSP 0x00030000 /* L3 clock sample point */ +#define L3CR_L3PSP 0x0000e000 /* L3 P-clock sample point */ +#define L3CR_L3REP 0x00001000 /* L3 replacement algorithm */ +#define L3CR_L3HWF 0x00000800 /* L3 hardware flush */ +#define L3CR_L3I 0x00000400 /* L3 global invalidate */ +#define L3CR_L3RT 0x00000300 /* L3 SRAM type */ +#define L3CR_L3NIRCA 0x00000080 /* L3 non-integer ratio clock adj. */ +#define L3CR_L3DO 0x00000040 /* L3 data only mode */ +#define L3CR_PMEN 0x00000004 /* L3 private memory enable */ +#define L3CR_PMSIZ 0x00000001 /* L3 private memory size */ +#define SPRN_MSSCR0 0x3f6 /* Memory Subsystem Control Register 0 */ +#define SPRN_MSSSR0 0x3f7 /* Memory Subsystem Status Register 1 */ +#define SPRN_LDSTCR 0x3f8 /* Load/Store control register */ +#define SPRN_LDSTDB 0x3f4 /* */ +#define SPRN_LR 0x008 /* Link Register */ +#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 */ +#ifndef SPRN_PIR +#define SPRN_PIR 0x3FF /* Processor Identification Register */ +#endif +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 */ +#define SPRN_PTEHI 0x3D5 /* 981 7450 PTE HI word (S/W TLB load) */ +#define SPRN_PTELO 0x3D6 /* 982 7450 PTE LO word (S/W TLB load) */ +#define SPRN_PVR 0x11F /* Processor Version Register */ +#define SPRN_RPA 0x3D6 /* Required Physical Address Register */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ +#define SPRN_SDR1 0x019 /* MMU Hash Base Register */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ +#define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ +#define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ +#define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ +#define SPRN_SPRG3 0x113 /* Special Purpose Register General 3 */ +#define SPRN_SPRG4 0x114 /* Special Purpose Register General 4 */ +#define SPRN_SPRG5 0x115 /* Special Purpose Register General 5 */ +#define SPRN_SPRG6 0x116 /* Special Purpose Register General 6 */ +#define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */ +#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ +#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ +#define SPRN_THRM1 0x3FC /* Thermal Management Register 1 */ +/* these bits were defined in inverted endian sense originally, ugh, confusing */ +#define THRM1_TIN (1 << 31) +#define THRM1_TIV (1 << 30) +#define THRM1_THRES(x) ((x&0x7f)<<23) +#define THRM3_SITV(x) ((x&0x3fff)<<1) +#define THRM1_TID (1<<2) +#define THRM1_TIE (1<<1) +#define THRM1_V (1<<0) +#define SPRN_THRM2 0x3FD /* Thermal Management Register 2 */ +#define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ +#define THRM3_E (1<<0) +#define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register */ +#define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ +#define SPRN_XER 0x001 /* Fixed Point Exception Register */ + +/* Bit definitions for MMCR0 and PMC1 / PMC2. */ +#define MMCR0_PMC1_CYCLES (1 << 7) +#define MMCR0_PMC1_ICACHEMISS (5 << 7) +#define MMCR0_PMC1_DTLB (6 << 7) +#define MMCR0_PMC2_DCACHEMISS 0x6 +#define MMCR0_PMC2_CYCLES 0x1 +#define MMCR0_PMC2_ITLB 0x7 +#define MMCR0_PMC2_LOADMISSTIME 0x5 + +/* Short-hand versions for a number of the above SPRNs */ +#define CTR SPRN_CTR /* Counter Register */ +#define DAR SPRN_DAR /* Data Address Register */ +#define DABR SPRN_DABR /* Data Address Breakpoint Register */ +#define DBAT0L SPRN_DBAT0L /* Data BAT 0 Lower Register */ +#define DBAT0U SPRN_DBAT0U /* Data BAT 0 Upper Register */ +#define DBAT1L SPRN_DBAT1L /* Data BAT 1 Lower Register */ +#define DBAT1U SPRN_DBAT1U /* Data BAT 1 Upper Register */ +#define DBAT2L SPRN_DBAT2L /* Data BAT 2 Lower Register */ +#define DBAT2U SPRN_DBAT2U /* Data BAT 2 Upper Register */ +#define DBAT3L SPRN_DBAT3L /* Data BAT 3 Lower Register */ +#define DBAT3U SPRN_DBAT3U /* Data BAT 3 Upper Register */ +#define DBAT4L SPRN_DBAT4L /* Data BAT 4 Lower Register */ +#define DBAT4U SPRN_DBAT4U /* Data BAT 4 Upper Register */ +#define DBAT5L SPRN_DBAT5L /* Data BAT 5 Lower Register */ +#define DBAT5U SPRN_DBAT5U /* Data BAT 5 Upper Register */ +#define DBAT6L SPRN_DBAT6L /* Data BAT 6 Lower Register */ +#define DBAT6U SPRN_DBAT6U /* Data BAT 6 Upper Register */ +#define DBAT7L SPRN_DBAT7L /* Data BAT 7 Lower Register */ +#define DBAT7U SPRN_DBAT7U /* Data BAT 7 Upper Register */ +#define DEC SPRN_DEC /* Decrement Register */ +#define DMISS SPRN_DMISS /* Data TLB Miss Register */ +#define DSISR SPRN_DSISR /* Data Storage Interrupt Status Register */ +#define EAR SPRN_EAR /* External Address Register */ +#define HASH1 SPRN_HASH1 /* Primary Hash Address Register */ +#define HASH2 SPRN_HASH2 /* Secondary Hash Address Register */ +#define HID0 SPRN_HID0 /* Hardware Implementation Register 0 */ +#define HID1 SPRN_HID1 /* Hardware Implementation Register 1 */ +#define IABR SPRN_IABR /* Instruction Address Breakpoint Register */ +#define IBAT0L SPRN_IBAT0L /* Instruction BAT 0 Lower Register */ +#define IBAT0U SPRN_IBAT0U /* Instruction BAT 0 Upper Register */ +#define IBAT1L SPRN_IBAT1L /* Instruction BAT 1 Lower Register */ +#define IBAT1U SPRN_IBAT1U /* Instruction BAT 1 Upper Register */ +#define IBAT2L SPRN_IBAT2L /* Instruction BAT 2 Lower Register */ +#define IBAT2U SPRN_IBAT2U /* Instruction BAT 2 Upper Register */ +#define IBAT3L SPRN_IBAT3L /* Instruction BAT 3 Lower Register */ +#define IBAT3U SPRN_IBAT3U /* Instruction BAT 3 Upper Register */ +#define IBAT4L SPRN_IBAT4L /* Instruction BAT 4 Lower Register */ +#define IBAT4U SPRN_IBAT4U /* Instruction BAT 4 Upper Register */ +#define IBAT5L SPRN_IBAT5L /* Instruction BAT 5 Lower Register */ +#define IBAT5U SPRN_IBAT5U /* Instruction BAT 5 Upper Register */ +#define IBAT6L SPRN_IBAT6L /* Instruction BAT 6 Lower Register */ +#define IBAT6U SPRN_IBAT6U /* Instruction BAT 6 Upper Register */ +#define IBAT7L SPRN_IBAT7L /* Instruction BAT 7 Lower Register */ +#define IBAT7U SPRN_IBAT7U /* Instruction BAT 7 Upper Register */ +#define ICMP SPRN_ICMP /* Instruction TLB Compare Register */ +#define IMISS SPRN_IMISS /* Instruction TLB Miss Register */ +#define IMMR SPRN_IMMR /* PPC 860/821 Internal Memory Map Register */ +#define L2CR SPRN_L2CR /* Classic PPC L2 cache control register */ +#define L3CR SPRN_L3CR /* PPC 745x L3 cache control register */ +#define LR SPRN_LR +#define PVR SPRN_PVR /* Processor Version */ +#define RPA SPRN_RPA /* Required Physical Address Register */ +#define SDR1 SPRN_SDR1 /* MMU hash base register */ +#define SPR0 SPRN_SPRG0 /* Supervisor Private Registers */ +#define SPR1 SPRN_SPRG1 +#define SPR2 SPRN_SPRG2 +#define SPR3 SPRN_SPRG3 +#define SPR4 SPRN_SPRG4 +#define SPR5 SPRN_SPRG5 +#define SPR6 SPRN_SPRG6 +#define SPR7 SPRN_SPRG7 +#define SPRG0 SPRN_SPRG0 +#define SPRG1 SPRN_SPRG1 +#define SPRG2 SPRN_SPRG2 +#define SPRG3 SPRN_SPRG3 +#define SPRG4 SPRN_SPRG4 +#define SPRG5 SPRN_SPRG5 +#define SPRG6 SPRN_SPRG6 +#define SPRG7 SPRN_SPRG7 +#define SRR0 SPRN_SRR0 /* Save and Restore Register 0 */ +#define SRR1 SPRN_SRR1 /* Save and Restore Register 1 */ +#define SRR2 SPRN_SRR2 /* Save and Restore Register 2 */ +#define SRR3 SPRN_SRR3 /* Save and Restore Register 3 */ +#define ICTC SPRN_ICTC /* Instruction Cache Throttling Control Reg */ +#define THRM1 SPRN_THRM1 /* Thermal Management Register 1 */ +#define THRM2 SPRN_THRM2 /* Thermal Management Register 2 */ +#define THRM3 SPRN_THRM3 /* Thermal Management Register 3 */ +#define XER SPRN_XER +#define TBRL SPRN_TBRL /* Time Base Read Lower Register */ +#define TBRU SPRN_TBRU /* Time Base Read Upper Register */ +#define TBWL SPRN_TBWL /* Time Base Write Lower Register */ +#define TBWU SPRN_TBWU /* Time Base Write Upper Register */ + +/* Processor Version Register */ + +/* Processor Version Register (PVR) field extraction */ + +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ + +/* + * IBM has further subdivided the standard PowerPC 16-bit version and + * revision subfields of the PVR for the PowerPC 403s into the following: + */ + +#define PVR_FAM(pvr) (((pvr) >> 20) & 0xFFF) /* Family field */ +#define PVR_MEM(pvr) (((pvr) >> 16) & 0xF) /* Member field */ +#define PVR_CORE(pvr) (((pvr) >> 12) & 0xF) /* Core field */ +#define PVR_CFG(pvr) (((pvr) >> 8) & 0xF) /* Configuration field */ +#define PVR_MAJ(pvr) (((pvr) >> 4) & 0xF) /* Major revision field */ +#define PVR_MIN(pvr) (((pvr) >> 0) & 0xF) /* Minor revision field */ + +/* Processor Version Numbers */ + +#define PVR_403GA 0x00200000 +#define PVR_403GB 0x00200100 +#define PVR_403GC 0x00200200 +#define PVR_403GCX 0x00201400 +#define PVR_405GP 0x40110000 +#define PVR_STB03XXX 0x40310000 +#define PVR_NP405H 0x41410000 +#define PVR_NP405L 0x41610000 +#define PVR_440GP_RB 0x40120440 +#define PVR_440GP_RC1 0x40120481 +#define PVR_440GP_RC2 0x40200481 +#define PVR_440GX_RA 0x51b21850 +#define PVR_440GX_RB 0x51b21851 +#define PVR_440GX_RB1 0x51b21852 +#define PVR_601 0x00010000 +#define PVR_602 0x00050000 +#define PVR_603 0x00030000 +#define PVR_603e 0x00060000 +#define PVR_603ev 0x00070000 +#define PVR_603r 0x00071000 +#define PVR_604 0x00040000 +#define PVR_604e 0x00090000 +#define PVR_604r 0x000A0000 +#define PVR_620 0x00140000 +#define PVR_740 0x00080000 +#define PVR_750 PVR_740 +#define PVR_740P 0x10080000 +#define PVR_750P PVR_740P +#define PVR_7400 0x000C0000 +#define PVR_7410 0x800C0000 +#define PVR_7450 0x80000000 +/* + * For the 8xx processors, all of them report the same PVR family for + * the PowerPC core. The various versions of these processors must be + * differentiated by the version number in the Communication Processor + * Module (CPM). + */ +#define PVR_821 0x00500000 +#define PVR_823 PVR_821 +#define PVR_850 PVR_821 +#define PVR_860 PVR_821 +#define PVR_8240 0x00810100 +#define PVR_8245 0x80811014 +#define PVR_8260 PVR_8240 + +/* Segment Registers */ +#define SR0 0 +#define SR1 1 +#define SR2 2 +#define SR3 3 +#define SR4 4 +#define SR5 5 +#define SR6 6 +#define SR7 7 +#define SR8 8 +#define SR9 9 +#define SR10 10 +#define SR11 11 +#define SR12 12 +#define SR13 13 +#define SR14 14 +#define SR15 15 + + +/* returns r3 = relocated address of sym */ +/* modifies r0 */ +#define RELOC_SYM(sym) \ + mflr r3; \ + bl 1f; \ +1: mflr r0; \ + mtlr r3; \ + lis r3, 1b@ha; \ + ori r3, r3, 1b@l; \ + subf r0, r3, r0; \ + lis r3, sym@ha; \ + ori r3, r3, sym@l; \ + add r3, r3, r0 + diff --git a/kexec/arch/ppc64/Makefile b/kexec/arch/ppc64/Makefile new file mode 100644 index 0000000..9caf501 --- /dev/null +++ b/kexec/arch/ppc64/Makefile @@ -0,0 +1,26 @@ +# +# kexec ppc64 (linux booting linux) +# +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +ppc64_KEXEC_SRCS = kexec/arch/ppc64/kexec-elf-rel-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-zImage-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-elf-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/crashdump-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/fdt.c +ppc64_KEXEC_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) + +ppc64_ARCH_REUSE_INITRD = + +ppc64_FS2DT = kexec/fs2dt.c +ppc64_FS2DT_INCLUDE = -include $(srcdir)/kexec/arch/ppc64/crashdump-ppc64.h \ + -include $(srcdir)/kexec/arch/ppc64/kexec-ppc64.h + +ppc64_CPPFLAGS = -I$(srcdir)/kexec/libfdt + +dist += kexec/arch/ppc64/Makefile $(ppc64_KEXEC_SRCS) \ + kexec/arch/ppc64/kexec-ppc64.h kexec/arch/ppc64/crashdump-ppc64.h \ + kexec/arch/ppc64/include/arch/fdt.h \ + kexec/arch/ppc64/include/arch/options.h + diff --git a/kexec/arch/ppc64/crashdump-ppc64.c b/kexec/arch/ppc64/crashdump-ppc64.c new file mode 100644 index 0000000..6d47898 --- /dev/null +++ b/kexec/arch/ppc64/crashdump-ppc64.c @@ -0,0 +1,644 @@ +/* + * kexec: Linux boots Linux + * + * Created by: R Sharada (sharada@in.ibm.com) + * Copyright (C) IBM Corporation, 2005. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include <dirent.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../crashdump.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" + +#define DEVTREE_CRASHKERNEL_BASE "/proc/device-tree/chosen/linux,crashkernel-base" +#define DEVTREE_CRASHKERNEL_SIZE "/proc/device-tree/chosen/linux,crashkernel-size" + +unsigned int num_of_lmb_sets; +unsigned int is_dyn_mem_v2; +uint64_t lmb_size; + +static struct crash_elf_info elf_info64 = +{ + class: ELFCLASS64, +#if BYTE_ORDER == LITTLE_ENDIAN + data: ELFDATA2LSB, +#else + data: ELFDATA2MSB, +#endif + machine: EM_PPC64, + page_offset: PAGE_OFFSET, + lowmem_limit: MAXMEM, +}; + +static struct crash_elf_info elf_info32 = +{ + class: ELFCLASS32, + data: ELFDATA2MSB, + machine: EM_PPC64, + page_offset: PAGE_OFFSET, + lowmem_limit: MAXMEM, +}; + +extern struct arch_options_t arch_options; + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region + */ +static struct memory_range *crash_memory_range = NULL; + +/* Define a variable to replace the CRASH_MAX_MEMORY_RANGES macro */ +static int crash_max_memory_ranges; + +/* + * Used to save various memory ranges/regions needed for the captured + * kernel to boot. (lime memmap= option in other archs) + */ +mem_rgns_t usablemem_rgns = {0, NULL}; + +static unsigned long long cstart, cend; +static int memory_ranges; + +/* + * Exclude the region that lies within crashkernel and above the memory + * limit which is reflected by mem= kernel option. + */ +static void exclude_crash_region(uint64_t start, uint64_t end) +{ + /* If memory_limit is set then exclude the memory region above it. */ + if (memory_limit) { + if (start >= memory_limit) + return; + if (end > memory_limit) + end = memory_limit; + } + + if (cstart < end && cend > start) { + if (start < cstart && end > cend) { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = cstart; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + crash_memory_range[memory_ranges].start = cend; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } else if (start < cstart) { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = cstart; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } else if (end > cend) { + crash_memory_range[memory_ranges].start = cend; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } + } else { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } +} + +static int get_dyn_reconf_crash_memory_ranges(void) +{ + uint64_t start, end; + uint64_t startrange, endrange; + uint64_t size; + char fname[128], buf[32]; + FILE *file; + unsigned int i; + int n; + uint32_t flags; + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, "ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory"); + if (is_dyn_mem_v2) + strcat(fname, "-v2"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + return -1; + } + + fseek(file, 4, SEEK_SET); + startrange = endrange = 0; + size = lmb_size; + for (i = 0; i < num_of_lmb_sets; i++) { + if ((n = fread(buf, 1, LMB_ENTRY_SIZE, file)) < 0) { + perror(fname); + fclose(file); + return -1; + } + if (memory_ranges >= (max_memory_ranges + 1)) { + /* No space to insert another element. */ + fprintf(stderr, + "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + fclose(file); + return -1; + } + + /* + * If the property is ibm,dynamic-memory-v2, the first 4 bytes + * tell the number of sequential LMBs in this entry. + */ + if (is_dyn_mem_v2) + size = be32_to_cpu(((unsigned int *)buf)[0]) * lmb_size; + + start = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR])); + end = start + size; + if (start == 0 && end >= (BACKUP_SRC_END + 1)) + start = BACKUP_SRC_END + 1; + + flags = be32_to_cpu((*((uint32_t *)&buf[DRCONF_FLAGS]))); + /* skip this block if the reserved bit is set in flags (0x80) + or if the block is not assigned to this partition (0x8) */ + if ((flags & 0x80) || !(flags & 0x8)) + continue; + + if (start != endrange) { + if (startrange != endrange) + exclude_crash_region(startrange, endrange); + startrange = start; + } + endrange = end; + } + if (startrange != endrange) + exclude_crash_region(startrange, endrange); + + fclose(file); + return 0; +} + +/* + * For a given memory node, check if it is mapped to system RAM or + * to onboard memory on accelerator device like GPU card or such. + */ +static int is_coherent_device_mem(const char *fname) +{ + char fpath[PATH_LEN]; + char buf[32]; + DIR *dmem; + FILE *file; + struct dirent *mentry; + int cnt, ret = 0; + + strcpy(fpath, fname); + if ((dmem = opendir(fpath)) == NULL) { + perror(fpath); + return -1; + } + + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "compatible")) + continue; + + strcat(fpath, "/compatible"); + if ((file = fopen(fpath, "r")) == NULL) { + perror(fpath); + ret = -1; + break; + } + if ((cnt = fread(buf, 1, 32, file)) < 0) { + perror(fpath); + fclose(file); + ret = -1; + break; + } + if (!strncmp(buf, "ibm,coherent-device-memory", 26)) { + fclose(file); + ret = 1; + break; + } + fclose(file); + } + + closedir(dmem); + return ret; +} + + +/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to + * create Elf headers. Keeping it separate from get_memory_ranges() as + * requirements are different in the case of normal kexec and crashdumps. + * + * Normal kexec needs to look at all of available physical memory irrespective + * of the fact how much of it is being used by currently running kernel. + * Crashdumps need to have access to memory regions actually being used by + * running kernel. Expecting a different file/data structure than /proc/iomem + * to look into down the line. May be something like /proc/kernelmem or may + * be zone data structures exported from kernel. + */ +static int get_crash_memory_ranges(struct memory_range **range, int *ranges) +{ + + char device_tree[256] = "/proc/device-tree/"; + char fname[PATH_LEN]; + char buf[MAXBYTES]; + DIR *dir, *dmem; + FILE *file; + struct dirent *dentry, *mentry; + int n, ret, crash_rng_len = 0; + unsigned long long start, end; + int page_size; + + crash_max_memory_ranges = max_memory_ranges + 6; + crash_rng_len = sizeof(struct memory_range) * crash_max_memory_ranges; + + crash_memory_range = (struct memory_range *) malloc(crash_rng_len); + if (!crash_memory_range) { + fprintf(stderr, "Allocation for crash memory range failed\n"); + return -1; + } + memset(crash_memory_range, 0, crash_rng_len); + + /* create a separate program header for the backup region */ + crash_memory_range[0].start = BACKUP_SRC_START; + crash_memory_range[0].end = BACKUP_SRC_END + 1; + crash_memory_range[0].type = RANGE_RAM; + memory_ranges++; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + goto err; + } + + cstart = crash_base; + cend = crash_base + crash_size; + + while ((dentry = readdir(dir)) != NULL) { + if (!strncmp(dentry->d_name, + "ibm,dynamic-reconfiguration-memory", 35)){ + get_dyn_reconf_crash_memory_ranges(); + continue; + } + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + + ret = is_coherent_device_mem(fname); + if (ret == -1) { + closedir(dir); + goto err; + } else if (ret == 1) { + /* + * Avoid adding this memory region as it is not + * mapped to system RAM. + */ + continue; + } + + if ((dmem = opendir(fname)) == NULL) { + perror(fname); + closedir(dir); + goto err; + } + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + closedir(dmem); + closedir(dir); + goto err; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + fclose(file); + closedir(dmem); + closedir(dir); + goto err; + } + if (memory_ranges >= (max_memory_ranges + 1)) { + /* No space to insert another element. */ + fprintf(stderr, + "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + goto err; + } + + start = be64_to_cpu(((unsigned long long *)buf)[0]); + end = start + + be64_to_cpu(((unsigned long long *)buf)[1]); + if (start == 0 && end >= (BACKUP_SRC_END + 1)) + start = BACKUP_SRC_END + 1; + + exclude_crash_region(start, end); + fclose(file); + } + closedir(dmem); + } + closedir(dir); + + /* + * If RTAS region is overlapped with crashkernel, need to create ELF + * Program header for the overlapped memory. + */ + if (crash_base < rtas_base + rtas_size && + rtas_base < crash_base + crash_size) { + page_size = getpagesize(); + cstart = rtas_base; + cend = rtas_base + rtas_size; + if (cstart < crash_base) + cstart = crash_base; + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + /* + * The rtas section created here is formed by reading rtas-base + * and rtas-size from /proc/device-tree/rtas. Unfortunately + * rtas-size is not required to be a multiple of PAGE_SIZE + * The remainder of the page it ends on is just garbage, and is + * safe to read, its just not accounted in rtas-size. Since + * we're creating an elf section here though, lets round it up + * to the next page size boundary though, so makedumpfile can + * read it safely without going south on us. + */ + cend = _ALIGN(cend, page_size); + + crash_memory_range[memory_ranges].start = cstart; + crash_memory_range[memory_ranges++].end = cend; + } + + /* + * If OPAL region is overlapped with crashkernel, need to create ELF + * Program header for the overlapped memory. + */ + if (crash_base < opal_base + opal_size && + opal_base < crash_base + crash_size) { + page_size = getpagesize(); + cstart = opal_base; + cend = opal_base + opal_size; + if (cstart < crash_base) + cstart = crash_base; + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + /* + * The opal section created here is formed by reading opal-base + * and opal-size from /proc/device-tree/ibm,opal. Unfortunately + * opal-size is not required to be a multiple of PAGE_SIZE + * The remainder of the page it ends on is just garbage, and is + * safe to read, its just not accounted in opal-size. Since + * we're creating an elf section here though, lets round it up + * to the next page size boundary though, so makedumpfile can + * read it safely without going south on us. + */ + cend = _ALIGN(cend, page_size); + + crash_memory_range[memory_ranges].start = cstart; + crash_memory_range[memory_ranges++].end = cend; + } + *range = crash_memory_range; + *ranges = memory_ranges; + + int j; + dbgprintf("CRASH MEMORY RANGES\n"); + for(j = 0; j < *ranges; j++) { + start = crash_memory_range[j].start; + end = crash_memory_range[j].end; + dbgprintf("%016Lx-%016Lx\n", start, end); + } + + return 0; + +err: + if (crash_memory_range) + free(crash_memory_range); + return -1; +} + +static int add_cmdline_param(char *cmdline, uint64_t addr, char *cmdstr, + char *byte) +{ + int cmdline_size, cmdlen, len, align = 1024; + char str[COMMAND_LINE_SIZE], *ptr; + + /* Passing in =xxxK / =xxxM format. Saves space required in cmdline.*/ + switch (byte[0]) { + case 'K': + if (addr%align) + return -1; + addr = addr/align; + break; + case 'M': + addr = addr/(align *align); + break; + } + ptr = str; + strcpy(str, cmdstr); + ptr += strlen(str); + ultoa(addr, ptr); + strcat(str, byte); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + cmdline_size = COMMAND_LINE_SIZE; + if (cmdlen > (cmdline_size - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + dbgprintf("Command line after adding elfcorehdr: %s\n", cmdline); + return 0; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for backup region, another segment for storing elf headers + * for crash memory image. + */ +int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, + uint64_t max_addr, unsigned long min_base) +{ + void *tmp; + unsigned long sz; + uint64_t elfcorehdr; + int nr_ranges, align = 1024, i; + unsigned long long end; + struct memory_range *mem_range; + + if (get_crash_memory_ranges(&mem_range, &nr_ranges) < 0) + return -1; + + info->backup_src_start = BACKUP_SRC_START; + info->backup_src_size = BACKUP_SRC_SIZE; + /* Create a backup region segment to store backup data*/ + sz = _ALIGN(BACKUP_SRC_SIZE, align); + tmp = xmalloc(sz); + memset(tmp, 0, sz); + info->backup_start = add_buffer(info, tmp, sz, sz, align, + 0, max_addr, 1); + reserve(info->backup_start, sz); + + /* On ppc64 memory ranges in device-tree is denoted as start + * and size rather than start and end, as is the case with + * other architectures like i386 . Because of this when loading + * the memory ranges in crashdump-elf.c the filesz calculation + * [ end - start + 1 ] goes for a toss. + * + * To be in sync with other archs adjust the end value for + * every crash memory range before calling the generic function + */ + + for (i = 0; i < nr_ranges; i++) { + end = crash_memory_range[i].end - 1; + crash_memory_range[i].end = end; + } + + + /* Create elf header segment and store crash image data. */ + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + if (crash_create_elf64_headers(info, &elf_info64, + crash_memory_range, nr_ranges, + &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) { + free (tmp); + return -1; + } + } + else { + if (crash_create_elf32_headers(info, &elf_info32, + crash_memory_range, nr_ranges, + &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) { + free(tmp); + return -1; + } + } + + elfcorehdr = add_buffer(info, tmp, sz, sz, align, min_base, + max_addr, 1); + reserve(elfcorehdr, sz); + /* modify and store the cmdline in a global array. This is later + * read by flatten_device_tree and modified if required + */ + add_cmdline_param(mod_cmdline, elfcorehdr, " elfcorehdr=", "K"); + return 0; +} + +/* + * Used to save various memory regions needed for the captured kernel. + */ + +void add_usable_mem_rgns(unsigned long long base, unsigned long long size) +{ + unsigned int i; + unsigned long long end = base + size; + unsigned long long ustart, uend; + + base = _ALIGN_DOWN(base, getpagesize()); + end = _ALIGN_UP(end, getpagesize()); + + for (i=0; i < usablemem_rgns.size; i++) { + ustart = usablemem_rgns.ranges[i].start; + uend = usablemem_rgns.ranges[i].end; + if (base < uend && end > ustart) { + if ((base >= ustart) && (end <= uend)) + return; + if (base < ustart && end > uend) { + usablemem_rgns.ranges[i].start = base; + usablemem_rgns.ranges[i].end = end; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new base:%llx new size:%llx\n", + i, base, size); +#endif + return; + } else if (base < ustart) { + usablemem_rgns.ranges[i].start = base; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new base:%llx new size:%llx", + i, base, usablemem_rgns.ranges[i].end - base); +#endif + return; + } else if (end > uend){ + usablemem_rgns.ranges[i].end = end; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new end:%llx, new size:%llx", + i, end, end - usablemem_rgns.ranges[i].start); +#endif + return; + } + } + } + usablemem_rgns.ranges[usablemem_rgns.size].start = base; + usablemem_rgns.ranges[usablemem_rgns.size++].end = end; + + dbgprintf("usable memory rgns size:%u base:%llx size:%llx\n", + usablemem_rgns.size, base, size); +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + unsigned long long value; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_BASE, &value)) + *start = be64_to_cpu(value); + else + return -1; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_SIZE, &value)) + *end = *start + be64_to_cpu(value) - 1; + else + return -1; + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + int fd; + + fd = open(DEVTREE_CRASHKERNEL_BASE, O_RDONLY); + if (fd < 0) + return 0; + close(fd); + return 1; +} + +#if 0 +static int sort_regions(mem_rgns_t *rgn) +{ + int i, j; + unsigned long long tstart, tend; + for (i = 0; i < rgn->size; i++) { + for (j = 0; j < rgn->size - i - 1; j++) { + if (rgn->ranges[j].start > rgn->ranges[j+1].start) { + tstart = rgn->ranges[j].start; + tend = rgn->ranges[j].end; + rgn->ranges[j].start = rgn->ranges[j+1].start; + rgn->ranges[j].end = rgn->ranges[j+1].end; + rgn->ranges[j+1].start = tstart; + rgn->ranges[j+1].end = tend; + } + } + } + return 0; + +} +#endif + diff --git a/kexec/arch/ppc64/crashdump-ppc64.h b/kexec/arch/ppc64/crashdump-ppc64.h new file mode 100644 index 0000000..b0cba8a --- /dev/null +++ b/kexec/arch/ppc64/crashdump-ppc64.h @@ -0,0 +1,51 @@ +#ifndef CRASHDUMP_PPC64_H +#define CRASHDUMP_PPC64_H + +#include <stdint.h> +#include <sys/types.h> + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + uint64_t max_addr, unsigned long min_base); +void add_usable_mem_rgns(unsigned long long base, unsigned long long size); + +#define PAGE_OFFSET 0xC000000000000000ULL +#define KERNELBASE PAGE_OFFSET +#define VMALLOCBASE 0xD000000000000000ULL + +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define MAXMEM (-(unsigned long)(KERNELBASE-VMALLOCBASE)) + +#define COMMAND_LINE_SIZE 2048 /* from kernel */ +/* Backup Region, First 64K of System RAM. */ +#define BACKUP_SRC_START 0x0000 +#define BACKUP_SRC_END 0xffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) + +#define KDUMP_BACKUP_LIMIT BACKUP_SRC_SIZE + +#define KERNEL_RUN_AT_ZERO_MAGIC 0x72756e30 /* "run0" */ + +extern uint64_t crash_base; +extern uint64_t crash_size; +extern uint64_t memory_limit; +extern unsigned int rtas_base; +extern unsigned int rtas_size; +extern uint64_t opal_base; +extern uint64_t opal_size; + +/* + * In case of ibm,dynamic-memory-v2 property, this is the number of LMB + * sets where each set represents a group of sequential LMB entries. In + * case of ibm,dynamic-memory property, the number of LMB sets is nothing + * but the total number of LMB entries. + */ +extern unsigned int num_of_lmb_sets; +extern unsigned int is_dyn_mem_v2; +extern uint64_t lmb_size; + +#define LMB_ENTRY_SIZE 24 +#define DRCONF_ADDR (is_dyn_mem_v2 ? 4 : 0) +#define DRCONF_FLAGS 20 + +#endif /* CRASHDUMP_PPC64_H */ diff --git a/kexec/arch/ppc64/fdt.c b/kexec/arch/ppc64/fdt.c new file mode 100644 index 0000000..8bc6d2d --- /dev/null +++ b/kexec/arch/ppc64/fdt.c @@ -0,0 +1,78 @@ +/* + * ppc64 fdt fixups + * + * Copyright 2015 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <arch/fdt.h> +#include <libfdt.h> +#include <stdio.h> +#include <stdlib.h> + +/* + * Let the kernel know it booted from kexec, as some things (e.g. + * secondary CPU release) may work differently. + */ +static int fixup_kexec_prop(void *fdt) +{ + int err, nodeoffset; + + nodeoffset = fdt_subnode_offset(fdt, 0, "chosen"); + if (nodeoffset < 0) + nodeoffset = fdt_add_subnode(fdt, 0, "chosen"); + if (nodeoffset < 0) { + printf("%s: add /chosen %s\n", __func__, + fdt_strerror(nodeoffset)); + return -1; + } + + err = fdt_setprop(fdt, nodeoffset, "linux,booted-from-kexec", + NULL, 0); + if (err < 0) { + printf("%s: couldn't write linux,booted-from-kexec: %s\n", + __func__, fdt_strerror(err)); + return -1; + } + + return 0; +} + + +/* + * For now, assume that the added content fits in the file. + * This should be the case when flattening from /proc/device-tree, + * and when passing in a dtb, dtc can be told to add padding. + */ +int fixup_dt(char **fdt, off_t *size) +{ + int ret; + + *size += 4096; + *fdt = realloc(*fdt, *size); + if (!*fdt) { + fprintf(stderr, "%s: out of memory\n", __func__); + return -1; + } + + ret = fdt_open_into(*fdt, *fdt, *size); + if (ret < 0) { + fprintf(stderr, "%s: fdt_open_into: %s\n", __func__, + fdt_strerror(ret)); + return -1; + } + + ret = fixup_kexec_prop(*fdt); + if (ret < 0) + return ret; + + return 0; +} diff --git a/kexec/arch/ppc64/include/arch/fdt.h b/kexec/arch/ppc64/include/arch/fdt.h new file mode 100644 index 0000000..b19f185 --- /dev/null +++ b/kexec/arch/ppc64/include/arch/fdt.h @@ -0,0 +1,8 @@ +#ifndef KEXEC_ARCH_PPC64_FDT +#define KEXEC_ARCH_PPC64_FDT + +#include <sys/types.h> + +int fixup_dt(char **fdt, off_t *size); + +#endif diff --git a/kexec/arch/ppc64/include/arch/options.h b/kexec/arch/ppc64/include/arch/options.h new file mode 100644 index 0000000..2bca96a --- /dev/null +++ b/kexec/arch/ppc64/include/arch/options.h @@ -0,0 +1,51 @@ +#ifndef KEXEC_ARCH_PPC64_OPTIONS_H +#define KEXEC_ARCH_PPC64_OPTIONS_H + +#define OPT_ELF64_CORE (OPT_MAX+0) +#define OPT_DT_NO_OLD_ROOT (OPT_MAX+1) +#define OPT_ARCH_MAX (OPT_MAX+2) + +/* All 'local' loader options: */ +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_RAMDISK (OPT_ARCH_MAX+1) +#define OPT_DEVICETREEBLOB (OPT_ARCH_MAX+2) +#define OPT_ARGS_IGNORE (OPT_ARCH_MAX+3) +#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+4) + +/* Options relevant to the architecture (excluding loader-specific ones): */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "elf64-core-headers", 0, 0, OPT_ELF64_CORE }, \ + { "dt-no-old-root", 0, 0, OPT_DT_NO_OLD_ROOT }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "append", 1, NULL, OPT_APPEND }, \ + { "ramdisk", 1, NULL, OPT_RAMDISK }, \ + { "initrd", 1, NULL, OPT_RAMDISK }, \ + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, \ + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, \ + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + +#define KEXEC_ALL_OPT_STR KEXEC_OPT_STR + + +#endif /* KEXEC_ARCH_PPC64_OPTIONS_H */ diff --git a/kexec/arch/ppc64/kexec-elf-ppc64.c b/kexec/arch/ppc64/kexec-elf-ppc64.c new file mode 100644 index 0000000..01d045f --- /dev/null +++ b/kexec/arch/ppc64/kexec-elf-ppc64.c @@ -0,0 +1,496 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <linux/elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" +#include <libfdt.h> +#include <arch/fdt.h> +#include <arch/options.h> + +uint64_t initrd_base, initrd_size; +unsigned char reuse_initrd = 0; +const char *ramdisk; + +int elf_ppc64_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + goto out; + } + + /* Verify the architecuture specific bits */ + if ((ehdr.e_machine != EM_PPC64) && (ehdr.e_machine != EM_PPC)) { + /* for a different architecture */ + result = -1; + goto out; + } + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +void arch_reuse_initrd(void) +{ + reuse_initrd = 1; +} + +static int read_prop(char *name, void *value, size_t len) +{ + int fd; + size_t rlen; + + fd = open(name, O_RDONLY); + if (fd == -1) + return -1; + + rlen = read(fd, value, len); + if (rlen < 0) + fprintf(stderr, "Warning : Can't read %s : %s", + name, strerror(errno)); + else if (rlen != len) + fprintf(stderr, "Warning : short read from %s", name); + + close(fd); + return 0; +} + +static int elf_ppc64_load_file(int argc, char **argv, struct kexec_info *info) +{ + int ret = 0; + char *cmdline, *dtb; + char *append_cmdline = NULL; + char *reuse_cmdline = NULL; + int opt, cmdline_len = 0; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE}, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* Parse command line arguments */ + cmdline = 0; + dtb = 0; + ramdisk = 0; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append_cmdline = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_DEVICETREEBLOB: + dtb = optarg; + break; + case OPT_ARGS_IGNORE: + break; + case OPT_REUSE_CMDLINE: + reuse_cmdline = get_command_line(); + break; + } + } + + if (dtb) + die("--dtb not supported while using --kexec-file-syscall.\n"); + + if (reuse_initrd) + die("--reuseinitrd not supported with --kexec-file-syscall.\n"); + + cmdline = concat_cmdline(reuse_cmdline, append_cmdline); + if (!reuse_cmdline) + free(reuse_cmdline); + + if (cmdline) { + cmdline_len = strlen(cmdline) + 1; + } else { + cmdline = strdup("\0"); + cmdline_len = 1; + } + + if (ramdisk) { + info->initrd_fd = open(ramdisk, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, "Could not open initrd file %s:%s\n", + ramdisk, strerror(errno)); + ret = -1; + goto out; + } + } + + info->command_line = cmdline; + info->command_line_len = cmdline_len; + return ret; +out: + if (cmdline_len == 1) + free(cmdline); + return ret; +} + +int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *cmdline, *modified_cmdline = NULL; + char *reuse_cmdline = NULL; + char *append_cmdline = NULL; + const char *devicetreeblob; + uint64_t max_addr, hole_addr; + char *seg_buf = NULL; + off_t seg_size = 0; + struct mem_phdr *phdr; + size_t size; +#ifdef NEED_RESERVE_DTB + uint64_t *rsvmap_ptr; + struct bootblock *bb_ptr; +#endif + int result, opt; + uint64_t my_kernel, my_dt_offset; + uint64_t my_opal_base = 0, my_opal_entry = 0; + unsigned int my_panic_kernel; + uint64_t my_stack, my_backup_start; + uint64_t toc_addr; + uint32_t my_run_at_load; + unsigned int slave_code[256/sizeof (unsigned int)], master_entry; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE}, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + if (info->file_mode) + return elf_ppc64_load_file(argc, argv, info); + + /* Parse command line arguments */ + initrd_base = 0; + initrd_size = 0; + cmdline = 0; + ramdisk = 0; + devicetreeblob = 0; + max_addr = 0xFFFFFFFFFFFFFFFFULL; + hole_addr = 0; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append_cmdline = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_DEVICETREEBLOB: + devicetreeblob = optarg; + break; + case OPT_ARGS_IGNORE: + break; + case OPT_REUSE_CMDLINE: + reuse_cmdline = get_command_line(); + break; + } + } + + cmdline = concat_cmdline(reuse_cmdline, append_cmdline); + if (!reuse_cmdline) + free(reuse_cmdline); + + if (!cmdline) + fprintf(stdout, "Warning: append= option is not passed. Using the first kernel root partition\n"); + + if (ramdisk && reuse_initrd) + die("Can't specify --ramdisk or --initrd with --reuseinitrd\n"); + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (cmdline) { + strncpy(modified_cmdline, cmdline, COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + } + + /* Parse the Elf file */ + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + free_elf_info(&ehdr); + return result; + } + + /* Load the Elf data. Physical load addresses in elf64 header do not + * show up correctly. Use user supplied address for now to patch the + * elf header + */ + + phdr = &ehdr.e_phdr[0]; + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + my_kernel = hole_addr = locate_hole(info, size, 0, 0, max_addr, 1); + ehdr.e_phdr[0].p_paddr = hole_addr; + result = elf_exec_load(&ehdr, info); + if (result < 0) { + free_elf_info(&ehdr); + return result; + } + + /* If panic kernel is being loaded, additional segments need + * to be created. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info, modified_cmdline, + max_addr, 0); + if (result < 0) + return -1; + /* Use new command line. */ + cmdline = modified_cmdline; + } + + /* Add v2wrap to the current image */ + elf_rel_build_load(info, &info->rhdr, purgatory, + purgatory_size, 0, max_addr, 1, 0); + + /* Add a ram-disk to the current image + * Note: Add the ramdisk after elf_rel_build_load + */ + if (ramdisk) { + if (devicetreeblob) { + fprintf(stderr, + "Can't use ramdisk with device tree blob input\n"); + return -1; + } + seg_buf = slurp_file(ramdisk, &seg_size); + hole_addr = add_buffer(info, seg_buf, seg_size, seg_size, + 0, 0, max_addr, 1); + initrd_base = hole_addr; + initrd_size = seg_size; + } /* ramdisk */ + + if (devicetreeblob) { + /* Grab device tree from buffer */ + seg_buf = slurp_file(devicetreeblob, &seg_size); + } else { + /* create from fs2dt */ + create_flatten_tree(&seg_buf, &seg_size, cmdline); + } + + result = fixup_dt(&seg_buf, &seg_size); + if (result < 0) + return result; + + my_dt_offset = add_buffer(info, seg_buf, seg_size, seg_size, + 0, 0, max_addr, -1); + +#ifdef NEED_RESERVE_DTB + /* patch reserve map address for flattened device-tree + * find last entry (both 0) in the reserve mem list. Assume DT + * entry is before this one + */ + bb_ptr = (struct bootblock *)(seg_buf); + rsvmap_ptr = (uint64_t *)(seg_buf + be32_to_cpu(bb_ptr->off_mem_rsvmap)); + while (*rsvmap_ptr || *(rsvmap_ptr+1)) + rsvmap_ptr += 2; + rsvmap_ptr -= 2; + *rsvmap_ptr = cpu_to_be64(my_dt_offset); + rsvmap_ptr++; + *rsvmap_ptr = cpu_to_be64((uint64_t)be32_to_cpu(bb_ptr->totalsize)); +#endif + + if (read_prop("/proc/device-tree/ibm,opal/opal-base-address", + &my_opal_base, sizeof(my_opal_base)) == 0) { + my_opal_base = be64_to_cpu(my_opal_base); + elf_rel_set_symbol(&info->rhdr, "opal_base", + &my_opal_base, sizeof(my_opal_base)); + } + + if (read_prop("/proc/device-tree/ibm,opal/opal-entry-address", + &my_opal_entry, sizeof(my_opal_entry)) == 0) { + my_opal_entry = be64_to_cpu(my_opal_entry); + elf_rel_set_symbol(&info->rhdr, "opal_entry", + &my_opal_entry, sizeof(my_opal_entry)); + } + + /* Set kernel */ + elf_rel_set_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel)); + + /* Set dt_offset */ + elf_rel_set_symbol(&info->rhdr, "dt_offset", &my_dt_offset, + sizeof(my_dt_offset)); + + /* get slave code from new kernel, put in purgatory */ + elf_rel_get_symbol(&info->rhdr, "purgatory_start", slave_code, + sizeof(slave_code)); + master_entry = slave_code[0]; + memcpy(slave_code, phdr->p_data, sizeof(slave_code)); + slave_code[0] = master_entry; + elf_rel_set_symbol(&info->rhdr, "purgatory_start", slave_code, + sizeof(slave_code)); + + if (info->kexec_flags & KEXEC_ON_CRASH) { + my_panic_kernel = 1; + /* Set panic flag */ + elf_rel_set_symbol(&info->rhdr, "panic_kernel", + &my_panic_kernel, sizeof(my_panic_kernel)); + + /* Set backup address */ + my_backup_start = info->backup_start; + elf_rel_set_symbol(&info->rhdr, "backup_start", + &my_backup_start, sizeof(my_backup_start)); + + /* Tell relocatable kernel to run at load address + * via word before slave code in purgatory + */ + + elf_rel_get_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + if (my_run_at_load == KERNEL_RUN_AT_ZERO_MAGIC) + my_run_at_load = 1; + /* else it should be a fixed offset image */ + elf_rel_set_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + } + + /* Set stack address */ + my_stack = locate_hole(info, 16*1024, 0, 0, max_addr, 1); + my_stack += 16*1024; + elf_rel_set_symbol(&info->rhdr, "stack", &my_stack, sizeof(my_stack)); + + /* Set toc */ + toc_addr = my_r2(&info->rhdr); + elf_rel_set_symbol(&info->rhdr, "my_toc", &toc_addr, sizeof(toc_addr)); + + /* Set debug */ + elf_rel_set_symbol(&info->rhdr, "debug", &my_debug, sizeof(my_debug)); + + my_kernel = 0; + my_dt_offset = 0; + my_panic_kernel = 0; + my_backup_start = 0; + my_stack = 0; + toc_addr = 0; + my_run_at_load = 0; + my_debug = 0; + my_opal_base = 0; + my_opal_entry = 0; + + elf_rel_get_symbol(&info->rhdr, "opal_base", &my_opal_base, + sizeof(my_opal_base)); + elf_rel_get_symbol(&info->rhdr, "opal_entry", &my_opal_entry, + sizeof(my_opal_entry)); + elf_rel_get_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel)); + elf_rel_get_symbol(&info->rhdr, "dt_offset", &my_dt_offset, + sizeof(my_dt_offset)); + elf_rel_get_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + elf_rel_get_symbol(&info->rhdr, "panic_kernel", &my_panic_kernel, + sizeof(my_panic_kernel)); + elf_rel_get_symbol(&info->rhdr, "backup_start", &my_backup_start, + sizeof(my_backup_start)); + elf_rel_get_symbol(&info->rhdr, "stack", &my_stack, sizeof(my_stack)); + elf_rel_get_symbol(&info->rhdr, "my_toc", &toc_addr, + sizeof(toc_addr)); + elf_rel_get_symbol(&info->rhdr, "debug", &my_debug, sizeof(my_debug)); + + dbgprintf("info->entry is %p\n", info->entry); + dbgprintf("kernel is %llx\n", (unsigned long long)my_kernel); + dbgprintf("dt_offset is %llx\n", + (unsigned long long)my_dt_offset); + dbgprintf("run_at_load flag is %x\n", my_run_at_load); + dbgprintf("panic_kernel is %x\n", my_panic_kernel); + dbgprintf("backup_start is %llx\n", + (unsigned long long)my_backup_start); + dbgprintf("stack is %llx\n", (unsigned long long)my_stack); + dbgprintf("toc_addr is %llx\n", (unsigned long long)toc_addr); + dbgprintf("purgatory size is %zu\n", purgatory_size); + dbgprintf("debug is %d\n", my_debug); + dbgprintf("opal_base is %llx\n", (unsigned long long) my_opal_base); + dbgprintf("opal_entry is %llx\n", (unsigned long long) my_opal_entry); + + return 0; +} + +void elf_ppc64_usage(void) +{ + fprintf(stderr, " --command-line=<Command line> command line to append.\n"); + fprintf(stderr, " --append=<Command line> same as --command-line.\n"); + fprintf(stderr, " --ramdisk=<filename> Initial RAM disk.\n"); + fprintf(stderr, " --initrd=<filename> same as --ramdisk.\n"); + fprintf(stderr, " --devicetreeblob=<filename> Specify device tree blob file.\n"); + fprintf(stderr, " "); + fprintf(stderr, "Not applicable while using --kexec-file-syscall.\n"); + fprintf(stderr, " --reuse-cmdline Use kernel command line from running system.\n"); + fprintf(stderr, " --dtb=<filename> same as --devicetreeblob.\n"); + + fprintf(stderr, "elf support is still broken\n"); +} diff --git a/kexec/arch/ppc64/kexec-elf-rel-ppc64.c b/kexec/arch/ppc64/kexec-elf-rel-ppc64.c new file mode 100644 index 0000000..51b1354 --- /dev/null +++ b/kexec/arch/ppc64/kexec-elf-rel-ppc64.c @@ -0,0 +1,204 @@ +#include <stdio.h> +#include <elf.h> +#include <string.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-ppc64.h" + +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define STO_PPC64_LOCAL_BIT 5 +#define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT) +#define PPC64_LOCAL_ENTRY_OFFSET(other) \ + (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2) + +static unsigned int local_entry_offset(struct mem_sym *sym) +{ + /* If this symbol has a local entry point, use it. */ + return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); +} +#else +static unsigned int local_entry_offset(struct mem_sym *UNUSED(sym)) +{ + return 0; +} +#endif + +static struct mem_shdr *toc_section(const struct mem_ehdr *ehdr) +{ + struct mem_shdr *shdr, *shdr_end; + unsigned char *strtab; + + strtab = (unsigned char *)ehdr->e_shdr[ehdr->e_shstrndx].sh_data; + shdr_end = &ehdr->e_shdr[ehdr->e_shnum]; + for (shdr = ehdr->e_shdr; shdr != shdr_end; shdr++) { + if (shdr->sh_size && + strcmp((char *)&strtab[shdr->sh_name], ".toc") == 0) { + return shdr; + } + } + + return NULL; +} + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + struct mem_shdr *toc; + + if (ehdr->ei_class != ELFCLASS64) { + return 0; + } + if (ehdr->e_machine != EM_PPC64) { + return 0; + } + + /* Ensure .toc is sufficiently aligned. */ + toc = toc_section(ehdr); + if (toc && toc->sh_addralign < 256) + toc->sh_addralign = 256; + return 1; +} + +/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this + gives the value maximum span in an instruction which uses a signed + offset) */ +unsigned long my_r2(const struct mem_ehdr *ehdr) +{ + struct mem_shdr *shdr; + + shdr = toc_section(ehdr); + if (!shdr) { + die("TOC reloc without a toc section?"); + } + + return shdr->sh_addr + 0x8000; +} + +static void do_relative_toc(unsigned long value, uint16_t *location, + unsigned long mask, int complain_signed) +{ + if (complain_signed && (value + 0x8000 > 0xffff)) { + die("TOC16 relocation overflows (%lu)\n", value); + } + + if ((~mask & 0xffff) & value) { + die("bad TOC16 relocation (%lu)\n", value); + } + + *location = (*location & ~mask) | (value & mask); +} + +void machine_apply_elf_rel(struct mem_ehdr *ehdr, struct mem_sym *sym, + unsigned long r_type, void *location, unsigned long address, + unsigned long value) +{ + switch(r_type) { + case R_PPC64_ADDR32: + /* Simply set it */ + *(uint32_t *)location = value; + break; + + case R_PPC64_ADDR64: + case R_PPC64_REL64: + /* Simply set it */ + *(uint64_t *)location = value; + break; + + case R_PPC64_REL32: + *(uint32_t *)location = value - (uint32_t)location; + break; + + case R_PPC64_TOC: + *(uint64_t *)location = my_r2(ehdr); + break; + + case R_PPC64_TOC16: + do_relative_toc(value - my_r2(ehdr), location, 0xffff, 1); + break; + + case R_PPC64_TOC16_DS: + do_relative_toc(value - my_r2(ehdr), location, 0xfffc, 1); + break; + + case R_PPC64_TOC16_LO: + do_relative_toc(value - my_r2(ehdr), location, 0xffff, 0); + break; + + case R_PPC64_TOC16_LO_DS: + do_relative_toc(value - my_r2(ehdr), location, 0xfffc, 0); + break; + + case R_PPC64_TOC16_HI: + do_relative_toc((value - my_r2(ehdr)) >> 16, location, + 0xffff, 0); + break; + + case R_PPC64_TOC16_HA: + do_relative_toc((value - my_r2(ehdr) + 0x8000) >> 16, location, + 0xffff, 0); + break; + + case R_PPC64_REL24: + value += local_entry_offset(sym); + /* Convert value to relative */ + value -= address; + if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0) { + die("REL24 %li out of range!\n", (long int)value); + } + + /* Only replace bits 2 through 26 */ + *(uint32_t *)location = (*(uint32_t *)location & ~0x03fffffc) | + (value & 0x03fffffc); + break; + + case R_PPC64_ADDR16_LO: + *(uint16_t *)location = value & 0xffff; + break; + + case R_PPC64_ADDR16_HI: + *(uint16_t *)location = (value >> 16) & 0xffff; + break; + + case R_PPC64_ADDR16_HA: + *(uint16_t *)location = (((value + 0x8000) >> 16) & 0xffff); + break; + + case R_PPC64_ADDR16_HIGHER: + *(uint16_t *)location = (((uint64_t)value >> 32) & 0xffff); + break; + + case R_PPC64_ADDR16_HIGHEST: + *(uint16_t *)location = (((uint64_t)value >> 48) & 0xffff); + break; + + /* R_PPC64_REL16_HA and R_PPC64_REL16_LO are handled to support + * ABIv2 r2 assignment based on r12 for PIC executable. + * Here address is know so replace + * 0: addis 2,12,.TOC.-0b@ha + * addi 2,2,.TOC.-0b@l + * by + * lis 2,.TOC.@ha + * addi 2,2,.TOC.@l + */ + case R_PPC64_REL16_HA: + /* check that we are dealing with the addis 2,12 instruction */ + if (((*(uint32_t*)location) & 0xffff0000) != 0x3c4c0000) + die("Unexpected instruction for R_PPC64_REL16_HA"); + value += my_r2(ehdr); + /* replacing by lis 2 */ + *(uint32_t *)location = 0x3c400000 + ((value >> 16) & 0xffff); + break; + + case R_PPC64_REL16_LO: + /* check that we are dealing with the addi 2,2 instruction */ + if (((*(uint32_t*)location) & 0xffff0000) != 0x38420000) + die("Unexpected instruction for R_PPC64_REL16_LO"); + + value += my_r2(ehdr) - 4; + *(uint16_t *)location = value & 0xffff; + break; + + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } +} diff --git a/kexec/arch/ppc64/kexec-ppc64.c b/kexec/arch/ppc64/kexec-ppc64.c new file mode 100644 index 0000000..611809f --- /dev/null +++ b/kexec/arch/ppc64/kexec-ppc64.c @@ -0,0 +1,969 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com), IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <dirent.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" +#include <arch/options.h> + +static struct memory_range *exclude_range = NULL; +static struct memory_range *memory_range = NULL; +static struct memory_range *base_memory_range = NULL; +static uint64_t rma_top; +uint64_t memory_max = 0; +uint64_t memory_limit; +static int nr_memory_ranges, nr_exclude_ranges; +uint64_t crash_base, crash_size; +unsigned int rtas_base, rtas_size; +uint64_t opal_base, opal_size; +int max_memory_ranges; + +static void cleanup_memory_ranges(void) +{ + if (memory_range) + free(memory_range); + if (base_memory_range) + free(base_memory_range); + if (exclude_range) + free(exclude_range); + if (usablemem_rgns.ranges) + free(usablemem_rgns.ranges); +} + +/* + * Allocate memory for various data structures used to hold + * values of different memory ranges + */ +static int alloc_memory_ranges(void) +{ + int memory_range_len; + + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = (struct memory_range *) malloc(memory_range_len); + if (!memory_range) + return -1; + + base_memory_range = (struct memory_range *) malloc(memory_range_len); + if (!base_memory_range) + goto err1; + + exclude_range = (struct memory_range *) malloc(memory_range_len); + if (!exclude_range) + goto err1; + + usablemem_rgns.ranges = (struct memory_range *) + malloc(memory_range_len); + if (!(usablemem_rgns.ranges)) + goto err1; + + memset(memory_range, 0, memory_range_len); + memset(base_memory_range, 0, memory_range_len); + memset(exclude_range, 0, memory_range_len); + memset(usablemem_rgns.ranges, 0, memory_range_len); + return 0; + +err1: + fprintf(stderr, "memory range structure allocation failure\n"); + cleanup_memory_ranges(); + return -1; + +} + +static int realloc_memory_ranges(void) +{ + size_t memory_range_len; + + max_memory_ranges++; + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = (struct memory_range *) realloc(memory_range, memory_range_len); + if (!memory_range) + goto err; + + base_memory_range = (struct memory_range *) realloc(base_memory_range, memory_range_len); + if (!base_memory_range) + goto err; + + exclude_range = (struct memory_range *) realloc(exclude_range, memory_range_len); + if (!exclude_range) + goto err; + + usablemem_rgns.ranges = (struct memory_range *) + realloc(usablemem_rgns.ranges, memory_range_len); + if (!(usablemem_rgns.ranges)) + goto err; + + return 0; + +err: + fprintf(stderr, "memory range structure re-allocation failure\n"); + return -1; +} + + +static void add_base_memory_range(uint64_t start, uint64_t end) +{ + base_memory_range[nr_memory_ranges].start = start; + base_memory_range[nr_memory_ranges].end = end; + base_memory_range[nr_memory_ranges].type = RANGE_RAM; + nr_memory_ranges++; + if (nr_memory_ranges >= max_memory_ranges) + realloc_memory_ranges(); + + dbgprintf("%016llx-%016llx : %x\n", + base_memory_range[nr_memory_ranges-1].start, + base_memory_range[nr_memory_ranges-1].end, + base_memory_range[nr_memory_ranges-1].type); +} + +static int get_dyn_reconf_base_ranges(void) +{ + uint64_t start, end; + uint64_t size; + char fname[128], buf[32]; + FILE *file; + unsigned int i; + int n; + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, "ibm,dynamic-reconfiguration-memory/ibm,lmb-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + return -1; + } + if (fread(buf, 1, 8, file) != 8) { + perror(fname); + fclose(file); + return -1; + } + /* + * lmb_size, num_of_lmb_sets(global variables) are + * initialized once here. + */ + size = lmb_size = be64_to_cpu(((uint64_t *)buf)[0]); + fclose(file); + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, + "ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory"); + if ((file = fopen(fname, "r")) == NULL) { + strcat(fname, "-v2"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + return -1; + } + + is_dyn_mem_v2 = 1; + } + + /* first 4 bytes tell the number of lmb set entries */ + if (fread(buf, 1, 4, file) != 4) { + perror(fname); + fclose(file); + return -1; + } + num_of_lmb_sets = be32_to_cpu(((unsigned int *)buf)[0]); + + for (i = 0; i < num_of_lmb_sets; i++) { + if ((n = fread(buf, 1, LMB_ENTRY_SIZE, file)) < 0) { + perror(fname); + fclose(file); + return -1; + } + if (nr_memory_ranges >= max_memory_ranges) { + fclose(file); + return -1; + } + + /* + * If the property is ibm,dynamic-memory-v2, the first 4 bytes + * tell the number of sequential LMBs in this entry. + */ + if (is_dyn_mem_v2) + size = be32_to_cpu(((unsigned int *)buf)[0]) * lmb_size; + + start = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR])); + end = start + size; + add_base_memory_range(start, end); + } + fclose(file); + return 0; +} + +/* Sort the base ranges in memory - this is useful for ensuring that our + * ranges are in ascending order, even if device-tree read of memory nodes + * is done differently. Also, could be used for other range coalescing later + */ +static int sort_base_ranges(void) +{ + int i, j; + unsigned long long tstart, tend; + + for (i = 0; i < nr_memory_ranges - 1; i++) { + for (j = 0; j < nr_memory_ranges - i - 1; j++) { + if (base_memory_range[j].start > base_memory_range[j+1].start) { + tstart = base_memory_range[j].start; + tend = base_memory_range[j].end; + base_memory_range[j].start = base_memory_range[j+1].start; + base_memory_range[j].end = base_memory_range[j+1].end; + base_memory_range[j+1].start = tstart; + base_memory_range[j+1].end = tend; + } + } + } + return 0; +} + +/* Get base memory ranges */ +static int get_base_ranges(void) +{ + uint64_t start, end; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + char buf[MAXBYTES]; + DIR *dir, *dmem; + FILE *file; + struct dirent *dentry, *mentry; + int n; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + while ((dentry = readdir(dir)) != NULL) { + if (!strncmp(dentry->d_name, + "ibm,dynamic-reconfiguration-memory", 35)) { + get_dyn_reconf_base_ranges(); + continue; + } + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((dmem = opendir(fname)) == NULL) { + perror(fname); + closedir(dir); + return -1; + } + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + closedir(dmem); + closedir(dir); + return -1; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + fclose(file); + closedir(dmem); + closedir(dir); + return -1; + } + if (nr_memory_ranges >= max_memory_ranges) { + if (realloc_memory_ranges() < 0) + break; + } + start = be64_to_cpu(((uint64_t *)buf)[0]); + end = start + be64_to_cpu(((uint64_t *)buf)[1]); + add_base_memory_range(start, end); + fclose(file); + } + closedir(dmem); + } + closedir(dir); + sort_base_ranges(); + memory_max = base_memory_range[nr_memory_ranges - 1].end; + dbgprintf("get base memory ranges:%d\n", nr_memory_ranges); + + return 0; +} + +/* Sort the exclude ranges in memory */ +static int sort_ranges(void) +{ + int i, j; + uint64_t tstart, tend; + for (i = 0; i < nr_exclude_ranges - 1; i++) { + for (j = 0; j < nr_exclude_ranges - i - 1; j++) { + if (exclude_range[j].start > exclude_range[j+1].start) { + tstart = exclude_range[j].start; + tend = exclude_range[j].end; + exclude_range[j].start = exclude_range[j+1].start; + exclude_range[j].end = exclude_range[j+1].end; + exclude_range[j+1].start = tstart; + exclude_range[j+1].end = tend; + } + } + } + return 0; +} + +void scan_reserved_ranges(unsigned long kexec_flags, int *range_index) +{ + char fname[256], buf[16]; + FILE *file; + int i = *range_index; + + strcpy(fname, "/proc/device-tree/reserved-ranges"); + + file = fopen(fname, "r"); + if (file == NULL) { + if (errno != ENOENT) { + perror(fname); + return; + } + errno = 0; + /* File not present. Non PowerKVM system. */ + return; + } + + /* + * Each reserved range is an (address,size) pair, 2 cells each, + * totalling 4 cells per range. + */ + while (fread(buf, sizeof(uint64_t) * 2, 1, file) == 1) { + uint64_t base, size; + + base = be64_to_cpu(((uint64_t *)buf)[0]); + size = be64_to_cpu(((uint64_t *)buf)[1]); + + exclude_range[i].start = base; + exclude_range[i].end = base + size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + reserve(base, size); + } + fclose(file); + *range_index = i; +} + +/* Return 0 if fname/value valid, -1 otherwise */ +int get_devtree_value(const char *fname, unsigned long long *value) +{ + FILE *file; + char buf[MAXBYTES]; + int n = -1; + + if ((file = fopen(fname, "r"))) { + n = fread(buf, 1, MAXBYTES, file); + fclose(file); + } + + if (n == sizeof(uint32_t)) + *value = ((uint32_t *)buf)[0]; + else if (n == sizeof(uint64_t)) + *value = ((uint64_t *)buf)[0]; + else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + return -1; + } + + return 0; +} + +/* Get devtree details and create exclude_range array + * Also create usablemem_ranges for KEXEC_ON_CRASH + */ +static int get_devtree_details(unsigned long kexec_flags) +{ + uint64_t rma_base = -1, base; + uint64_t tce_base; + unsigned int tce_size; + uint64_t htab_base, htab_size; + uint64_t kernel_end; + uint64_t initrd_start, initrd_end; + char buf[MAXBYTES]; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + DIR *dir, *cdir; + FILE *file; + struct dirent *dentry; + struct stat fstat; + int n, i = 0; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + + scan_reserved_ranges(kexec_flags, &i); + + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "chosen", 6) && + strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory") && + strncmp(dentry->d_name, "pci@", 4) && + strncmp(dentry->d_name, "rtas", 4) && + strncmp(dentry->d_name, "ibm,opal", 8)) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((cdir = opendir(fname)) == NULL) { + perror(fname); + goto error_opendir; + } + + if (strncmp(dentry->d_name, "chosen", 6) == 0) { + strcat(fname, "/linux,kernel-end"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&kernel_end, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + kernel_end = be64_to_cpu(kernel_end); + + /* Add kernel memory to exclude_range */ + exclude_range[i].start = 0x0UL; + exclude_range[i].end = kernel_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + if (kexec_flags & KEXEC_ON_CRASH) { + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,crashkernel-base"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&crash_base, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + crash_base = be64_to_cpu(crash_base); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,crashkernel-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&crash_size, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + crash_size = be64_to_cpu(crash_size); + + if (crash_base > mem_min) + mem_min = crash_base; + if (crash_base + crash_size < mem_max) + mem_max = crash_base + crash_size; + + add_usable_mem_rgns(0, crash_base + crash_size); + reserve(KDUMP_BACKUP_LIMIT, crash_base-KDUMP_BACKUP_LIMIT); + } + /* + * Read the first kernel's memory limit. + * If the first kernel is booted with mem= option then + * it would export "linux,memory-limit" file + * reflecting value for the same. + */ + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,memory-limit"); + if ((file = fopen(fname, "r")) == NULL) { + if (errno != ENOENT) { + perror(fname); + goto error_opencdir; + } + errno = 0; + /* + * File not present. + * fall through. On older kernel this file + * is not present. + */ + } else { + if (fread(&memory_limit, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + memory_limit = be64_to_cpu(memory_limit); + } + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,htab-base"); + if ((file = fopen(fname, "r")) == NULL) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&htab_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + htab_base = be64_to_cpu(htab_base); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,htab-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&htab_size, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + htab_size = be64_to_cpu(htab_size); + + /* Add htab address to exclude_range - NON-LPAR only */ + exclude_range[i].start = htab_base; + exclude_range[i].end = htab_base + htab_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + /* reserve the initrd_start and end locations. */ + if (reuse_initrd) { + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,initrd-start"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + /* check for 4 and 8 byte initrd offset sizes */ + if (stat(fname, &fstat) != 0) { + perror(fname); + goto error_openfile; + } + if (fread(&initrd_start, fstat.st_size, 1, file) != 1) { + perror(fname); + goto error_openfile; + } + initrd_start = be64_to_cpu(initrd_start); + fclose(file); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,initrd-end"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + /* check for 4 and 8 byte initrd offset sizes */ + if (stat(fname, &fstat) != 0) { + perror(fname); + goto error_openfile; + } + if (fread(&initrd_end, fstat.st_size, 1, file) != 1) { + perror(fname); + goto error_openfile; + } + initrd_end = be64_to_cpu(initrd_end); + fclose(file); + + /* Add initrd address to exclude_range */ + exclude_range[i].start = initrd_start; + exclude_range[i].end = initrd_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + } + } /* chosen */ + + if (strncmp(dentry->d_name, "rtas", 4) == 0) { + strcat(fname, "/linux,rtas-base"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_base, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + rtas_base = be32_to_cpu(rtas_base); + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/rtas-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_size, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + closedir(cdir); + rtas_size = be32_to_cpu(rtas_size); + /* Add rtas to exclude_range */ + exclude_range[i].start = rtas_base; + exclude_range[i].end = rtas_base + rtas_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(rtas_base, rtas_size); + } /* rtas */ + + if (strncmp(dentry->d_name, "ibm,opal", 8) == 0) { + strcat(fname, "/opal-base-address"); + file = fopen(fname, "r"); + if (file == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&opal_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + opal_base = be64_to_cpu(opal_base); + fclose(file); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/opal-runtime-size"); + file = fopen(fname, "r"); + if (file == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&opal_size, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + closedir(cdir); + opal_size = be64_to_cpu(opal_size); + /* Add OPAL to exclude_range */ + exclude_range[i].start = opal_base; + exclude_range[i].end = opal_base + opal_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(opal_base, opal_size); + } /* ibm,opal */ + + if (!strncmp(dentry->d_name, "memory@", 7) || + !strcmp(dentry->d_name, "memory")) { + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + base = be64_to_cpu(((uint64_t *)buf)[0]); + if (base < rma_base) { + rma_base = base; + rma_top = base + be64_to_cpu(((uint64_t *)buf)[1]); + } + + fclose(file); + closedir(cdir); + } /* memory */ + + if (strncmp(dentry->d_name, "pci@", 4) == 0) { + strcat(fname, "/linux,tce-base"); + if ((file = fopen(fname, "r")) == NULL) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&tce_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + tce_base = be64_to_cpu(tce_base); + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,tce-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&tce_size, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + tce_size = be32_to_cpu(tce_size); + /* Add tce to exclude_range - NON-LPAR only */ + exclude_range[i].start = tce_base; + exclude_range[i].end = tce_base + tce_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(tce_base, tce_size); + closedir(cdir); + } /* pci */ + } + closedir(dir); + + nr_exclude_ranges = i; + + sort_ranges(); + + int k; + for (k = 0; k < i; k++) + dbgprintf("exclude_range sorted exclude_range[%d] " + "start:%llx, end:%llx\n", k, exclude_range[k].start, + exclude_range[k].end); + + return 0; + +error_openfile: + fclose(file); +error_opencdir: + closedir(cdir); +error_opendir: + closedir(dir); + return -1; +} + +/* Setup a sorted list of memory ranges. */ +int setup_memory_ranges(unsigned long kexec_flags) +{ + int i, j = 0; + + /* Get the base list of memory ranges from /proc/device-tree/memory + * nodes. Build list of ranges to be excluded from valid memory + */ + + if (get_base_ranges()) + goto out; + if (get_devtree_details(kexec_flags)) + goto out; + + for (i = 0; i < nr_exclude_ranges; i++) { + /* If first exclude range does not start with 0, include the + * first hole of valid memory from 0 - exclude_range[0].start + */ + if (i == 0) { + if (exclude_range[i].start != 0) { + memory_range[j].start = 0; + memory_range[j].end = exclude_range[i].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + } + } /* i == 0 */ + /* If the last exclude range does not end at memory_max, include + * the last hole of valid memory from exclude_range[last].end - + * memory_max + */ + if (i == nr_exclude_ranges - 1) { + if (exclude_range[i].end < memory_max) { + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = memory_max; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + /* Limit the end to rma_top */ + if (memory_range[j-1].start >= rma_top) { + j--; + break; + } + if ((memory_range[j-1].start < rma_top) && + (memory_range[j-1].end >= rma_top)) { + memory_range[j-1].end = rma_top; + break; + } + continue; + } + } /* i == nr_exclude_ranges - 1 */ + /* contiguous exclude ranges - skip */ + if (exclude_range[i+1].start == exclude_range[i].end + 1) + continue; + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = exclude_range[i+1].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + /* Limit range to rma_top */ + if (memory_range[j-1].start >= rma_top) { + j--; + break; + } + if ((memory_range[j-1].start < rma_top) && + (memory_range[j-1].end >= rma_top)) { + memory_range[j-1].end = rma_top; + break; + } + } + nr_memory_ranges = j; + + int k; + for (k = 0; k < j; k++) + dbgprintf("setup_memory_ranges memory_range[%d] " + "start:%llx, end:%llx\n", k, memory_range[k].start, + memory_range[k].end); + return 0; + +out: + cleanup_memory_ranges(); + return -1; +} + +/* Return a list of valid memory ranges */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + /* allocate memory_range dynamically */ + max_memory_ranges = 1; + + if (alloc_memory_ranges()) + return -1; + if (setup_memory_ranges(kexec_flags)) + return -1; + + /* + * copy the memory here, another realloc_memory_ranges might + * corrupt the old memory + */ + *range = calloc(sizeof(struct memory_range), nr_memory_ranges); + if (*range == NULL) + return -1; + memmove(*range, memory_range, + sizeof(struct memory_range) * nr_memory_ranges); + + *ranges = nr_memory_ranges; + dbgprintf("get memory ranges:%d\n", nr_memory_ranges); + return 0; +} + +struct file_type file_type[] = { + { "elf-ppc64", elf_ppc64_probe, elf_ppc64_load, elf_ppc64_usage }, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ + fprintf(stderr, " --elf64-core-headers Prepare core headers in ELF64 format\n"); + fprintf(stderr, " --dt-no-old-root Do not reuse old kernel root= param.\n" \ + " while creating flatten device tree.\n"); +} + +struct arch_options_t arch_options = { + .core_header_type = CORE_TYPE_ELF64, +}; + +int arch_process_options(int argc, char **argv) +{ + /* We look for all options so getopt_long doesn't start reordering + * argv[] before file_type[n].load() gets a look in. + */ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + break; + case OPT_ELF64_CORE: + arch_options.core_header_type = CORE_TYPE_ELF64; + break; + case OPT_DT_NO_OLD_ROOT: + dt_no_old_root = 1; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + /* We are running a 32-bit kexec-tools on 64-bit ppc64. + * So pass KEXEC_ARCH_PPC64 here + */ + { "ppc64", KEXEC_ARCH_PPC64 }, + { "ppc64le", KEXEC_ARCH_PPC64 }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} diff --git a/kexec/arch/ppc64/kexec-ppc64.h b/kexec/arch/ppc64/kexec-ppc64.h new file mode 100644 index 0000000..434b4bf --- /dev/null +++ b/kexec/arch/ppc64/kexec-ppc64.h @@ -0,0 +1,45 @@ +#ifndef KEXEC_PPC64_H +#define KEXEC_PPC64_H + +#define PATH_LEN 256 +#define MAXBYTES 128 +#define MAX_LINE 160 +#define CORE_TYPE_ELF32 1 +#define CORE_TYPE_ELF64 2 + +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 17 +#if (BOOT_BLOCK_VERSION < 16) +# define NEED_STRUCTURE_BLOCK_EXTRA_PAD +#endif +#define HAVE_DYNAMIC_MEMORY +#define NEED_RESERVE_DTB + +extern int get_devtree_value(const char *fname, unsigned long long *pvalue); + +int setup_memory_ranges(unsigned long kexec_flags); + +int elf_ppc64_probe(const char *buf, off_t len); +int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_ppc64_usage(void); + +struct mem_ehdr; +unsigned long my_r2(const struct mem_ehdr *ehdr); + +extern uint64_t initrd_base, initrd_size; +extern int max_memory_ranges; +extern unsigned char reuse_initrd; + +struct arch_options_t { + int core_header_type; +}; + +typedef struct mem_rgns { + unsigned int size; + struct memory_range *ranges; +} mem_rgns_t; + +extern mem_rgns_t usablemem_rgns; + +#endif /* KEXEC_PPC64_H */ diff --git a/kexec/arch/ppc64/kexec-zImage-ppc64.c b/kexec/arch/ppc64/kexec-zImage-ppc64.c new file mode 100644 index 0000000..e946205 --- /dev/null +++ b/kexec/arch/ppc64/kexec-zImage-ppc64.c @@ -0,0 +1,184 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <linux/elf.h> +#include "../../kexec.h" + +#define MAX_HEADERS 32 + +int zImage_ppc64_probe(FILE *file) +{ + Elf32_Ehdr elf; + int valid; + + if (fseek(file, 0, SEEK_SET) < 0) { + fprintf(stderr, "seek error: %s\n", + strerror(errno)); + return -1; + } + if (fread(&elf, sizeof(Elf32_Ehdr), 1, file) != 1) { + fprintf(stderr, "read error: %s\n", + strerror(errno)); + return -1; + } + + if (elf.e_machine == EM_PPC64) { + fprintf(stderr, "Elf64 not supported\n"); + return -1; + } + + valid = (elf.e_ident[EI_MAG0] == ELFMAG0 && + elf.e_ident[EI_MAG1] == ELFMAG1 && + elf.e_ident[EI_MAG2] == ELFMAG2 && + elf.e_ident[EI_MAG3] == ELFMAG3 && + elf.e_ident[EI_CLASS] == ELFCLASS32 && + elf.e_ident[EI_DATA] == ELFDATA2MSB && + elf.e_type == ET_EXEC && + elf.e_machine == EM_PPC); + + return valid ? 0 : -1; +} + +int zImage_ppc64_load(FILE *file, int UNUSED(argc), char **UNUSED(argv), + void **ret_entry, struct kexec_segment **ret_segments, + int *ret_nr_segments) +{ + Elf32_Ehdr elf; + Elf32_Phdr *p, *ph; + struct kexec_segment *segment; + int i; + unsigned long memsize, filesize, offset, load_loc = 0; + + /* Parse command line arguments */ + + /* Read in the Elf32 header */ + if (fseek(file, 0, SEEK_SET) < 0) { + perror("seek error:"); + return -1; + } + if (fread(&elf, sizeof(Elf32_Ehdr), 1, file) != 1) { + perror("read error: "); + return -1; + } + if (elf.e_phnum > MAX_HEADERS) { + fprintf(stderr, + "Only kernels with %i program headers are supported\n", + MAX_HEADERS); + return -1; + } + + /* Read the section header */ + ph = (Elf32_Phdr *)malloc(sizeof(Elf32_Phdr) * elf.e_phnum); + if (ph == 0) { + perror("malloc failed: "); + return -1; + } + if (fseek(file, elf.e_phoff, SEEK_SET) < 0) { + perror("seek failed: "); + free(ph); + return -1; + } + if (fread(ph, sizeof(Elf32_Phdr) * elf.e_phnum, 1, file) != 1) { + perror("read error: "); + free(ph); + return -1; + } + + *ret_segments = malloc(elf.e_phnum * sizeof(struct kexec_segment)); + if (*ret_segments == 0) { + fprintf(stderr, "malloc failed: %s\n", + strerror(errno)); + free(ph); + return -1; + } + segment = ret_segments[0]; + + /* Scan through the program header */ + memsize = filesize = offset = 0; + p = ph; + for (i = 0; i < elf.e_phnum; ++i, ++p) { + if (p->p_type != PT_LOAD || p->p_offset == 0) + continue; + if (memsize == 0) { + offset = p->p_offset; + memsize = p->p_memsz; + filesize = p->p_filesz; + load_loc = p->p_vaddr; + } else { + memsize = p->p_offset + p->p_memsz - offset; + filesize = p->p_offset + p->p_filesz - offset; + } + } + if (memsize == 0) { + fprintf(stderr, "Can't find a loadable segment.\n"); + free(ph); + return -1; + } + + /* Load program segments */ + p = ph; + segment->buf = malloc(filesize); + if (segment->buf == 0) { + perror("malloc failed: "); + free(ph); + return -1; + } + for (i = 0; i < elf.e_phnum; ++i, ++p) { + unsigned long mem_offset; + if (p->p_type != PT_LOAD || p->p_offset == 0) + continue; + + /* skip to the actual image */ + if (fseek(file, p->p_offset, SEEK_SET) < 0) { + perror("seek error: "); + free(ph); + return -1; + } + mem_offset = p->p_vaddr - load_loc; + if (fread((void *)segment->buf+mem_offset, p->p_filesz, 1, + file) != 1) { + perror("read error: "); + free(ph); + return -1; + } + } + segment->mem = (void *) load_loc; + segment->memsz = memsize; + segment->bufsz = filesize; + *ret_entry = (void *)(uintptr_t)elf.e_entry; + *ret_nr_segments = i - 1; + free(ph); + return 0; +} + +void zImage_ppc64_usage(void) +{ + fprintf(stderr, "zImage support is still broken\n"); +} diff --git a/kexec/arch/s390/Makefile b/kexec/arch/s390/Makefile new file mode 100644 index 0000000..fab3e68 --- /dev/null +++ b/kexec/arch/s390/Makefile @@ -0,0 +1,11 @@ +# +# kexec s390 (linux booting linux) +# +s390_KEXEC_SRCS = kexec/arch/s390/kexec-s390.c +s390_KEXEC_SRCS += kexec/arch/s390/kexec-image.c +s390_KEXEC_SRCS += kexec/arch/s390/kexec-elf-rel-s390.c +s390_KEXEC_SRCS += kexec/arch/s390/crashdump-s390.c + +dist += kexec/arch/s390/Makefile $(s390_KEXEC_SRCS) \ + kexec/arch/s390/kexec-s390.h \ + kexec/arch/s390/include/arch/options.h diff --git a/kexec/arch/s390/crashdump-s390.c b/kexec/arch/s390/crashdump-s390.c new file mode 100644 index 0000000..3bd9efe --- /dev/null +++ b/kexec/arch/s390/crashdump-s390.c @@ -0,0 +1,90 @@ +/* + * kexec/arch/s390/crashdump-s390.c + * + * Copyright IBM Corp. 2011 + * + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#ifdef __s390x__ +#define _GNU_SOURCE + +#include <stdio.h> +#include <elf.h> +#include <limits.h> +#include <string.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec/crashdump.h" +#include "kexec-s390.h" + +/* + * Create ELF core header + */ +static int create_elf_header(struct kexec_info *info, unsigned long crash_base, + unsigned long crash_end) +{ +#ifdef WITH_ELF_HEADER + static struct memory_range crash_memory_range[MAX_MEMORY_RANGES]; + unsigned long elfcorehdr, elfcorehdr_size, bufsz; + struct crash_elf_info elf_info; + char str[COMMAND_LINESIZE]; + int ranges; + void *tmp; + + memset(&elf_info, 0, sizeof(elf_info)); + + elf_info.data = ELFDATA2MSB; + elf_info.machine = EM_S390; + elf_info.class = ELFCLASS64; + elf_info.get_note_info = get_crash_notes_per_cpu; + + if (get_memory_ranges_s390(crash_memory_range, &ranges, 0)) + return -1; + + if (crash_create_elf64_headers(info, &elf_info, crash_memory_range, + ranges, &tmp, &bufsz, + ELF_CORE_HEADER_ALIGN)) + return -1; + + elfcorehdr = add_buffer(info, tmp, bufsz, bufsz, 1024, + crash_base, crash_end, -1); + elfcorehdr_size = bufsz; + snprintf(str, sizeof(str), " elfcorehdr=%ld@%ldK\n", + elfcorehdr_size, elfcorehdr / 1024); + if (command_line_add(info, str)) + return -1; +#endif + return 0; +} + +/* + * Load additional segments for kdump kernel + */ +int load_crashdump_segments(struct kexec_info *info, unsigned long crash_base, + unsigned long crash_end) +{ + unsigned long crash_size = crash_size = crash_end - crash_base + 1; + + if (create_elf_header(info, crash_base, crash_end)) + return -1; + elf_rel_build_load(info, &info->rhdr, (const char *) purgatory, + purgatory_size, crash_base + 0x2000, + crash_base + 0x10000, -1, 0); + elf_rel_set_symbol(&info->rhdr, "crash_base", &crash_base, + sizeof(crash_base)); + elf_rel_set_symbol(&info->rhdr, "crash_size", &crash_size, + sizeof(crash_size)); + info->entry = (void *) elf_rel_get_addr(&info->rhdr, "purgatory_start"); + return 0; +} +#else +/* + * kdump is not available for s390 + */ +int load_crashdump_segments(struct kexec_info *info, unsigned long crash_base, + unsigned long crash_end) +{ + return -1; +} +#endif diff --git a/kexec/arch/s390/include/arch/options.h b/kexec/arch/s390/include/arch/options.h new file mode 100644 index 0000000..c150244 --- /dev/null +++ b/kexec/arch/s390/include/arch/options.h @@ -0,0 +1,40 @@ +#ifndef KEXEC_ARCH_S390_OPTIONS_H +#define KEXEC_ARCH_S390_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) +#define OPT_APPEND (OPT_MAX+0) +#define OPT_RAMDISK (OPT_MAX+1) +#define OPT_REUSE_CMDLINE (OPT_MAX+2) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + {"command-line", 1, 0, OPT_APPEND}, \ + {"append", 1, 0, OPT_APPEND}, \ + {"initrd", 1, 0, OPT_RAMDISK}, \ + {"reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_S390_OPTIONS_H */ diff --git a/kexec/arch/s390/kexec-elf-rel-s390.c b/kexec/arch/s390/kexec-elf-rel-s390.c new file mode 100644 index 0000000..91ba86a --- /dev/null +++ b/kexec/arch/s390/kexec-elf-rel-s390.c @@ -0,0 +1,78 @@ +/* + * kexec/arch/s390/kexec-elf-rel-s390.c + * + * Copyright IBM Corp. 2005,2011 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + * + */ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) + return 0; + if (ehdr->ei_class != ELFCLASS64) + return 0; + if (ehdr->e_machine != EM_S390) + return 0; + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), + unsigned long r_type, + void *loc, + unsigned long address, + unsigned long val) +{ + switch (r_type) { + case R_390_8: /* Direct 8 bit. */ + case R_390_12: /* Direct 12 bit. */ + case R_390_16: /* Direct 16 bit. */ + case R_390_20: /* Direct 20 bit. */ + case R_390_32: /* Direct 32 bit. */ + case R_390_64: /* Direct 64 bit. */ + if (r_type == R_390_8) + *(unsigned char *) loc = val; + else if (r_type == R_390_12) + *(unsigned short *) loc = (val & 0xfff) | + (*(unsigned short *) loc & 0xf000); + else if (r_type == R_390_16) + *(unsigned short *) loc = val; + else if (r_type == R_390_20) + *(unsigned int *) loc = + (*(unsigned int *) loc & 0xf00000ff) | + (val & 0xfff) << 16 | (val & 0xff000) >> 4; + else if (r_type == R_390_32) + *(unsigned int *) loc = val; + else if (r_type == R_390_64) + *(unsigned long *) loc = val; + break; + case R_390_PC16: /* PC relative 16 bit. */ + case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ + case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */ + case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */ + case R_390_PC32: /* PC relative 32 bit. */ + case R_390_PC64: /* PC relative 64 bit. */ + val -= address; + if (r_type == R_390_PC16) + *(unsigned short *) loc = val; + else if (r_type == R_390_PC16DBL) + *(unsigned short *) loc = val >> 1; + else if (r_type == R_390_PC32DBL || r_type == R_390_PLT32DBL) + *(unsigned int *) loc = val >> 1; + else if (r_type == R_390_PC32) + *(unsigned int *) loc = val; + else if (r_type == R_390_PC64) + *(unsigned long *) loc = val; + break; + default: + die("Unknown rela relocation: 0x%lx 0x%lx\n", r_type, address); + break; + } +} diff --git a/kexec/arch/s390/kexec-image.c b/kexec/arch/s390/kexec-image.c new file mode 100644 index 0000000..69aaf96 --- /dev/null +++ b/kexec/arch/s390/kexec-image.c @@ -0,0 +1,236 @@ +/* + * kexec/arch/s390/kexec-image.c + * + * (C) Copyright IBM Corp. 2005 + * + * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * Heiko Carstens <heiko.carstens@de.ibm.com> + * + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec/crashdump.h" +#include "kexec-s390.h" +#include <arch/options.h> +#include <fcntl.h> + +static uint64_t crash_base, crash_end; + +static void add_segment_check(struct kexec_info *info, const void *buf, + size_t bufsz, unsigned long base, size_t memsz) +{ + if (info->kexec_flags & KEXEC_ON_CRASH) + if (base + memsz > crash_end - crash_base) + die("Not enough crashkernel memory to load segments\n"); + add_segment(info, buf, bufsz, crash_base + base, memsz); +} + +int command_line_add(struct kexec_info *info, const char *str) +{ + char *tmp = NULL; + + tmp = concat_cmdline(info->command_line, str); + if (!tmp) { + fprintf(stderr, "out of memory\n"); + return -1; + } + + free(info->command_line); + info->command_line = tmp; + return 0; +} + +int image_s390_load_file(int argc, char **argv, struct kexec_info *info) +{ + const char *ramdisk = NULL; + int opt; + + static const struct option options[] = + { + KEXEC_ALL_OPTIONS + {0, 0, 0, 0}, + }; + static const char short_options[] = KEXEC_OPT_STR ""; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + case OPT_APPEND: + if (command_line_add(info, optarg)) + return -1; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_REUSE_CMDLINE: + free(info->command_line); + info->command_line = get_command_line(); + break; + } + } + + if (ramdisk) { + info->initrd_fd = open(ramdisk, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, "Could not open initrd file %s:%s\n", + ramdisk, strerror(errno)); + free(info->command_line); + info->command_line = NULL; + return -1; + } + } + + if (info->command_line) + info->command_line_len = strlen(info->command_line) + 1; + else + info->command_line_len = 0; + return 0; +} + +int +image_s390_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info) +{ + void *krnl_buffer; + char *rd_buffer; + const char *ramdisk; + off_t ramdisk_len; + unsigned int ramdisk_origin; + int opt, ret = -1; + + if (info->file_mode) + return image_s390_load_file(argc, argv, info); + + static const struct option options[] = + { + KEXEC_ALL_OPTIONS + {0, 0, 0, 0}, + }; + static const char short_options[] = KEXEC_OPT_STR ""; + + ramdisk = NULL; + ramdisk_len = 0; + ramdisk_origin = 0; + + while ((opt = getopt_long(argc,argv,short_options,options,0)) != -1) { + switch(opt) { + case OPT_APPEND: + if (command_line_add(info, optarg)) + return -1; + break; + case OPT_REUSE_CMDLINE: + free(info->command_line); + info->command_line = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + } + } + + if (info->kexec_flags & KEXEC_ON_CRASH) { + if (parse_iomem_single("Crash kernel\n", &crash_base, + &crash_end)) + goto out; + } + + /* Add kernel segment */ + add_segment_check(info, kernel_buf + IMAGE_READ_OFFSET, + kernel_size - IMAGE_READ_OFFSET, IMAGE_READ_OFFSET, + kernel_size - IMAGE_READ_OFFSET); + + /* We do want to change the kernel image */ + krnl_buffer = (void *) kernel_buf + IMAGE_READ_OFFSET; + + /* + * Load ramdisk if present: If image is larger than RAMDISK_ORIGIN_ADDR, + * we load the ramdisk directly behind the image with 1 MiB alignment. + */ + if (ramdisk) { + rd_buffer = slurp_file_mmap(ramdisk, &ramdisk_len); + if (rd_buffer == NULL) { + fprintf(stderr, "Could not read ramdisk.\n"); + goto out; + } + ramdisk_origin = MAX(RAMDISK_ORIGIN_ADDR, kernel_size); + ramdisk_origin = _ALIGN_UP(ramdisk_origin, 0x100000); + add_segment_check(info, rd_buffer, ramdisk_len, + ramdisk_origin, ramdisk_len); + } + if (info->kexec_flags & KEXEC_ON_CRASH) { + if (load_crashdump_segments(info, crash_base, crash_end)) + goto out; + } else { + info->entry = (void *) IMAGE_READ_OFFSET; + } + + /* Register the ramdisk and crashkernel memory in the kernel. */ + { + unsigned long long *tmp; + + tmp = krnl_buffer + INITRD_START_OFFS; + *tmp = (unsigned long long) ramdisk_origin; + + tmp = krnl_buffer + INITRD_SIZE_OFFS; + *tmp = (unsigned long long) ramdisk_len; + + if (info->kexec_flags & KEXEC_ON_CRASH) { + tmp = krnl_buffer + OLDMEM_BASE_OFFS; + *tmp = crash_base; + + tmp = krnl_buffer + OLDMEM_SIZE_OFFS; + *tmp = crash_end - crash_base + 1; + } + } + + if (info->command_line) { + unsigned long maxsize; + char *dest = krnl_buffer + COMMAND_LINE_OFFS; + + maxsize = *(unsigned long *)(krnl_buffer + MAX_COMMAND_LINESIZE_OFFS); + if (!maxsize) + maxsize = LEGACY_COMMAND_LINESIZE; + + if (strlen(info->command_line) > maxsize-1) { + fprintf(stderr, "command line too long, maximum allowed size %ld\n", + maxsize-1); + goto out; + } + strncpy(dest, info->command_line, maxsize-1); + dest[maxsize-1] = '\0'; + } + ret = 0; +out: + free(info->command_line); + info->command_line = NULL; + return ret; +} + +int +image_s390_probe(const char *UNUSED(kernel_buf), off_t UNUSED(kernel_size)) +{ + /* + * Can't reliably tell if an image is valid, + * therefore everything is valid. + */ + return 0; +} + +void +image_s390_usage(void) +{ + printf("--command-line=STRING Set the kernel command line to STRING.\n" + "--append=STRING Set the kernel command line to STRING.\n" + "--initrd=FILENAME Use the file FILENAME as a ramdisk.\n" + "--reuse-cmdline Use kernel command line from running system.\n" + ); +} diff --git a/kexec/arch/s390/kexec-s390.c b/kexec/arch/s390/kexec-s390.c new file mode 100644 index 0000000..33ba6b9 --- /dev/null +++ b/kexec/arch/s390/kexec-s390.c @@ -0,0 +1,269 @@ +/* + * kexec/arch/s390/kexec-s390.c + * + * Copyright IBM Corp. 2005,2011 + * + * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> + * + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <sys/types.h> +#include <dirent.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-s390.h" +#include <arch/options.h> + +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +/* + * Read string from file + */ +static void read_str(char *string, const char *path, size_t len) +{ + size_t rc; + FILE *fh; + + fh = fopen(path, "rb"); + if (fh == NULL) + die("Could not open \"%s\"", path); + rc = fread(string, 1, len - 1, fh); + if (rc == 0 && ferror(fh)) + die("Could not read \"%s\"", path); + fclose(fh); + string[rc] = 0; + if (string[strlen(string) - 1] == '\n') + string[strlen(string) - 1] = 0; +} + +/* + * Return number of memory chunks + */ +static int memory_range_cnt(struct memory_range chunks[]) +{ + int i; + + for (i = 0; i < MAX_MEMORY_RANGES; i++) { + if (chunks[i].end == 0) + break; + } + return i; +} + +/* + * Create memory hole with given address and size + * + * lh = local hole + */ +static void add_mem_hole(struct memory_range chunks[], unsigned long addr, + unsigned long size) +{ + unsigned long lh_start, lh_end, lh_size, chunk_cnt; + int i; + + chunk_cnt = memory_range_cnt(chunks); + + for (i = 0; i < chunk_cnt; i++) { + if (addr + size <= chunks[i].start) + break; + if (addr > chunks[i].end) + continue; + lh_start = MAX(addr, chunks[i].start); + lh_end = MIN(addr + size - 1, chunks[i].end); + lh_size = lh_end - lh_start + 1; + if (lh_start == chunks[i].start && lh_end == chunks[i].end) { + /* Remove chunk */ + memmove(&chunks[i], &chunks[i + 1], + sizeof(struct memory_range) * + (MAX_MEMORY_RANGES - (i + 1))); + memset(&chunks[MAX_MEMORY_RANGES - 1], 0, + sizeof(struct memory_range)); + chunk_cnt--; + i--; + } else if (lh_start == chunks[i].start) { + /* Make chunk smaller at start */ + chunks[i].start = chunks[i].start + lh_size; + break; + } else if (lh_end == chunks[i].end) { + /* Make chunk smaller at end */ + chunks[i].end = lh_start - 1; + } else { + /* Split chunk into two */ + if (chunk_cnt >= MAX_MEMORY_RANGES) + die("Unable to create memory hole: %i", i); + memmove(&chunks[i + 1], &chunks[i], + sizeof(struct memory_range) * + (MAX_MEMORY_RANGES - (i + 1))); + chunks[i + 1].start = lh_start + lh_size; + chunks[i].end = lh_start - 1; + break; + } + } +} + +/* + * Remove offline memory from memory chunks + */ +static void remove_offline_memory(struct memory_range memory_range[]) +{ + unsigned long block_size, chunk_nr; + struct dirent *dirent; + char path[PATH_MAX]; + char str[64]; + DIR *dir; + + read_str(str, "/sys/devices/system/memory/block_size_bytes", + sizeof(str)); + sscanf(str, "%lx", &block_size); + + dir = opendir("/sys/devices/system/memory"); + if (!dir) + die("Could not read \"/sys/devices/system/memory\""); + while ((dirent = readdir(dir))) { + if (sscanf(dirent->d_name, "memory%ld\n", &chunk_nr) != 1) + continue; + sprintf(path, "/sys/devices/system/memory/%s/state", + dirent->d_name); + read_str(str, path, sizeof(str)); + if (strncmp(str, "offline", 6) != 0) + continue; + add_mem_hole(memory_range, chunk_nr * block_size, block_size); + } + closedir(dir); +} + +/* + * Get memory ranges of type "System RAM" from /proc/iomem. If with_crashk=1 + * then also type "Crash kernel" is added. + */ +int get_memory_ranges_s390(struct memory_range memory_range[], int *ranges, + int with_crashk) +{ + char crash_kernel[] = "Crash kernel\n"; + char sys_ram[] = "System RAM\n"; + const char *iomem = proc_iomem(); + FILE *fp; + char line[80]; + int current_range = 0; + + fp = fopen(iomem,"r"); + if(fp == 0) { + fprintf(stderr,"Unable to open %s: %s\n",iomem,strerror(errno)); + return -1; + } + + /* Setup the compare string properly. */ + while (fgets(line, sizeof(line), fp) != 0) { + unsigned long long start, end; + int cons; + char *str; + + if (current_range == MAX_MEMORY_RANGES) + break; + + sscanf(line,"%llx-%llx : %n", &start, &end, &cons); + str = line+cons; + if ((memcmp(str, sys_ram, strlen(sys_ram)) == 0) || + ((memcmp(str, crash_kernel, strlen(crash_kernel)) == 0) && + with_crashk)) { + memory_range[current_range].start = start; + memory_range[current_range].end = end; + memory_range[current_range].type = RANGE_RAM; + current_range++; + } + else { + continue; + } + } + fclose(fp); + remove_offline_memory(memory_range); + *ranges = memory_range_cnt(memory_range); + return 0; +} + +/* + * get_memory_ranges: + * Return a list of memory ranges by parsing the file returned by + * proc_iomem() + * + * INPUT: + * - Pointer to an array of memory_range structures. + * - Pointer to an integer with holds the number of memory ranges. + * + * RETURN: + * - 0 on normal execution. + * - (-1) if something went wrong. + */ + +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long flags) +{ + uint64_t start, end; + + if (get_memory_ranges_s390(memory_range, ranges, + flags & KEXEC_ON_CRASH)) + return -1; + *range = memory_range; + if ((flags & KEXEC_ON_CRASH) && !(flags & KEXEC_PRESERVE_CONTEXT)) { + if (parse_iomem_single("Crash kernel\n", &start, &end)) + return -1; + if (start > mem_min) + mem_min = start; + if (end < mem_max) + mem_max = end; + } + return 0; +} + +/* Supported file types and callbacks */ +struct file_type file_type[] = { + { "image", image_s390_probe, image_s390_load, image_s390_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + + +void arch_usage(void) +{ +} + +int arch_process_options(int UNUSED(argc), char **UNUSED(argv)) +{ + return 0; +} + +const struct arch_map_entry arches[] = { + { "s390", KEXEC_ARCH_S390 }, + { "s390x", KEXEC_ARCH_S390 }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +int is_crashkernel_mem_reserved(void) +{ + uint64_t start, end; + + return parse_iomem_single("Crash kernel\n", &start, &end) == 0 ? + (start != end) : 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + return parse_iomem_single("Crash kernel\n", start, end); +} diff --git a/kexec/arch/s390/kexec-s390.h b/kexec/arch/s390/kexec-s390.h new file mode 100644 index 0000000..6a99518 --- /dev/null +++ b/kexec/arch/s390/kexec-s390.h @@ -0,0 +1,38 @@ +/* + * kexec/arch/s390/kexec-s390.h + * + * (C) Copyright IBM Corp. 2005 + * + * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * + */ + +#ifndef KEXEC_S390_H +#define KEXEC_S390_H + +#define IMAGE_READ_OFFSET 0x10000 + +#define RAMDISK_ORIGIN_ADDR 0x800000 +#define INITRD_START_OFFS 0x408 +#define INITRD_SIZE_OFFS 0x410 +#define OLDMEM_BASE_OFFS 0x418 +#define OLDMEM_SIZE_OFFS 0x420 +#define MAX_COMMAND_LINESIZE_OFFS 0x430 +#define COMMAND_LINE_OFFS 0x480 +#define LEGACY_COMMAND_LINESIZE 896 +#define MAX_MEMORY_RANGES 1024 + +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +extern int image_s390_load(int, char **, const char *, off_t, struct kexec_info *); +extern int image_s390_probe(const char *, off_t); +extern void image_s390_usage(void); +extern int load_crashdump_segments(struct kexec_info *info, + unsigned long crash_base, + unsigned long crash_end); +extern int get_memory_ranges_s390(struct memory_range range[], int *ranges, + int with_crashk); +extern int command_line_add(struct kexec_info *info, const char *str); + +#endif /* KEXEC_S390_H */ diff --git a/kexec/arch/sh/Makefile b/kexec/arch/sh/Makefile new file mode 100644 index 0000000..7cf40ae --- /dev/null +++ b/kexec/arch/sh/Makefile @@ -0,0 +1,22 @@ +# +# kexec sh (linux booting linux) +# +sh_KEXEC_SRCS += kexec/arch/sh/kexec-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/kexec-uImage-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/kexec-zImage-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/kexec-netbsd-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/kexec-elf-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/kexec-elf-rel-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/netbsd_booter.S +sh_KEXEC_SRCS += kexec/arch/sh/crashdump-sh.c + +sh_UIMAGE = kexec/kexec-uImage.c + +sh_ADD_BUFFER = +sh_ADD_SEGMENT = +sh_VIRT_TO_PHYS = + +dist += kexec/arch/sh/Makefile $(sh_KEXEC_SRCS) \ + kexec/arch/sh/kexec-sh.h \ + kexec/arch/sh/crashdump-sh.h \ + kexec/arch/sh/include/arch/options.h diff --git a/kexec/arch/sh/crashdump-sh.c b/kexec/arch/sh/crashdump-sh.c new file mode 100644 index 0000000..36e9aaf --- /dev/null +++ b/kexec/arch/sh/crashdump-sh.c @@ -0,0 +1,166 @@ +/* + * crashdump-sh.c - crashdump for SuperH + * Copyright (C) 2008 Magnus Damm + * + * Based on x86 and ppc64 implementation, written by + * Vivek Goyal (vgoyal@in.ibm.com), R Sharada (sharada@in.ibm.com) + * Copyright (C) IBM Corporation, 2005. All rights reserved + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include "../../kexec-syscall.h" +#include "../../crashdump.h" +#include "kexec-sh.h" +#include "crashdump-sh.h" +#include <arch/options.h> + +#define CRASH_MAX_MEMORY_RANGES 64 +static struct memory_range crash_memory_range[CRASH_MAX_MEMORY_RANGES]; + +static int crash_sh_range_nr; +static int crash_sh_memory_range_callback(void *UNUSED(data), int UNUSED(nr), + char *str, + unsigned long long base, + unsigned long long length) +{ + + struct memory_range *range = crash_memory_range; + struct memory_range *range2 = crash_memory_range; + + range += crash_sh_range_nr; + + if (crash_sh_range_nr >= CRASH_MAX_MEMORY_RANGES) { + return 1; + } + + if (strncmp(str, "System RAM\n", 11) == 0) { + range->start = base; + range->end = base + length - 1; + range->type = RANGE_RAM; + crash_sh_range_nr++; + } + + if (strncmp(str, "Crash kernel\n", 13) == 0) { + if (!crash_sh_range_nr) + die("Unsupported /proc/iomem format\n"); + + range2 = range - 1; + if ((base + length - 1) < range2->end) { + range->start = base + length; + range->end = range2->end; + range->type = RANGE_RAM; + crash_sh_range_nr++; + } + range2->end = base - 1; + } + + return 0; +} + +/* Return a sorted list of available memory ranges. */ +static int crash_get_memory_ranges(struct memory_range **range, int *ranges) +{ + crash_sh_range_nr = 0; + + kexec_iomem_for_each_line(NULL, crash_sh_memory_range_callback, NULL); + *range = crash_memory_range; + *ranges = crash_sh_range_nr; + return 0; +} + +static struct crash_elf_info elf_info32 = +{ + class: ELFCLASS32, + data: ELFDATA2LSB, + machine: EM_SH, + page_offset: PAGE_OFFSET, +}; + +static int add_cmdline_param(char *cmdline, uint64_t addr, char *cmdstr, + char *byte) +{ + int cmdlen, len, align = 1024; + char str[COMMAND_LINE_SIZE], *ptr; + + /* Passing in =xxxK / =xxxM format. Saves space required in cmdline.*/ + switch (byte[0]) { + case 'K': + if (addr%align) + return -1; + addr = addr/align; + break; + case 'M': + addr = addr/(align *align); + break; + } + ptr = str; + strcpy(str, cmdstr); + ptr += strlen(str); + ultoa(addr, ptr); + strcat(str, byte); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + dbgprintf("Command line after adding elfcorehdr: %s\n", cmdline); + + return 0; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for storing elf headers for crash memory image. + */ +int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline) +{ + void *tmp; + unsigned long sz, elfcorehdr; + int nr_ranges; + struct memory_range *mem_range; + + if (crash_get_memory_ranges(&mem_range, &nr_ranges) < 0) + return -1; + + if (crash_create_elf32_headers(info, &elf_info32, + mem_range, nr_ranges, + &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) + return -1; + + elfcorehdr = add_buffer_phys_virt(info, tmp, sz, sz, 1024, + 0, 0xffffffff, -1, 0); + + dbgprintf("Created elf header segment at 0x%lx\n", elfcorehdr); + add_cmdline_param(mod_cmdline, elfcorehdr, " elfcorehdr=", "K"); + add_cmdline_param(mod_cmdline, elfcorehdr - mem_min, " mem=", "K"); + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + uint64_t start, end; + + return parse_iomem_single("Crash kernel\n", &start, &end) == 0 ? + (start != end) : 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + return parse_iomem_single("Crash kernel\n", start, end); +} diff --git a/kexec/arch/sh/crashdump-sh.h b/kexec/arch/sh/crashdump-sh.h new file mode 100644 index 0000000..c5d4102 --- /dev/null +++ b/kexec/arch/sh/crashdump-sh.h @@ -0,0 +1,9 @@ +#ifndef CRASHDUMP_SH_H +#define CRASHDUMP_SH_H + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline); + +#define PAGE_OFFSET 0x80000000 + +#endif /* CRASHDUMP_SH_H */ diff --git a/kexec/arch/sh/include/arch/options.h b/kexec/arch/sh/include/arch/options.h new file mode 100644 index 0000000..f923eb4 --- /dev/null +++ b/kexec/arch/sh/include/arch/options.h @@ -0,0 +1,42 @@ +#ifndef KEXEC_ARCH_SH_OPTIONS_H +#define KEXEC_ARCH_SH_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) +#define OPT_APPEND (OPT_ARCH_MAX+1) +#define OPT_EMPTYZERO (OPT_ARCH_MAX+2) +#define OPT_NBSD_HOWTO (OPT_ARCH_MAX+3) +#define OPT_NBSD_MROOT (OPT_ARCH_MAX+4) + +/* Options relevant to the architecture (excluding loader-specific ones): */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + {"command-line", 1, 0, OPT_APPEND}, \ + {"append", 1, 0, OPT_APPEND}, \ + {"empty-zero", 1, 0, OPT_APPEND}, \ + {"howto", 1, 0, OPT_NBSD_HOWTO}, \ + {"miniroot", 1, 0, OPT_NBSD_MROOT}, +/* These options seem to be loader-specific rather than superh-specific, so + * ought to be moved to KEXEC_ALL_OPTIONS below and parsed in the relevant + * loader, e.g. kexec-netbsd-sh.c + */ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS KEXEC_ARCH_OPTIONS +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_SH_OPTIONS_H */ diff --git a/kexec/arch/sh/kexec-elf-rel-sh.c b/kexec/arch/sh/kexec-elf-rel-sh.c new file mode 100644 index 0000000..3993ee8 --- /dev/null +++ b/kexec/arch/sh/kexec-elf-rel-sh.c @@ -0,0 +1,54 @@ +/* + * kexec-elf-rel-sh.c - ELF relocations for SuperH + * Copyright (C) 2008 Paul Mundt + * + * Based on the SHcompact module loader (arch/sh/kernel/module.c) in the + * Linux kernel, which is written by: + * + * Copyright (C) 2003 - 2008 Kaz Kojima & Paul Mundt + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + /* Intentionally don't bother with endianness validation, it's + * configurable */ + + if (ehdr->ei_class != ELFCLASS32) + return 0; + if (ehdr->e_machine != EM_SH) + return 0; + + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *orig_loc, + unsigned long UNUSED(address), unsigned long relocation) +{ + uint32_t *location = orig_loc; + uint32_t value; + + switch (r_type) { + case R_SH_DIR32: + value = get_unaligned(location); + value += relocation; + put_unaligned(value, location); + break; + case R_SH_REL32: + relocation = (relocation - (uint32_t)location); + value = get_unaligned(location); + value += relocation; + put_unaligned(value, location); + break; + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } +} diff --git a/kexec/arch/sh/kexec-elf-sh.c b/kexec/arch/sh/kexec-elf-sh.c new file mode 100644 index 0000000..897552c --- /dev/null +++ b/kexec/arch/sh/kexec-elf-sh.c @@ -0,0 +1,136 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2008 Magnus Damm + * + * Based on x86 implementation, + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include <arch/options.h> +#include "crashdump-sh.h" +#include "kexec-sh.h" + +int elf_sh_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) + goto out; + + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_SH) { + result = -1; + goto out; + } + + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +void elf_sh_usage(void) +{ + printf(" --append=STRING Set the kernel command line to STRING\n" + ); +} + +int elf_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *command_line; + char *modified_cmdline; + struct mem_sym sym; + int opt, rc; + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { 0, 0, 0, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* + * Parse the command line arguments + */ + command_line = modified_cmdline = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + command_line = optarg; + break; + } + } + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (command_line) { + strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + } + + /* Load the ELF executable */ + elf_exec_build_load(info, &ehdr, buf, len, 0); + info->entry = (void *)virt_to_phys(ehdr.e_entry); + + /* If panic kernel is being loaded, additional segments need + * to be created. */ + if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) { + rc = load_crashdump_segments(info, modified_cmdline); + if (rc < 0) + return -1; + /* Use new command line. */ + command_line = modified_cmdline; + } + + /* If we're booting a vmlinux then fill in empty_zero_page */ + if (elf_rel_find_symbol(&ehdr, "empty_zero_page", &sym) == 0) { + char *zp = (void *)ehdr.e_shdr[sym.st_shndx].sh_data; + + kexec_sh_setup_zero_page(zp, 4096, command_line); + } + + return 0; +} diff --git a/kexec/arch/sh/kexec-netbsd-sh.c b/kexec/arch/sh/kexec-netbsd-sh.c new file mode 100644 index 0000000..ed93759 --- /dev/null +++ b/kexec/arch/sh/kexec-netbsd-sh.c @@ -0,0 +1,149 @@ +/* + * kexec-netbsd-sh.c - kexec netbsd loader for the SH + * Copyright (C) 2005 kogiidena@eggplant.ddo.jp + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include <arch/options.h> + +static const int probe_debug = 0; +extern const unsigned char netbsd_booter[]; + +/* + * netbsd_sh_probe - sanity check the elf image + * + * Make sure that the file image has a reasonable chance of working. + */ +int netbsd_sh_probe(const char *buf, off_t UNUSED(len)) +{ + Elf32_Ehdr *ehdr; + + ehdr = (Elf32_Ehdr *)buf; + if(memcmp(buf, ELFMAG, SELFMAG) != 0){ + return -1; + } + if (ehdr->e_machine != EM_SH) { + return -1; + } + return 0; +} + +void netbsd_sh_usage(void) +{ + printf( + " --howto=VALUE NetBSD kernel boot option.\n" + " --miniroot=FILE NetBSD miniroot ramdisk.\n\n"); +} + +int netbsd_sh_load(int argc, char **argv, const char *buf, off_t UNUSED(len), + struct kexec_info *info) +{ + const char *howto, *miniroot; + unsigned long entry, start, size, psz; + char *miniroot_buf; + off_t miniroot_length; + unsigned int howto_value; + unsigned char *param; + unsigned long *paraml; + unsigned char *img; + + int opt; + + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {0, 0, 0, 0}, + }; + + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + howto = miniroot = 0; + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_NBSD_HOWTO: + howto = optarg; + break; + case OPT_NBSD_MROOT: + miniroot = optarg; + break; + } + } + + /* howto */ + howto_value = 0; + if(howto){ + howto_value = strtol(howto, NULL, 0); + } + + psz = getpagesize(); + + /* Parse the Elf file */ + { + Elf32_Ehdr *ehdr; + Elf32_Phdr *phdr; + unsigned long bbs; + ehdr = (Elf32_Ehdr *)buf; + phdr = (Elf32_Phdr *)&buf[ehdr->e_phoff]; + + entry = ehdr->e_entry; + img = (unsigned char *)&buf[phdr->p_offset]; + start = (phdr->p_paddr) & 0x1fffffff; + bbs = phdr->p_filesz; + size = phdr->p_memsz; + + if(size < bbs){ + size = bbs; + } + + size = _ALIGN(size, psz); + memset(&img[bbs], 0, size-bbs); + add_segment(info, img, size, start, size); + start += size; + } + + /* miniroot file */ + miniroot_buf = 0; + if (miniroot) { + miniroot_buf = slurp_file(miniroot, &miniroot_length); + howto_value |= 0x200; + size = _ALIGN(miniroot_length, psz); + add_segment(info, miniroot_buf, size, start, size); + start += size; + } + + /* howto & bootinfo */ + param = xmalloc(4096); + memset(param, 0, 4096); + paraml = (unsigned long *) ¶m[256]; + memcpy(param, netbsd_booter, 256); + paraml[0] = entry; + paraml[1] = howto_value; + add_segment(info, param, 4096, start, 4096); + + /* For now we don't have arguments to pass :( */ + info->entry = (void *) (start | 0xa0000000); + return 0; +} diff --git a/kexec/arch/sh/kexec-sh.c b/kexec/arch/sh/kexec-sh.c new file mode 100644 index 0000000..ce341c8 --- /dev/null +++ b/kexec/arch/sh/kexec-sh.c @@ -0,0 +1,259 @@ +/* + * kexec-sh.c - kexec for the SH + * Copyright (C) 2004 kogiidena@eggplant.ddo.jp + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-sh.h" +#include <arch/options.h> + +#define MAX_MEMORY_RANGES 64 +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +static int kexec_sh_memory_range_callback(void *UNUSED(data), int nr, + char *UNUSED(str), + unsigned long long base, + unsigned long long length) +{ + if (nr < MAX_MEMORY_RANGES) { + memory_range[nr].start = base; + memory_range[nr].end = base + length - 1; + memory_range[nr].type = RANGE_RAM; + return 0; + } + + return 1; +} + +/* Return a sorted list of available memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + int nr, ret; + nr = kexec_iomem_for_each_line("System RAM\n", + kexec_sh_memory_range_callback, NULL); + *range = memory_range; + *ranges = nr; + + /* + * Redefine the memory region boundaries if kernel + * exports the limits and if it is panic kernel. + * Override user values only if kernel exported values are + * subset of user defined values. + */ + if (kexec_flags & KEXEC_ON_CRASH) { + unsigned long long start, end; + + ret = parse_iomem_single("Crash kernel\n", &start, &end); + if (ret != 0) { + fprintf(stderr, "parse_iomem_single failed.\n"); + return -1; + } + + if (start > mem_min) + mem_min = start; + if (end < mem_max) + mem_max = end; + } + + return 0; +} + +/* Supported file types and callbacks */ +struct file_type file_type[] = { + /* uImage is probed before zImage because the latter also accepts + uncompressed images. */ + { "uImage-sh", uImage_sh_probe, uImage_sh_load, zImage_sh_usage }, + { "zImage-sh", zImage_sh_probe, zImage_sh_load, zImage_sh_usage }, + { "elf-sh", elf_sh_probe, elf_sh_load, elf_sh_usage }, + { "netbsd-sh", netbsd_sh_probe, netbsd_sh_load, netbsd_sh_usage }, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + + +void arch_usage(void) +{ + + printf( + " none\n\n" + "Default options:\n" + " --append=\"%s\"\n" + " STRING of --append is set from /proc/cmdline as default.\n" + ,get_append()); + +} + +int arch_process_options(int argc, char **argv) +{ + /* The common options amongst loaders (e.g. --append) should be read + * here, and the loader-specific options (e.g. NetBSD stuff) should + * then be re-parsed in the loader. + * (e.g. in kexec-netbsd-sh.c, for example.) + */ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_MAX) { + break; + } + case OPT_APPEND: + case OPT_NBSD_HOWTO: + case OPT_NBSD_MROOT: + ; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + /* For compatibility with older patches + * use KEXEC_ARCH_DEFAULT instead of KEXEC_ARCH_SH here. + */ + { "sh3", KEXEC_ARCH_DEFAULT }, + { "sh4", KEXEC_ARCH_DEFAULT }, + { "sh4a", KEXEC_ARCH_DEFAULT }, + { "sh4al-dsp", KEXEC_ARCH_DEFAULT }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +char append_buf[256]; + +char *get_append(void) +{ + FILE *fp; + int len; + if((fp = fopen("/proc/cmdline", "r")) == NULL){ + die("/proc/cmdline file open error !!\n"); + } + fgets(append_buf, 256, fp); + len = strlen(append_buf); + append_buf[len-1] = 0; + fclose(fp); + return append_buf; +} + +void kexec_sh_setup_zero_page(char *zero_page_buf, size_t zero_page_size, + char *cmd_line) +{ + size_t n = zero_page_size - 0x100; + + memset(zero_page_buf, 0, zero_page_size); + + if (cmd_line) { + if (n > strlen(cmd_line)) + n = strlen(cmd_line); + + memcpy(zero_page_buf + 0x100, cmd_line, n); + zero_page_buf[0x100 + n] = '\0'; + } +} + +static int is_32bit(void) +{ + const char *cpuinfo = "/proc/cpuinfo"; + char line[MAX_LINE]; + FILE *fp; + int status = 0; + + fp = fopen(cpuinfo, "r"); + if (!fp) + die("Cannot open %s\n", cpuinfo); + + while(fgets(line, sizeof(line), fp) != 0) { + const char *key = "address sizes"; + const char *value = " 32 bits physical"; + char *p; + if (strncmp(line, key, strlen(key))) + continue; + p = strchr(line + strlen(key), ':'); + if (!p) + continue; + if (!strncmp(p + 1, value, strlen(value))) + status = 1; + break; + } + + fclose(fp); + + return status; +} + +unsigned long virt_to_phys(unsigned long addr) +{ + unsigned long seg = addr & 0xe0000000; + unsigned long long start = 0; + int have_32bit = is_32bit(); + + if (seg != 0x80000000 && (have_32bit || seg != 0xc0000000)) + die("Virtual address %p is not in P1%s\n", (void *)addr, + have_32bit ? "" : " or P2"); + + /* If 32bit addressing is used then the base of system RAM + * is an offset into physical memory. */ + if (have_32bit) { + unsigned long long end; + int ret; + + /* Assume there is only one "System RAM" region */ + ret = parse_iomem_single("System RAM\n", &start, &end); + if (ret) + die("Could not parse System RAM region " + "in /proc/iomem\n"); + } + + return addr - seg + start; +} + +/* + * add_segment() should convert base to a physical address on superh, + * while the default is just to work with base as is */ +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} + +/* + * add_buffer() should convert base to a physical address on superh, + * while the default is just to work with base as is */ +unsigned long add_buffer(struct kexec_info *info, const void *buf, + unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, + unsigned long buf_max, int buf_end) +{ + return add_buffer_phys_virt(info, buf, bufsz, memsz, buf_align, + buf_min, buf_max, buf_end, 1); +} diff --git a/kexec/arch/sh/kexec-sh.h b/kexec/arch/sh/kexec-sh.h new file mode 100644 index 0000000..7d28ade --- /dev/null +++ b/kexec/arch/sh/kexec-sh.h @@ -0,0 +1,29 @@ +#ifndef KEXEC_SH_H +#define KEXEC_SH_H + +#define COMMAND_LINE_SIZE 2048 + +int uImage_sh_probe(const char *buf, off_t len); +int uImage_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); + +int zImage_sh_probe(const char *buf, off_t len); +int zImage_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void zImage_sh_usage(void); + +int elf_sh_probe(const char *buf, off_t len); +int elf_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_sh_usage(void); + +int netbsd_sh_probe(const char *buf, off_t len); +int netbsd_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void netbsd_sh_usage(void); + +char *get_append(void); +void kexec_sh_setup_zero_page(char *zero_page_buf, size_t zero_page_size, + char *cmd_line); + +#endif /* KEXEC_SH_H */ diff --git a/kexec/arch/sh/kexec-uImage-sh.c b/kexec/arch/sh/kexec-uImage-sh.c new file mode 100644 index 0000000..130f12c --- /dev/null +++ b/kexec/arch/sh/kexec-uImage-sh.c @@ -0,0 +1,24 @@ +/* + * uImage support added by Marc Andre Tanner <mat@brain-dump.org> + * + * Cloned from ARM by Paul Mundt, 2009. + */ +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <image.h> +#include <kexec-uImage.h> +#include "../../kexec.h" +#include "kexec-sh.h" + +int uImage_sh_probe(const char *buf, off_t len) +{ + return uImage_probe_kernel(buf, len, IH_ARCH_SH); +} + +int uImage_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + return zImage_sh_load(argc, argv, buf + sizeof(struct image_header), + len - sizeof(struct image_header), info); +} diff --git a/kexec/arch/sh/kexec-zImage-sh.c b/kexec/arch/sh/kexec-zImage-sh.c new file mode 100644 index 0000000..6dc2c13 --- /dev/null +++ b/kexec/arch/sh/kexec-zImage-sh.c @@ -0,0 +1,142 @@ +/* + * kexec-zImage-sh.c - kexec zImage loader for the SH + * Copyright (C) 2005 kogiidena@eggplant.ddo.jp + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include <arch/options.h> +#include "kexec-sh.h" + +static const int probe_debug = 0; + +#define HEAD32_KERNEL_START_ADDR 0 +#define HEAD32_DECOMPRESS_KERNEL_ADDR 1 +#define HEAD32_INIT_STACK_ADDR 2 +#define HEAD32_INIT_SR 3 +#define HEAD32_INIT_SR_VALUE 0x400000F0 + +static unsigned long zImage_head32(const char *buf, int offs) +{ + unsigned long *values = (void *)buf; + int k; + + for (k = (0x200 / 4) - 1; k > 0; k--) + if (values[k] != 0x00090009) /* not nop + nop padding*/ + return values[k - offs]; + + return 0; +} + +/* + * zImage_sh_probe - sanity check the elf image + * + * Make sure that the file image has a reasonable chance of working. + */ +int zImage_sh_probe(const char *buf, off_t UNUSED(len)) +{ + if (memcmp(&buf[0x202], "HdrS", 4) != 0) + return -1; + + if (zImage_head32(buf, HEAD32_INIT_SR) != HEAD32_INIT_SR_VALUE) + return -1; + + return 0; +} + +void zImage_sh_usage(void) +{ + printf( + " --append=STRING Set the kernel command line to STRING.\n" + " --empty-zero=ADDRESS Set the kernel top ADDRESS. \n\n"); + +} + +int zImage_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + char *command_line; + int opt; + unsigned long empty_zero, zero_page_base, zero_page_size, k; + unsigned long image_base; + char *param; + + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {0, 0, 0, 0}, + }; + + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + command_line = 0; + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + command_line = optarg; + break; + } + } + + if (!command_line) + command_line = get_append(); + + /* assume the zero page is the page before the vmlinux entry point. + * we don't know the page size though, but 64k seems to be max. + * put several 4k zero page copies before the entry point to cover + * all combinations. + */ + + empty_zero = zImage_head32(buf, HEAD32_KERNEL_START_ADDR); + + zero_page_size = 0x10000; + zero_page_base = virt_to_phys(empty_zero - zero_page_size); + + while (!valid_memory_range(info, zero_page_base, + zero_page_base + zero_page_size - 1)) { + zero_page_base += 0x1000; + zero_page_size -= 0x1000; + if (zero_page_size == 0) + die("Unable to determine zero page size from %p \n", + (void *)empty_zero); + } + + param = xmalloc(zero_page_size); + for (k = 0; k < (zero_page_size / 0x1000); k++) + kexec_sh_setup_zero_page(param + (k * 0x1000), 0x1000, + command_line); + + add_segment(info, param, zero_page_size, + 0x80000000 | zero_page_base, zero_page_size); + + /* load image a bit above the zero page, round up to 64k + * the zImage will relocate itself, but only up seems supported. + */ + + image_base = _ALIGN(empty_zero, 0x10000); + add_segment(info, buf, len, image_base, len); + info->entry = (void *)virt_to_phys(image_base); + return 0; +} diff --git a/kexec/arch/sh/netbsd_booter.S b/kexec/arch/sh/netbsd_booter.S new file mode 100644 index 0000000..d4d16df --- /dev/null +++ b/kexec/arch/sh/netbsd_booter.S @@ -0,0 +1,47 @@ + .globl netbsd_booter +netbsd_booter: + mov.l ccr,r0 + mov #0,r1 + mov.l r1,@r0 + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + mova netbsd_start,r0 + mov.l @r0,r1 + add #4,r0 + mov.l @r0,r4 ! howto + add #4,r0 + mov r0,r5 ! bootinfo + jmp @r1 + nop + nop + nop + .align 4 +ccr: .long 0xff00001c + + .align 8 +netbsd_start: + .long 0x8c001000 + .long 0x200 ! howto + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + diff --git a/kexec/arch/x86_64/Makefile b/kexec/arch/x86_64/Makefile new file mode 100644 index 0000000..275add5 --- /dev/null +++ b/kexec/arch/x86_64/Makefile @@ -0,0 +1,24 @@ +# +# kexec x86_64 (linux booting linux) +# +x86_64_KEXEC_SRCS = kexec/arch/i386/kexec-elf-x86.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-bzImage.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-multiboot-x86.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-mb2-x86.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-beoboot-x86.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-nbi.c +x86_64_KEXEC_SRCS += kexec/arch/i386/x86-linux-setup.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-x86-common.c +x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c + +x86_64_KEXEC_SRCS_native = kexec/arch/x86_64/kexec-x86_64.c +x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-x86_64.c +x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-rel-x86_64.c +x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-bzImage64.c + +x86_64_KEXEC_SRCS += $(x86_64_KEXEC_SRCS_native) + +# Don't add sources in i386/ to dist, as i386/Makefile adds them +dist += kexec/arch/x86_64/Makefile $(x86_64_KEXEC_SRCS_native) \ + kexec/arch/x86_64/kexec-x86_64.h \ + kexec/arch/x86_64/include/arch/options.h diff --git a/kexec/arch/x86_64/include/arch/options.h b/kexec/arch/x86_64/include/arch/options.h new file mode 120000 index 0000000..047b0f9 --- /dev/null +++ b/kexec/arch/x86_64/include/arch/options.h @@ -0,0 +1 @@ +../../../i386/include/arch/options.h
\ No newline at end of file diff --git a/kexec/arch/x86_64/kexec-bzImage64.c b/kexec/arch/x86_64/kexec-bzImage64.c new file mode 100644 index 0000000..aba4e3b --- /dev/null +++ b/kexec/arch/x86_64/kexec-bzImage64.c @@ -0,0 +1,397 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2010 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-x86_64.h" +#include "../i386/x86-linux-setup.h" +#include "../i386/crashdump-x86.h" +#include <arch/options.h> + +static const int probe_debug = 0; + +int bzImage64_probe(const char *buf, off_t len) +{ + const struct x86_linux_header *header; + + if ((uintmax_t)len < (uintmax_t)(2 * 512)) { + if (probe_debug) + fprintf(stderr, "File is too short to be a bzImage!\n"); + return -1; + } + header = (const struct x86_linux_header *)buf; + if (memcmp(header->header_magic, "HdrS", 4) != 0) { + if (probe_debug) + fprintf(stderr, "Not a bzImage\n"); + return -1; + } + if (header->boot_sector_magic != 0xAA55) { + if (probe_debug) + fprintf(stderr, "No x86 boot sector present\n"); + /* No x86 boot sector present */ + return -1; + } + if (header->protocol_version < 0x020C) { + if (probe_debug) + fprintf(stderr, "Must be at least protocol version 2.12\n"); + /* Must be at least protocol version 2.12 */ + return -1; + } + if ((header->loadflags & 1) == 0) { + if (probe_debug) + fprintf(stderr, "zImage not a bzImage\n"); + /* Not a bzImage */ + return -1; + } + if ((header->xloadflags & 3) != 3) { + if (probe_debug) + fprintf(stderr, "Not a relocatable bzImage64\n"); + /* Must be KERNEL_64 and CAN_BE_LOADED_ABOVE_4G */ + return -1; + } + +#define XLF_EFI_KEXEC (1 << 4) + if ((header->xloadflags & XLF_EFI_KEXEC) == XLF_EFI_KEXEC) + bzImage_support_efi_boot = 1; + + /* I've got a relocatable bzImage64 */ + if (probe_debug) + fprintf(stderr, "It's a relocatable bzImage64\n"); + return 0; +} + +void bzImage64_usage(void) +{ + printf( " --entry-32bit Use the kernels 32bit entry point.\n" + " --real-mode Use the kernels real mode entry point.\n" + " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --reuse-cmdline Use kernel command line from running system.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + ); +} + +static int do_bzImage64_load(struct kexec_info *info, + const char *kernel, off_t kernel_len, + const char *command_line, off_t command_line_len, + const char *initrd, off_t initrd_len) +{ + struct x86_linux_header setup_header; + struct x86_linux_param_header *real_mode; + int setup_sects; + size_t size; + int kern16_size; + unsigned long setup_base, setup_size, setup_header_size; + struct entry64_regs regs64; + char *modified_cmdline; + unsigned long cmdline_end; + unsigned long align, addr, k_size; + unsigned kern16_size_needed; + + /* + * Find out about the file I am about to load. + */ + if ((uintmax_t)kernel_len < (uintmax_t)(2 * 512)) + return -1; + + memcpy(&setup_header, kernel, sizeof(setup_header)); + setup_sects = setup_header.setup_sects; + if (setup_sects == 0) + setup_sects = 4; + kern16_size = (setup_sects + 1) * 512; + if (kernel_len < kern16_size) { + fprintf(stderr, "BzImage truncated?\n"); + return -1; + } + + if ((uintmax_t)command_line_len > (uintmax_t)setup_header.cmdline_size) { + dbgprintf("Kernel command line too long for kernel!\n"); + return -1; + } + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (command_line) { + strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + + /* If panic kernel is being loaded, additional segments need + * to be created. load_crashdump_segments will take care of + * loading the segments as high in memory as possible, hence + * in turn as away as possible from kernel to avoid being + * stomped by the kernel. + */ + if (load_crashdump_segments(info, modified_cmdline, -1, 0) < 0) + return -1; + + /* Use new command line buffer */ + command_line = modified_cmdline; + command_line_len = strlen(command_line) + 1; + } + + /* x86_64 purgatory could be anywhere */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0x3000, -1, -1, 0); + dbgprintf("Loaded purgatory at addr 0x%lx\n", info->rhdr.rel_addr); + /* The argument/parameter segment */ + kern16_size_needed = kern16_size; + if (kern16_size_needed < 4096) + kern16_size_needed = 4096; + setup_size = kern16_size_needed + command_line_len + + PURGATORY_CMDLINE_SIZE; + real_mode = xmalloc(setup_size); + memset(real_mode, 0, setup_size); + + /* only copy setup_header */ + setup_header_size = kernel[0x201] + 0x202 - 0x1f1; + if (setup_header_size > 0x7f) + setup_header_size = 0x7f; + memcpy((unsigned char *)real_mode + 0x1f1, kernel + 0x1f1, + setup_header_size); + + /* No real mode code will be executing. setup segment can be loaded + * anywhere as we will be just reading command line. + */ + setup_base = add_buffer(info, real_mode, setup_size, setup_size, + 16, 0x3000, -1, -1); + + dbgprintf("Loaded real_mode_data and command line at 0x%lx\n", + setup_base); + + /* The main kernel segment */ + k_size = kernel_len - kern16_size; + /* need to use run-time size for buffer searching */ + dbgprintf("kernel init_size 0x%x\n", real_mode->init_size); + size = _ALIGN(real_mode->init_size, 4096); + align = real_mode->kernel_alignment; + addr = add_buffer(info, kernel + kern16_size, k_size, + size, align, 0x100000, -1, -1); + if (addr == ULONG_MAX) + die("can not load bzImage64"); + dbgprintf("Loaded 64bit kernel at 0x%lx\n", addr); + + /* Tell the kernel what is going on */ + setup_linux_bootloader_parameters_high(info, real_mode, setup_base, + kern16_size_needed, command_line, command_line_len, + initrd, initrd_len, 1); /* put initrd high too */ + + elf_rel_get_symbol(&info->rhdr, "entry64_regs", ®s64, + sizeof(regs64)); + regs64.rbx = 0; /* Bootstrap processor */ + regs64.rsi = setup_base; /* Pointer to the parameters */ + regs64.rip = addr + 0x200; /* the entry point for startup_64 */ + regs64.rsp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */ + elf_rel_set_symbol(&info->rhdr, "entry64_regs", ®s64, + sizeof(regs64)); + + cmdline_end = setup_base + kern16_size_needed + command_line_len - 1; + elf_rel_set_symbol(&info->rhdr, "cmdline_end", &cmdline_end, + sizeof(unsigned long)); + + /* Fill in the information BIOS calls would normally provide. */ + setup_linux_system_parameters(info, real_mode); + + return 0; +} + +/* This assumes file is being loaded using file based kexec syscall */ +int bzImage64_load_file(int argc, char **argv, struct kexec_info *info) +{ + int ret = 0; + char *command_line = NULL, *tmp_cmdline = NULL; + const char *ramdisk = NULL, *append = NULL; + int entry_16bit = 0, entry_32bit = 0; + int opt; + int command_line_len; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_APPEND }, + { "append", 1, 0, OPT_APPEND }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "initrd", 1, 0, OPT_RAMDISK }, + { "ramdisk", 1, 0, OPT_RAMDISK }, + { "real-mode", 0, 0, OPT_REAL_MODE }, + { "entry-32bit", 0, 0, OPT_ENTRY_32BIT }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR "d"; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_REAL_MODE: + entry_16bit = 1; + break; + case OPT_ENTRY_32BIT: + entry_32bit = 1; + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); + if (tmp_cmdline) + free(tmp_cmdline); + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) + 1; + } else { + command_line = strdup("\0"); + command_line_len = 1; + } + + if (entry_16bit || entry_32bit) { + fprintf(stderr, "Kexec2 syscall does not support 16bit" + " or 32bit entry yet\n"); + ret = -1; + goto out; + } + + if (ramdisk) { + info->initrd_fd = open(ramdisk, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, "Could not open initrd file %s:%s\n", + ramdisk, strerror(errno)); + ret = -1; + goto out; + } + } + + info->command_line = command_line; + info->command_line_len = command_line_len; + return ret; +out: + free(command_line); + return ret; +} + +int bzImage64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + char *command_line = NULL, *tmp_cmdline = NULL; + const char *ramdisk = NULL, *append = NULL; + char *ramdisk_buf; + off_t ramdisk_length = 0; + int command_line_len; + int entry_16bit = 0, entry_32bit = 0; + int opt; + int result; + + if (info->file_mode) + return bzImage64_load_file(argc, argv, info); + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_APPEND }, + { "append", 1, 0, OPT_APPEND }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "initrd", 1, 0, OPT_RAMDISK }, + { "ramdisk", 1, 0, OPT_RAMDISK }, + { "real-mode", 0, 0, OPT_REAL_MODE }, + { "entry-32bit", 0, 0, OPT_ENTRY_32BIT }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR "d"; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_REAL_MODE: + entry_16bit = 1; + break; + case OPT_ENTRY_32BIT: + entry_32bit = 1; + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); + if (tmp_cmdline) + free(tmp_cmdline); + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) + 1; + } else { + command_line = strdup("\0"); + command_line_len = 1; + } + ramdisk_buf = 0; + if (ramdisk) + ramdisk_buf = slurp_file(ramdisk, &ramdisk_length); + + if (entry_16bit || entry_32bit) + result = do_bzImage_load(info, buf, len, command_line, + command_line_len, ramdisk_buf, + ramdisk_length, 0, 0, entry_16bit); + else + result = do_bzImage64_load(info, buf, len, command_line, + command_line_len, ramdisk_buf, + ramdisk_length); + + free(command_line); + return result; +} diff --git a/kexec/arch/x86_64/kexec-elf-rel-x86_64.c b/kexec/arch/x86_64/kexec-elf-rel-x86_64.c new file mode 100644 index 0000000..0d22f3b --- /dev/null +++ b/kexec/arch/x86_64/kexec-elf-rel-x86_64.c @@ -0,0 +1,94 @@ +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2LSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS64 && + ehdr->ei_class != ELFCLASS32) { /* x32 */ + return 0; + } + if (ehdr->e_machine != EM_X86_64) { + return 0; + } + return 1; +} + +static const char *reloc_name(unsigned long r_type) +{ + static const char *r_name[] = { + "R_X86_64_NONE", + "R_X86_64_64", + "R_X86_64_PC32", + "R_X86_64_GOT32", + "R_X86_64_PLT32", + "R_X86_64_COPY", + "R_X86_64_GLOB_DAT", + "R_X86_64_JUMP_SLOT", + "R_X86_64_RELATIVE", + "R_X86_64_GOTPCREL", + "R_X86_64_32", + "R_X86_64_32S", + "R_X86_64_16", + "R_X86_64_PC16", + "R_X86_64_8", + "R_X86_64_PC8", + "R_X86_64_DTPMOD64", + "R_X86_64_DTPOFF64", + "R_X86_64_TPOFF64", + "R_X86_64_TLSGD", + "R_X86_64_TLSLD", + "R_X86_64_DTPOFF32", + "R_X86_64_GOTTPOFF", + "R_X86_64_TPOFF32", + }; + static char buf[100]; + const char *name; + if (r_type < (sizeof(r_name)/sizeof(r_name[0]))){ + name = r_name[r_type]; + } + else { + sprintf(buf, "R_X86_64_%lu", r_type); + name = buf; + } + return name; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + dbgprintf("%s\n", reloc_name(r_type)); + switch(r_type) { + case R_X86_64_NONE: + break; + case R_X86_64_64: + *(uint64_t *)location = value; + break; + case R_X86_64_32: + *(uint32_t *)location = value; + if (value != *(uint32_t *)location) + goto overflow; + break; + case R_X86_64_32S: + *(uint32_t *)location = value; + if ((int64_t)value != *(int32_t *)location) + goto overflow; + break; + case R_X86_64_PC32: + case R_X86_64_PLT32: + *(uint32_t *)location = value - address; + break; + default: + die("Unhandled rela relocation: %s\n", reloc_name(r_type)); + break; + } + return; + overflow: + die("overflow in relocation type %s val %lx\n", + reloc_name(r_type), value); +} diff --git a/kexec/arch/x86_64/kexec-elf-x86_64.c b/kexec/arch/x86_64/kexec-elf-x86_64.c new file mode 100644 index 0000000..7f9540a --- /dev/null +++ b/kexec/arch/x86_64/kexec-elf-x86_64.c @@ -0,0 +1,257 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include "../i386/x86-linux-setup.h" +#include "kexec-x86_64.h" +#include "../i386/kexec-x86.h" +#include "../i386/crashdump-x86.h" +#include <arch/options.h> + +int elf_x86_64_probe(const char *buf, off_t len) +{ + return elf_x86_any_probe(buf, len, CORE_TYPE_ELF64); +} + +void elf_x86_64_usage(void) +{ + printf( " --command-line=STRING Set the kernel command line to STRING\n" + " --append=STRING Set the kernel command line to STRING\n" + " --reuse-cmdline Use kernel command line from running system.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " --args-linux Pass linux kernel style options\n" + " --args-elf Pass elf boot notes\n" + " --args-none Jump directly from the kernel\n" + ); +} + +int elf_x86_64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + const char *append = NULL; + char *command_line = NULL, *modified_cmdline; + char *tmp_cmdline = NULL; + int command_line_len; + const char *ramdisk; + unsigned long entry, max_addr; + int arg_style; +#define ARG_STYLE_ELF 0 +#define ARG_STYLE_LINUX 1 +#define ARG_STYLE_NONE 2 + int opt; + int result = 0; + const char *error_msg = NULL; + + /* See options.h and add any new options there too! */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "args-elf", 0, NULL, OPT_ARGS_ELF }, + { "args-linux", 0, NULL, OPT_ARGS_LINUX }, + { "args-none", 0, NULL, OPT_ARGS_NONE }, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + /* + * Parse the command line arguments + */ + arg_style = ARG_STYLE_ELF; + modified_cmdline = 0; + ramdisk = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + fprintf(stderr, "Unknown option: opt: %d\n", opt); + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_ARGS_ELF: + arg_style = ARG_STYLE_ELF; + break; + case OPT_ARGS_LINUX: + arg_style = ARG_STYLE_LINUX; + break; + case OPT_ARGS_NONE: +#ifdef __x86_64__ + arg_style = ARG_STYLE_NONE; +#else + die("--args-none only works on arch x86_64\n"); +#endif + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); + if (tmp_cmdline) + free(tmp_cmdline); + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) +1; + } + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (command_line) { + strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + } + + /* Load the ELF executable */ + elf_exec_build_load(info, &ehdr, buf, len, 0); + + entry = ehdr.e_entry; + max_addr = elf_max_addr(&ehdr); + + /* Do we want arguments? */ + if (arg_style != ARG_STYLE_NONE) { + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0, ULONG_MAX, 1, 0); + } + if (arg_style == ARG_STYLE_NONE) { + info->entry = (void *)entry; + + } + else if (arg_style == ARG_STYLE_ELF) { + unsigned long note_base; + struct entry64_regs regs; + + /* Setup the ELF boot notes */ + note_base = elf_boot_notes(info, max_addr, command_line, command_line_len); + + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry64_regs", ®s, sizeof(regs)); + regs.rdi = note_base; /* The notes (arg1) */ + regs.rip = entry; /* The entry point */ + regs.rsp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */ + elf_rel_set_symbol(&info->rhdr, "entry64_regs", ®s, sizeof(regs)); + + if (ramdisk) { + error_msg = "Ramdisks not supported with generic elf arguments"; + goto out; + } + } + else if (arg_style == ARG_STYLE_LINUX) { + struct x86_linux_faked_param_header *hdr; + unsigned long param_base; + char *ramdisk_buf; + off_t ramdisk_length; + struct entry64_regs regs; + int rc=0; + + /* Get the linux parameter header */ + hdr = xmalloc(sizeof(*hdr)); + param_base = add_buffer(info, hdr, sizeof(*hdr), sizeof(*hdr), + 16, 0, max_addr, 1); + + /* Initialize the parameter header */ + memset(hdr, 0, sizeof(*hdr)); + init_linux_parameters(&hdr->hdr); + + /* Add a ramdisk to the current image */ + ramdisk_buf = 0; + ramdisk_length = 0; + if (ramdisk) { + ramdisk_buf = slurp_file(ramdisk, &ramdisk_length); + } + + /* If panic kernel is being loaded, additional segments need + * to be created. */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + rc = load_crashdump_segments(info, modified_cmdline, + max_addr, 0); + if (rc < 0) { + result = -1; + goto out; + } + /* Use new command line. */ + free(command_line); + command_line = modified_cmdline; + command_line_len = strlen(modified_cmdline) + 1; + modified_cmdline = NULL; + } + + /* Tell the kernel what is going on */ + setup_linux_bootloader_parameters(info, &hdr->hdr, param_base, + offsetof(struct x86_linux_faked_param_header, command_line), + command_line, command_line_len, + ramdisk_buf, ramdisk_length); + + /* Fill in the information bios calls would usually provide */ + setup_linux_system_parameters(info, &hdr->hdr); + + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry64_regs", ®s, sizeof(regs)); + regs.rbx = 0; /* Bootstrap processor */ + regs.rsi = param_base; /* Pointer to the parameters */ + regs.rip = entry; /* the entry point */ + regs.rsp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */ + elf_rel_set_symbol(&info->rhdr, "entry64_regs", ®s, sizeof(regs)); + } + else { + error_msg = "Unknown argument style\n"; + } + +out: + free(command_line); + free(modified_cmdline); + if (error_msg) + die("%s", error_msg); + return result; +} diff --git a/kexec/arch/x86_64/kexec-x86_64.c b/kexec/arch/x86_64/kexec-x86_64.c new file mode 100644 index 0000000..ffd84f0 --- /dev/null +++ b/kexec/arch/x86_64/kexec-x86_64.c @@ -0,0 +1,190 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <stdlib.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-x86_64.h" +#include "../i386/crashdump-x86.h" +#include <arch/options.h> + +struct file_type file_type[] = { + { "multiboot2-x86", multiboot2_x86_probe, multiboot2_x86_load, + multiboot2_x86_usage }, + { "elf-x86_64", elf_x86_64_probe, elf_x86_64_load, elf_x86_64_usage }, + { "multiboot-x86", multiboot_x86_probe, multiboot_x86_load, + multiboot_x86_usage }, + { "elf-x86", elf_x86_probe, elf_x86_load, elf_x86_usage }, + { "bzImage64", bzImage64_probe, bzImage64_load, bzImage64_usage }, + { "bzImage", bzImage_probe, bzImage_load, bzImage_usage }, + { "beoboot-x86", beoboot_probe, beoboot_load, beoboot_usage }, + { "nbi-x86", nbi_probe, nbi_load, nbi_usage }, +}; +int file_types = sizeof(file_type)/sizeof(file_type[0]); + + +void arch_usage(void) +{ + printf( + " --reset-vga Attempt to reset a standard vga device\n" + " --serial=<port> Specify the serial port for debug output\n" + " --serial-baud=<baud_rate> Specify the serial port baud rate\n" + " --console-vga Enable the vga console\n" + " --console-serial Enable the serial console\n" + " --pass-memmap-cmdline Pass memory map via command line in kexec on panic case\n" + " --noefi Disable efi support\n" + " --reuse-video-type Reuse old boot time video type blindly\n" + ); +} + +struct arch_options_t arch_options = { + .reset_vga = 0, + .serial_base = 0x3f8, + .serial_baud = 0, + .console_vga = 0, + .console_serial = 0, + .core_header_type = CORE_TYPE_ELF64, + .pass_memmap_cmdline = 0, + .noefi = 0, + .reuse_video_type = 0, +}; + +int arch_process_options(int argc, char **argv) +{ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + unsigned long value; + char *end; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + break; + case OPT_RESET_VGA: + arch_options.reset_vga = 1; + break; + case OPT_CONSOLE_VGA: + arch_options.console_vga = 1; + break; + case OPT_CONSOLE_SERIAL: + arch_options.console_serial = 1; + break; + case OPT_SERIAL: + value = ULONG_MAX; + if (strcmp(optarg, "ttyS0") == 0) { + value = 0x3f8; + } + else if (strcmp(optarg, "ttyS1") == 0) { + value = 0x2f8; + } + else if (strncmp(optarg, "0x", 2) == 0) { + value = strtoul(optarg +2, &end, 16); + if (*end != '\0') { + value = ULONG_MAX; + } + } + if (value >= 65536) { + fprintf(stderr, "Bad serial port base '%s'\n", + optarg); + usage(); + return -1; + + } + arch_options.serial_base = value; + break; + case OPT_SERIAL_BAUD: + value = strtoul(optarg, &end, 0); + if ((value > 115200) || ((115200 %value) != 0) || + (value < 9600) || (*end)) + { + fprintf(stderr, "Bad serial port baud rate '%s'\n", + optarg); + usage(); + return -1; + + } + arch_options.serial_baud = value; + break; + case OPT_PASS_MEMMAP_CMDLINE: + arch_options.pass_memmap_cmdline = 1; + break; + case OPT_NOEFI: + arch_options.noefi = 1; + break; + case OPT_REUSE_VIDEO_TYPE: + arch_options.reuse_video_type = 1; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + { "x86_64", KEXEC_ARCH_X86_64 }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *info) +{ + uint8_t panic_kernel = 0; + + elf_rel_set_symbol(&info->rhdr, "reset_vga", + &arch_options.reset_vga, sizeof(arch_options.reset_vga)); + elf_rel_set_symbol(&info->rhdr, "serial_base", + &arch_options.serial_base, sizeof(arch_options.serial_base)); + elf_rel_set_symbol(&info->rhdr, "serial_baud", + &arch_options.serial_baud, sizeof(arch_options.serial_baud)); + elf_rel_set_symbol(&info->rhdr, "console_vga", + &arch_options.console_vga, sizeof(arch_options.console_vga)); + elf_rel_set_symbol(&info->rhdr, "console_serial", + &arch_options.console_serial, sizeof(arch_options.console_serial)); + elf_rel_set_symbol(&info->rhdr, "backup_src_start", + &info->backup_src_start, sizeof(info->backup_src_start)); + elf_rel_set_symbol(&info->rhdr, "backup_src_size", + &info->backup_src_size, sizeof(info->backup_src_size)); + + if (info->kexec_flags & KEXEC_ON_CRASH) { + panic_kernel = 1; + elf_rel_set_symbol(&info->rhdr, "backup_start", + &info->backup_start, sizeof(info->backup_start)); + } + elf_rel_set_symbol(&info->rhdr, "panic_kernel", + &panic_kernel, sizeof(panic_kernel)); +} diff --git a/kexec/arch/x86_64/kexec-x86_64.h b/kexec/arch/x86_64/kexec-x86_64.h new file mode 100644 index 0000000..21c3a73 --- /dev/null +++ b/kexec/arch/x86_64/kexec-x86_64.h @@ -0,0 +1,41 @@ +#ifndef KEXEC_X86_64_H +#define KEXEC_X86_64_H + +#include "../i386/kexec-x86.h" + +struct entry64_regs { + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t rbp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rip; +}; + +int elf_x86_64_probe(const char *buf, off_t len); +int elf_x86_64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_x86_64_usage(void); + +int bzImage64_probe(const char *buf, off_t len); +int bzImage64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void bzImage64_usage(void); + +int multiboot2_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void multiboot2_x86_usage(void); +int multiboot2_x86_probe(const char *buf, off_t buf_len); + +#endif /* KEXEC_X86_64_H */ |