diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 02:56:35 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 02:56:35 +0000 |
commit | eba0cfa6b0bef4f2e73c8630a7efa3944df8b0f8 (patch) | |
tree | 74c37eede1f0634cc5de1c63c934edaa1630c6bc /kexec | |
parent | Initial commit. (diff) | |
download | kexec-tools-eba0cfa6b0bef4f2e73c8630a7efa3944df8b0f8.tar.xz kexec-tools-eba0cfa6b0bef4f2e73c8630a7efa3944df8b0f8.zip |
Adding upstream version 1:2.0.27. (tags: upstream/1%2.0.27, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
207 files changed, 37781 insertions, 0 deletions
diff --git a/kexec-tools.spec b/kexec-tools.spec new file mode 100644 index 0000000..169aceb --- /dev/null +++ b/kexec-tools.spec @@ -0,0 +1,42 @@ +Summary: Load one kernel from another +Name: kexec-tools +Version: 2.0.27 +Release: 0 +License: GPL +Group: Development/Tools +Source0:%{name}-%{version}.tar.gz +Packager: Eric Biederman <ebiederman@xmission.com> +BuildRoot: %{_tmppath}/%{name} + +%description +/sbin/kexec is a user space utility for loading another kernel +and asking the currently running kernel to do something with it. +A currently running kernel may be asked to start the loaded +kernel on reboot, or to start the loaded kernel after it panics. + +The panic case is useful for having an intact kernel for writing +crash dumps. But other uses may be imagined. + +%prep +%setup -q -n %{name}-%{version} + +%build +%configure +make + +%install +make install DESTDIR=${RPM_BUILD_ROOT} + +%files +%defattr(-,root,root) +%{_sbindir}/kexec +%{_sbindir}/vmcore-dmesg +%doc News +%doc COPYING +%doc TODO +%{_mandir}/man8/kexec.8.gz +%{_mandir}/man8/vmcore-dmesg.8.gz + +%changelog +* Tue Dec 16 2004 Eric Biederman <ebiederman@lnxi.com> +- kexec-tools initialy packaged as an rpm. diff --git a/kexec-tools.spec.in b/kexec-tools.spec.in new file mode 100644 index 0000000..3e57a22 --- /dev/null +++ b/kexec-tools.spec.in @@ -0,0 +1,42 @@ +Summary: Load one kernel from another +Name: kexec-tools +Version: +Release: 0 +License: GPL +Group: Development/Tools +Source0:%{name}-%{version}.tar.gz +Packager: Eric Biederman <ebiederman@xmission.com> +BuildRoot: %{_tmppath}/%{name} + +%description +/sbin/kexec is a user space utility for loading another kernel +and asking the currently running kernel to do something with it. +A currently running kernel may be asked to start the loaded +kernel on reboot, or to start the loaded kernel after it panics. + +The panic case is useful for having an intact kernel for writing +crash dumps. But other uses may be imagined. 
+ +%prep +%setup -q -n %{name}-%{version} + +%build +%configure +make + +%install +make install DESTDIR=${RPM_BUILD_ROOT} + +%files +%defattr(-,root,root) +%{_sbindir}/kexec +%{_sbindir}/vmcore-dmesg +%doc News +%doc COPYING +%doc TODO +%{_mandir}/man8/kexec.8.gz +%{_mandir}/man8/vmcore-dmesg.8.gz + +%changelog +* Tue Dec 16 2004 Eric Biederman <ebiederman@lnxi.com> +- kexec-tools initialy packaged as an rpm. diff --git a/kexec/Makefile b/kexec/Makefile new file mode 100644 index 0000000..11682bf --- /dev/null +++ b/kexec/Makefile @@ -0,0 +1,126 @@ +# +# kexec (linux booting linux) +# +PURGATORY_HEX_C = kexec/purgatory.c + +$(PURGATORY_HEX_C): $(PURGATORY) $(BIN_TO_HEX) + $(MKDIR) -p $(@D) + $(BIN_TO_HEX) purgatory < $(PURGATORY) > $@ + +KEXEC_SRCS = $(KEXEC_SRCS_base) +KEXEC_GENERATED_SRCS = + +KEXEC_SRCS_base += kexec/kexec.c +KEXEC_SRCS_base += kexec/ifdown.c +KEXEC_SRCS_base += kexec/kexec-elf.c +KEXEC_SRCS_base += kexec/kexec-elf-exec.c +KEXEC_SRCS_base += kexec/kexec-elf-core.c +KEXEC_SRCS_base += kexec/kexec-elf-rel.c +KEXEC_SRCS_base += kexec/kexec-elf-boot.c +KEXEC_SRCS_base += kexec/kexec-pe-zboot.c +KEXEC_SRCS_base += kexec/kexec-iomem.c +KEXEC_SRCS_base += kexec/firmware_memmap.c +KEXEC_SRCS_base += kexec/crashdump.c +KEXEC_SRCS_base += kexec/crashdump-xen.c +KEXEC_SRCS_base += kexec/phys_arch.c +KEXEC_SRCS_base += kexec/lzma.c +KEXEC_SRCS_base += kexec/zlib.c +KEXEC_SRCS_base += kexec/kexec-xen.c +KEXEC_SRCS_base += kexec/symbols.c + +KEXEC_GENERATED_SRCS += $(PURGATORY_HEX_C) + +dist += kexec/Makefile \ + $(KEXEC_SRCS_base) kexec/crashdump-elf.c \ + kexec/crashdump.h kexec/firmware_memmap.h \ + kexec/kexec-elf-boot.h \ + kexec/kexec-elf.h kexec/kexec-sha256.h \ + kexec/kexec-zlib.h kexec/kexec-lzma.h \ + kexec/kexec-xen.h \ + kexec/kexec-syscall.h kexec/kexec.h kexec/kexec.8 + +dist += kexec/proc_iomem.c +$(ARCH)_PROC_IOMEM = kexec/proc_iomem.c +KEXEC_SRCS += $($(ARCH)_PROC_IOMEM) + +dist += kexec/virt_to_phys.c +$(ARCH)_VIRT_TO_PHYS = 
kexec/virt_to_phys.c +KEXEC_SRCS += $($(ARCH)_VIRT_TO_PHYS) + +dist += kexec/phys_to_virt.c +$(ARCH)_PHYS_TO_VIRT = kexec/phys_to_virt.c +KEXEC_SRCS += $($(ARCH)_PHYS_TO_VIRT) + +dist += kexec/add_segment.c +$(ARCH)_ADD_SEGMENT = kexec/add_segment.c +KEXEC_SRCS += $($(ARCH)_ADD_SEGMENT) + +dist += kexec/add_buffer.c +$(ARCH)_ADD_BUFFER = kexec/add_buffer.c +KEXEC_SRCS += $($(ARCH)_ADD_BUFFER) + +dist += kexec/arch_reuse_initrd.c +$(ARCH)_ARCH_REUSE_INITRD = kexec/arch_reuse_initrd.c +KEXEC_SRCS += $($(ARCH)_ARCH_REUSE_INITRD) + +dist += kexec/kexec-uImage.c +$(ARCH)_UIMAGE = +KEXEC_SRCS += $($(ARCH)_UIMAGE) + +dist += kexec/fs2dt.c kexec/fs2dt.h +$(ARCH)_FS2DT = +KEXEC_SRCS += $($(ARCH)_FS2DT) + +dist += kexec/mem_regions.c kexec/mem_regions.h +$(ARCH)_MEM_REGIONS = +KEXEC_SRCS += $($(ARCH)_MEM_REGIONS) + +dist += kexec/dt-ops.c kexec/dt-ops.h +$(ARCH)_DT_OPS = +KEXEC_SRCS += $($(ARCH)_DT_OPS) + +include $(srcdir)/kexec/arch/alpha/Makefile +include $(srcdir)/kexec/arch/arm/Makefile +include $(srcdir)/kexec/arch/arm64/Makefile +include $(srcdir)/kexec/arch/i386/Makefile +include $(srcdir)/kexec/arch/ia64/Makefile +include $(srcdir)/kexec/arch/m68k/Makefile +include $(srcdir)/kexec/arch/mips/Makefile +include $(srcdir)/kexec/arch/cris/Makefile +include $(srcdir)/kexec/arch/ppc/Makefile +include $(srcdir)/kexec/arch/ppc64/Makefile +include $(srcdir)/kexec/arch/s390/Makefile +include $(srcdir)/kexec/arch/sh/Makefile +include $(srcdir)/kexec/arch/x86_64/Makefile +include $(srcdir)/kexec/arch/hppa/Makefile +include $(srcdir)/kexec/arch/loongarch/Makefile + +KEXEC_SRCS += $($(ARCH)_KEXEC_SRCS) + +KEXEC_OBJS = $(call objify, $(KEXEC_SRCS) $(KEXEC_GENERATED_SRCS)) +KEXEC_DEPS = $(call depify, $(KEXEC_OBJS)) + +clean += $(KEXEC_OBJS) $(KEXEC_DEPS) $(KEXEC_GENERATED_SRCS) \ + $(KEXEC) $(KEXEC_MANPAGE) + +KEXEC = $(SBINDIR)/kexec +KEXEC_MANPAGE = $(MANDIR)/man8/kexec.8 + +-include $(KEXEC_DEPS) + +$(KEXEC): $(KEXEC_OBJS) $(UTIL_LIB) + @$(MKDIR) -p $(@D) + $(LINK.o) -o $@ $^ 
$(CFLAGS) $(LIBS) + +$(KEXEC): CPPFLAGS+=-I$(srcdir)/kexec/arch/$(ARCH)/include + +kexec/fs2dt.o: CPPFLAGS+=$($(ARCH)_FS2DT_INCLUDE) + +$(KEXEC_MANPAGE): kexec/kexec.8 + @$(MKDIR) -p $(MANDIR)/man8 + cp $^ $(KEXEC_MANPAGE) +echo:: + @echo "KEXEC_SRCS $(KEXEC_SRCS)" + @echo "KEXEC_DEPS $(KEXEC_DEPS)" + @echo "KEXEC_OBJS $(KEXEC_OBJS)" + diff --git a/kexec/add_buffer.c b/kexec/add_buffer.c new file mode 100644 index 0000000..4d4a55f --- /dev/null +++ b/kexec/add_buffer.c @@ -0,0 +1,14 @@ +#include "kexec.h" + +unsigned long add_buffer(struct kexec_info *info, + const void *buf, + unsigned long bufsz, + unsigned long memsz, + unsigned long buf_align, + unsigned long buf_min, + unsigned long buf_max, + int buf_end) +{ + return add_buffer_virt(info, buf, bufsz, memsz, buf_align, + buf_min, buf_max, buf_end); +} diff --git a/kexec/add_segment.c b/kexec/add_segment.c new file mode 100644 index 0000000..029c376 --- /dev/null +++ b/kexec/add_segment.c @@ -0,0 +1,8 @@ +#include "kexec.h" + +void add_segment(struct kexec_info *info, + const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + return add_segment_phys_virt(info, buf, bufsz, base, memsz, 0); +} diff --git a/kexec/arch/alpha/Makefile b/kexec/arch/alpha/Makefile new file mode 100644 index 0000000..4575d61 --- /dev/null +++ b/kexec/arch/alpha/Makefile @@ -0,0 +1,4 @@ +alpha_KEXEC_SRCS= +dist += kexec/arch/alpha/Makefile kexec/arch/alpha/include/arch/options.h + $(alpha_KEXEC_SRCS) + diff --git a/kexec/arch/alpha/include/arch/options.h b/kexec/arch/alpha/include/arch/options.h new file mode 100644 index 0000000..a012c8a --- /dev/null +++ b/kexec/arch/alpha/include/arch/options.h @@ -0,0 +1,20 @@ +#ifndef KEXEC_ARCH_ALPHA_OPTIONS_H +#define KEXEC_ARCH_ALPHA_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* 
See the other architectures for details of these; Alpha has no + * loader-specific options yet. + */ +#define KEXEC_ALL_OPTIONS KEXEC_ARCH_OPTIONS +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_ALPHA_OPTIONS_H */ diff --git a/kexec/arch/arm/Makefile b/kexec/arch/arm/Makefile new file mode 100644 index 0000000..4454f47 --- /dev/null +++ b/kexec/arch/arm/Makefile @@ -0,0 +1,34 @@ +# +# kexec arm (linux booting linux) +# +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +arm_FS2DT = kexec/fs2dt.c +arm_FS2DT_INCLUDE = -include $(srcdir)/kexec/arch/arm/crashdump-arm.h \ + -include $(srcdir)/kexec/arch/arm/kexec-arm.h + +arm_MEM_REGIONS = kexec/mem_regions.c + +arm_KEXEC_SRCS= kexec/arch/arm/kexec-elf-rel-arm.c +arm_KEXEC_SRCS+= kexec/arch/arm/kexec-zImage-arm.c +arm_KEXEC_SRCS+= kexec/arch/arm/kexec-uImage-arm.c +arm_KEXEC_SRCS+= kexec/arch/arm/kexec-arm.c +arm_KEXEC_SRCS+= kexec/arch/arm/crashdump-arm.c +arm_KEXEC_SRCS+= kexec/fs2dt.c + +libfdt_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) + +arm_CPPFLAGS = -I$(srcdir)/kexec/libfdt + +# We want 64-bit file IO for kdump to work correctly on LPAE systems +arm_CPPFLAGS += -D_FILE_OFFSET_BITS=64 + +arm_KEXEC_SRCS += $(libfdt_SRCS) + +arm_UIMAGE = kexec/kexec-uImage.c +arm_PHYS_TO_VIRT = kexec/arch/arm/phys_to_virt.c + +dist += kexec/arch/arm/Makefile $(arm_KEXEC_SRCS) $(arm_PHYS_TO_VIRT) \ + kexec/arch/arm/iomem.h kexec/arch/arm/phys_to_virt.h \ + kexec/arch/arm/crashdump-arm.h kexec/arch/arm/kexec-arm.h \ + kexec/arch/arm/include/arch/options.h diff --git a/kexec/arch/arm/crashdump-arm.c b/kexec/arch/arm/crashdump-arm.c new file mode 100644 index 0000000..1ec1826 --- /dev/null +++ b/kexec/arch/arm/crashdump-arm.c @@ -0,0 +1,388 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) Nokia Corporation, 2010. + * Author: Mika Westerberg + * + * Based on x86 implementation + * Copyright (C) IBM Corporation, 2005. 
All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <limits.h> +#include <elf.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../crashdump.h" +#include "../../mem_regions.h" +#include "crashdump-arm.h" +#include "iomem.h" +#include "phys_to_virt.h" + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define ELFDATANATIVE ELFDATA2LSB +#elif __BYTE_ORDER == __BIG_ENDIAN +#define ELFDATANATIVE ELFDATA2MSB +#else +#error "Unknown machine endian" +#endif + +/* + * Used to save various memory ranges/regions needed for the captured + * kernel to boot. 
(like memmap= option in other archs) + */ +static struct memory_range crash_memory_ranges[CRASH_MAX_MEMORY_RANGES]; +struct memory_ranges usablemem_rgns = { + .max_size = CRASH_MAX_MEMORY_RANGES, + .ranges = crash_memory_ranges, +}; + +/* The boot-time physical memory range reserved for crashkernel region */ +struct memory_range crash_kernel_mem; + +/* reserved regions */ +#define CRASH_MAX_RESERVED_RANGES 2 +static struct memory_range crash_reserved_ranges[CRASH_MAX_RESERVED_RANGES]; +static struct memory_ranges crash_reserved_rgns = { + .max_size = CRASH_MAX_RESERVED_RANGES, + .ranges = crash_reserved_ranges, +}; + +struct memory_range elfcorehdr_mem; + +static struct crash_elf_info elf_info = { + .class = ELFCLASS32, + .data = ELFDATANATIVE, + .machine = EM_ARM, + .page_offset = DEFAULT_PAGE_OFFSET, +}; + +extern unsigned long long user_page_offset; + +static int get_kernel_page_offset(struct kexec_info *info, + struct crash_elf_info *elf_info) +{ + unsigned long long stext_sym_addr = get_kernel_sym("_stext"); + if (stext_sym_addr == 0) { + if (user_page_offset != (-1ULL)) { + elf_info->page_offset = user_page_offset; + dbgprintf("Unable to get _stext symbol from /proc/kallsyms, " + "use user provided vaule: %llx\n", + elf_info->page_offset); + return 0; + } + elf_info->page_offset = (unsigned long long)DEFAULT_PAGE_OFFSET; + dbgprintf("Unable to get _stext symbol from /proc/kallsyms, " + "use default: %llx\n", + elf_info->page_offset); + return 0; + } else if ((user_page_offset != (-1ULL)) && + (user_page_offset != stext_sym_addr)) { + fprintf(stderr, "PAGE_OFFSET is set to %llx " + "instead of user provided value %llx\n", + stext_sym_addr & (~KVBASE_MASK), + user_page_offset); + } + elf_info->page_offset = stext_sym_addr & (~KVBASE_MASK); + return 0; +} + +/** + * crash_get_memory_ranges() - read system physical memory + * + * Function reads through system physical memory and stores found memory regions + * in @crash_memory_ranges. 
Number of memory regions found is placed in + * @crash_memory_nr_ranges. Regions are sorted in ascending order. + * + * Returns %0 in case of success and %-1 otherwise (errno is set). + */ +static int crash_get_memory_ranges(void) +{ + int i; + + if (usablemem_rgns.size < 1) { + errno = EINVAL; + return -1; + } + + dbgprint_mem_range("Reserved memory ranges", + crash_reserved_rgns.ranges, + crash_reserved_rgns.size); + + /* + * Exclude all reserved memory from the usable memory regions. + * We want to avoid dumping the crashkernel region itself. Note + * that this may result in memory regions in usablemem_rgns being + * split. + */ + for (i = 0; i < crash_reserved_rgns.size; i++) { + if (mem_regions_exclude(&usablemem_rgns, + &crash_reserved_rgns.ranges[i])) { + fprintf(stderr, + "Error: Number of crash memory ranges excedeed the max limit\n"); + errno = ENOMEM; + return -1; + } + } + + /* + * Make sure that the memory regions are sorted. + */ + mem_regions_sort(&usablemem_rgns); + + dbgprint_mem_range("Coredump memory ranges", + usablemem_rgns.ranges, usablemem_rgns.size); + + return 0; +} + +/** + * cmdline_add_elfcorehdr() - adds elfcorehdr= to @cmdline + * @cmdline: buffer where parameter is placed + * @elfcorehdr: physical address of elfcorehdr + * + * Function appends 'elfcorehdr=start' at the end of the command line given in + * @cmdline. Note that @cmdline must be at least %COMMAND_LINE_SIZE bytes long + * (including %NUL). 
+ */ +static void cmdline_add_elfcorehdr(char *cmdline, unsigned long elfcorehdr) +{ + char buf[COMMAND_LINE_SIZE]; + int buflen; + + buflen = snprintf(buf, sizeof(buf), "%s elfcorehdr=%#lx", + cmdline, elfcorehdr); + if (buflen < 0) + die("Failed to construct elfcorehdr= command line parameter\n"); + if (buflen >= sizeof(buf)) + die("Command line overflow\n"); + + (void) strncpy(cmdline, buf, COMMAND_LINE_SIZE); + cmdline[COMMAND_LINE_SIZE - 1] = '\0'; +} + +/** + * cmdline_add_mem() - adds mem= parameter to kernel command line + * @cmdline: buffer where parameter is placed + * @size: size of the kernel reserved memory (in bytes) + * + * This function appends 'mem=size' at the end of the command line given in + * @cmdline. Note that @cmdline must be at least %COMMAND_LINE_SIZE bytes long + * (including %NUL). + */ +static void cmdline_add_mem(char *cmdline, unsigned long size) +{ + char buf[COMMAND_LINE_SIZE]; + int buflen; + + buflen = snprintf(buf, sizeof(buf), "%s mem=%ldK", cmdline, size >> 10); + if (buflen < 0) + die("Failed to construct mem= command line parameter\n"); + if (buflen >= sizeof(buf)) + die("Command line overflow\n"); + + (void) strncpy(cmdline, buf, COMMAND_LINE_SIZE); + cmdline[COMMAND_LINE_SIZE - 1] = '\0'; +} + +static unsigned long long range_size(const struct memory_range *r) +{ + return r->end - r->start + 1; +} + +static void dump_memory_ranges(void) +{ + int i; + + if (!kexec_debug) + return; + + dbgprintf("crashkernel: [%#llx - %#llx] (%ldM)\n", + crash_kernel_mem.start, crash_kernel_mem.end, + (unsigned long)range_size(&crash_kernel_mem) >> 20); + + for (i = 0; i < usablemem_rgns.size; i++) { + struct memory_range *r = usablemem_rgns.ranges + i; + dbgprintf("memory range: [%#llx - %#llx] (%ldM)\n", + r->start, r->end, (unsigned long)range_size(r) >> 20); + } +} + +/** + * load_crashdump_segments() - loads additional segments needed for kdump + * @info: kexec info structure + * @mod_cmdline: kernel command line + * + * This function 
loads additional segments which are needed for the dump capture + * kernel. It also updates kernel command line passed in @mod_cmdline to have + * right parameters for the dump capture kernel. + * + * Return %0 in case of success and %-1 in case of error. + */ +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline) +{ + unsigned long elfcorehdr; + unsigned long bufsz; + void *buf; + int err; + int last_ranges; + + /* + * First fetch all the memory (RAM) ranges that we are going to pass to + * the crashdump kernel during panic. + */ + err = crash_get_memory_ranges(); + if (err) + return err; + + /* + * Now that we have memory regions sorted, we can use first memory + * region as PHYS_OFFSET. + */ + phys_offset = usablemem_rgns.ranges->start; + + if (get_kernel_page_offset(info, &elf_info)) + return -1; + + dbgprintf("phys offset = %#llx, page offset = %llx\n", + phys_offset, elf_info.page_offset); + + /* + * Ensure that the crash kernel memory range is sane. The crash kernel + * must be located within memory which is visible during booting. 
+ */ + if (crash_kernel_mem.end > ARM_MAX_VIRTUAL) { + fprintf(stderr, + "Crash kernel memory [0x%llx-0x%llx] is inaccessible at boot - unable to load crash kernel\n", + crash_kernel_mem.start, crash_kernel_mem.end); + return -1; + } + + last_ranges = usablemem_rgns.size - 1; + if (last_ranges < 0) + last_ranges = 0; + + if (crash_memory_ranges[last_ranges].end > UINT32_MAX) { + dbgprintf("Using 64-bit ELF core format\n"); + + /* for support LPAE enabled kernel*/ + elf_info.class = ELFCLASS64; + + err = crash_create_elf64_headers(info, &elf_info, + usablemem_rgns.ranges, + usablemem_rgns.size, &buf, &bufsz, + ELF_CORE_HEADER_ALIGN); + } else { + dbgprintf("Using 32-bit ELF core format\n"); + err = crash_create_elf32_headers(info, &elf_info, + usablemem_rgns.ranges, + usablemem_rgns.size, &buf, &bufsz, + ELF_CORE_HEADER_ALIGN); + } + if (err) + return err; + + /* + * We allocate ELF core header from the end of the memory area reserved + * for the crashkernel. We align the header to SECTION_SIZE (which is + * 1MB) so that available memory passed in kernel command line will be + * aligned to 1MB. This is because kernel create_mapping() wants memory + * regions to be aligned to SECTION_SIZE. + */ + elfcorehdr = add_buffer_phys_virt(info, buf, bufsz, bufsz, 1 << 20, + crash_kernel_mem.start, + crash_kernel_mem.end, -1, 0); + + elfcorehdr_mem.start = elfcorehdr; + elfcorehdr_mem.end = elfcorehdr + bufsz - 1; + + dbgprintf("elfcorehdr 0x%llx-0x%llx\n", elfcorehdr_mem.start, + elfcorehdr_mem.end); + cmdline_add_elfcorehdr(mod_cmdline, elfcorehdr); + + /* + * Add 'mem=size' parameter to dump capture kernel command line. This + * prevents the dump capture kernel from using any other memory regions + * which belong to the primary kernel. 
+ */ + cmdline_add_mem(mod_cmdline, elfcorehdr - crash_kernel_mem.start); + + dump_memory_ranges(); + dbgprintf("kernel command line: \"%s\"\n", mod_cmdline); + + return 0; +} + +/** + * iomem_range_callback() - callback called for each iomem region + * @data: not used + * @nr: not used + * @str: name of the memory region (not NULL terminated) + * @base: start address of the memory region + * @length: size of the memory region + * + * This function is called for each memory range in /proc/iomem, stores + * the location of the crash kernel range into @crash_kernel_mem, and + * stores the system RAM into @usablemem_rgns. + */ +static int iomem_range_callback(void *UNUSED(data), int UNUSED(nr), + char *str, unsigned long long base, + unsigned long long length) +{ + if (strncmp(str, CRASH_KERNEL_BOOT, strlen(CRASH_KERNEL_BOOT)) == 0) { + crash_kernel_mem.start = base; + crash_kernel_mem.end = base + length - 1; + crash_kernel_mem.type = RANGE_RAM; + return mem_regions_add(&crash_reserved_rgns, + base, length, RANGE_RAM); + } + else if (strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)) == 0) { + if (crash_kernel_mem.start == crash_kernel_mem.end) { + crash_kernel_mem.start = base; + crash_kernel_mem.end = base + length - 1; + crash_kernel_mem.type = RANGE_RAM; + } + return mem_regions_add(&crash_reserved_rgns, + base, length, RANGE_RAM); + } + else if (strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0) { + return mem_regions_add(&usablemem_rgns, + base, length, RANGE_RAM); + } + return 0; +} + +/** + * is_crashkernel_mem_reserved() - check for the crashkernel reserved region + * + * Check for the crashkernel reserved region in /proc/iomem, and return + * true if it is present, or false otherwise. We use this to store the + * location of this region, and system RAM regions. 
+ */ +int is_crashkernel_mem_reserved(void) +{ + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + return crash_kernel_mem.start != crash_kernel_mem.end; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + return parse_iomem_single("Crash kernel\n", start, end); +} diff --git a/kexec/arch/arm/crashdump-arm.h b/kexec/arch/arm/crashdump-arm.h new file mode 100644 index 0000000..bbdf8bf --- /dev/null +++ b/kexec/arch/arm/crashdump-arm.h @@ -0,0 +1,27 @@ +#ifndef CRASHDUMP_ARM_H +#define CRASHDUMP_ARM_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define COMMAND_LINE_SIZE 1024 +#define DEFAULT_PAGE_OFFSET (0xc0000000) +#define KVBASE_MASK (0x1ffffff) +#define CRASH_MAX_MEMORY_RANGES 32 +#define ARM_MAX_VIRTUAL UINT32_MAX + + +extern struct memory_ranges usablemem_rgns; +extern struct memory_range crash_kernel_mem; +extern struct memory_range elfcorehdr_mem; + +struct kexec_info; + +extern int load_crashdump_segments(struct kexec_info *, char *); + +#ifdef __cplusplus +} +#endif + +#endif /* CRASHDUMP_ARM_H */ diff --git a/kexec/arch/arm/include/arch/options.h b/kexec/arch/arm/include/arch/options.h new file mode 100644 index 0000000..6fabfb7 --- /dev/null +++ b/kexec/arch/arm/include/arch/options.h @@ -0,0 +1,52 @@ +#ifndef KEXEC_ARCH_ARM_OPTIONS_H +#define KEXEC_ARCH_ARM_OPTIONS_H + +#define OPT_DT_NO_OLD_ROOT (OPT_MAX+0) +#define OPT_ARCH_MAX (OPT_MAX+1) + +#define OPT_DTB (OPT_ARCH_MAX+0) +#define OPT_ATAGS (OPT_ARCH_MAX+1) +#define OPT_IMAGE_SIZE (OPT_ARCH_MAX+2) +#define OPT_PAGE_OFFSET (OPT_ARCH_MAX+3) +#define OPT_APPEND (OPT_ARCH_MAX+4) +#define OPT_RAMDISK (OPT_ARCH_MAX+5) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "dt-no-old-root", 0, 0, OPT_DT_NO_OLD_ROOT }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's 
loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, 0, OPT_APPEND }, \ + { "append", 1, 0, OPT_APPEND }, \ + { "initrd", 1, 0, OPT_RAMDISK }, \ + { "ramdisk", 1, 0, OPT_RAMDISK }, \ + { "dtb", 1, 0, OPT_DTB }, \ + { "atags", 0, 0, OPT_ATAGS }, \ + { "image-size", 1, 0, OPT_IMAGE_SIZE }, \ + { "page-offset", 1, 0, OPT_PAGE_OFFSET }, + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR "" + +extern unsigned int kexec_arm_image_size; + +#endif /* KEXEC_ARCH_ARM_OPTIONS_H */ diff --git a/kexec/arch/arm/iomem.h b/kexec/arch/arm/iomem.h new file mode 100644 index 0000000..85f958e --- /dev/null +++ b/kexec/arch/arm/iomem.h @@ -0,0 +1,9 @@ +#ifndef IOMEM_H +#define IOMEM_H + +#define SYSTEM_RAM "System RAM\n" +#define SYSTEM_RAM_BOOT "System RAM (boot alias)\n" +#define CRASH_KERNEL "Crash kernel\n" +#define CRASH_KERNEL_BOOT "Crash kernel (boot alias)\n" + +#endif diff --git a/kexec/arch/arm/kexec-arm.c b/kexec/arch/arm/kexec-arm.c new file mode 100644 index 0000000..49f35b1 --- /dev/null +++ b/kexec/arch/arm/kexec-arm.c @@ -0,0 +1,150 @@ +/* + * kexec: Linux boots Linux + * + * modified from kexec-ppc.c + * + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include 
"../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-arm.h" +#include <arch/options.h> +#include "../../fs2dt.h" +#include "iomem.h" + +#define MAX_MEMORY_RANGES 64 +#define MAX_LINE 160 +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +/* Return a sorted list of available memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + const char *iomem = proc_iomem(); + int memory_ranges = 0; + char line[MAX_LINE]; + FILE *fp; + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + + while(fgets(line, sizeof(line), fp) != 0) { + unsigned long long start, end; + char *str; + int type; + int consumed; + int count; + if (memory_ranges >= MAX_MEMORY_RANGES) + break; + count = sscanf(line, "%llx-%llx : %n", + &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + + if (memcmp(str, SYSTEM_RAM_BOOT, strlen(SYSTEM_RAM_BOOT)) == 0 || + memcmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0) { + type = RANGE_RAM; + } + else if (memcmp(str, "reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } + else { + continue; + } + + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + memory_ranges++; + } + fclose(fp); + *range = memory_range; + *ranges = memory_ranges; + + dbgprint_mem_range("MEMORY RANGES", *range, *ranges); + + return 0; +} + +/* Supported file types and callbacks */ +struct file_type file_type[] = { + /* uImage is probed before zImage because the latter also accepts + uncompressed images. 
*/ + {"uImage", uImage_arm_probe, uImage_arm_load, zImage_arm_usage}, + {"zImage", zImage_arm_probe, zImage_arm_load, zImage_arm_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ + printf(" --image-size=<size>\n" + " Specify the assumed total image size of\n" + " the kernel that is about to be loaded,\n" + " including the .bss section, as reported\n" + " by 'arm-linux-size vmlinux'. If not\n" + " specified, this value is implicitly set\n" + " to the compressed images size * 4.\n" + " --dt-no-old-root\n" + " do not reuse old kernel root= param.\n" + " while creating flatten device tree.\n"); +} + +int arch_process_options(int argc, char **argv) +{ + /* We look for all options so getopt_long doesn't start reordering + * argv[] before file_type[n].load() gets a look in. + */ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + case OPT_DT_NO_OLD_ROOT: + dt_no_old_root = 1; + break; + default: + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + { "arm", KEXEC_ARCH_ARM }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +/* return 1 if /sys/firmware/fdt exists, otherwise return 0 */ +int have_sysfs_fdt(void) +{ + return !access(SYSFS_FDT, F_OK); +} diff --git a/kexec/arch/arm/kexec-arm.h b/kexec/arch/arm/kexec-arm.h new file mode 100644 index 0000000..a74cce2 --- /dev/null +++ b/kexec/arch/arm/kexec-arm.h @@ -0,0 +1,22 @@ +#ifndef KEXEC_ARM_H +#define KEXEC_ARM_H + +#include <sys/types.h> + +#define SYSFS_FDT 
"/sys/firmware/fdt" +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 16 + +extern off_t initrd_base, initrd_size; + +int zImage_arm_probe(const char *buf, off_t len); +int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void zImage_arm_usage(void); + +int uImage_arm_probe(const char *buf, off_t len); +int uImage_arm_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +extern int have_sysfs_fdt(void); + +#endif /* KEXEC_ARM_H */ diff --git a/kexec/arch/arm/kexec-elf-rel-arm.c b/kexec/arch/arm/kexec-elf-rel-arm.c new file mode 100644 index 0000000..a939cf4 --- /dev/null +++ b/kexec/arch/arm/kexec-elf-rel-arm.c @@ -0,0 +1,36 @@ +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS32) { + return 0; + } + if (ehdr->e_machine != EM_ARM) + { + return 0; + } + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + switch(r_type) { + case R_ARM_ABS32: + *((uint32_t *)location) += value; + break; + case R_ARM_REL32: + *((uint32_t *)location) += value - address; + break; + default: + die("Unknown rel relocation: %lu\n", r_type); + break; + } +} diff --git a/kexec/arch/arm/kexec-uImage-arm.c b/kexec/arch/arm/kexec-uImage-arm.c new file mode 100644 index 0000000..03c2f4d --- /dev/null +++ b/kexec/arch/arm/kexec-uImage-arm.c @@ -0,0 +1,22 @@ +/* + * uImage support added by Marc Andre Tanner <mat@brain-dump.org> + */ +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <image.h> +#include <kexec-uImage.h> +#include "../../kexec.h" +#include "kexec-arm.h" + +int uImage_arm_probe(const char *buf, off_t len) +{ + return uImage_probe_kernel(buf, 
len, IH_ARCH_ARM); +} + +int uImage_arm_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + return zImage_arm_load(argc, argv, buf + sizeof(struct image_header), + len - sizeof(struct image_header), info); +} diff --git a/kexec/arch/arm/kexec-zImage-arm.c b/kexec/arch/arm/kexec-zImage-arm.c new file mode 100644 index 0000000..8b474dd --- /dev/null +++ b/kexec/arch/arm/kexec-zImage-arm.c @@ -0,0 +1,914 @@ +/* + * - 08/21/2007 ATAG support added by Uli Luckas <u.luckas@road.de> + * + */ +#define _GNU_SOURCE +#define _XOPEN_SOURCE +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <stdint.h> +#include <unistd.h> +#include <getopt.h> +#include <unistd.h> +#include <libfdt.h> +#include <arch/options.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-arm.h" +#include "../../fs2dt.h" +#include "crashdump-arm.h" +#include "iomem.h" + +#define BOOT_PARAMS_SIZE 1536 + +off_t initrd_base, initrd_size; +unsigned int kexec_arm_image_size = 0; +unsigned long long user_page_offset = (-1ULL); + +struct zimage_header { + uint32_t instr[9]; + uint32_t magic; +#define ZIMAGE_MAGIC cpu_to_le32(0x016f2818) + uint32_t start; + uint32_t end; + uint32_t endian; + + /* Extension to the data passed to the boot agent. The offset + * points at a tagged table following a similar format to the + * ATAGs. 
+ */ + uint32_t magic2; +#define ZIMAGE_MAGIC2 (0x45454545) + uint32_t extension_tag_offset; +}; + +struct android_image { + char magic[8]; + uint32_t kernel_size; + uint32_t kernel_addr; + uint32_t ramdisk_size; + uint32_t ramdisk_addr; + uint32_t stage2_size; + uint32_t stage2_addr; + uint32_t tags_addr; + uint32_t page_size; + uint32_t reserved1; + uint32_t reserved2; + char name[16]; + char command_line[512]; + uint32_t chksum[8]; +}; + +struct tag_header { + uint32_t size; + uint32_t tag; +}; + +/* The list must start with an ATAG_CORE node */ +#define ATAG_CORE 0x54410001 + +struct tag_core { + uint32_t flags; /* bit 0 = read-only */ + uint32_t pagesize; + uint32_t rootdev; +}; + +/* it is allowed to have multiple ATAG_MEM nodes */ +#define ATAG_MEM 0x54410002 + +struct tag_mem32 { + uint32_t size; + uint32_t start; /* physical start address */ +}; + +/* describes where the compressed ramdisk image lives (virtual address) */ +/* + * this one accidentally used virtual addresses - as such, + * it's deprecated. + */ +#define ATAG_INITRD 0x54410005 + +/* describes where the compressed ramdisk image lives (physical address) */ +#define ATAG_INITRD2 0x54420005 + +struct tag_initrd { + uint32_t start; /* physical start address */ + uint32_t size; /* size of compressed ramdisk image in bytes */ +}; + +/* command line: \0 terminated string */ +#define ATAG_CMDLINE 0x54410009 + +struct tag_cmdline { + char cmdline[1]; /* this is the minimum size */ +}; + +/* The list ends with an ATAG_NONE node. 
*/ +#define ATAG_NONE 0x00000000 + +struct tag { + struct tag_header hdr; + union { + struct tag_core core; + struct tag_mem32 mem; + struct tag_initrd initrd; + struct tag_cmdline cmdline; + } u; +}; + +#define tag_next(t) ((struct tag *)((uint32_t *)(t) + (t)->hdr.size)) +#define byte_size(t) ((t)->hdr.size << 2) +#define tag_size(type) ((sizeof(struct tag_header) + sizeof(struct type) + 3) >> 2) + +struct zimage_tag { + struct tag_header hdr; + union { +#define ZIMAGE_TAG_KRNL_SIZE cpu_to_le32(0x5a534c4b) + struct zimage_krnl_size { + uint32_t size_ptr; + uint32_t bss_size; + } krnl_size; + } u; +}; + +int zImage_arm_probe(const char *UNUSED(buf), off_t UNUSED(len)) +{ + /* + * Only zImage loading is supported. Do not check if + * the buffer is valid kernel image + */ + return 0; +} + +void zImage_arm_usage(void) +{ + printf( " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " --dtb=FILE Use FILE as the fdt blob.\n" + " --atags Use ATAGs instead of device-tree.\n" + " --page-offset=PAGE_OFFSET\n" + " Set PAGE_OFFSET of crash dump vmcore\n" + ); +} + +static +struct tag * atag_read_tags(void) +{ + static unsigned long buf[BOOT_PARAMS_SIZE]; + const char fn[]= "/proc/atags"; + FILE *fp; + fp = fopen(fn, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + fn, strerror(errno)); + return NULL; + } + + if (!fread(buf, sizeof(buf[1]), BOOT_PARAMS_SIZE, fp)) { + fclose(fp); + return NULL; + } + + if (ferror(fp)) { + fprintf(stderr, "Cannot read %s: %s\n", + fn, strerror(errno)); + fclose(fp); + return NULL; + } + + fclose(fp); + return (struct tag *) buf; +} + +static +int create_mem32_tag(struct tag_mem32 *tag_mem32) +{ + const char fn[]= "/proc/device-tree/memory/reg"; + uint32_t tmp[2]; + FILE *fp; + + fp = fopen(fn, "r"); + if (!fp) { + fprintf(stderr, 
"Cannot open %s: %m\n", fn); + return -1; + } + + if (fread(tmp, sizeof(tmp[0]), 2, fp) != 2) { + fprintf(stderr, "Short read %s\n", fn); + fclose(fp); + return -1; + } + + if (ferror(fp)) { + fprintf(stderr, "Cannot read %s: %m\n", fn); + fclose(fp); + return -1; + } + + /* atags_mem32 has base/size fields reversed! */ + tag_mem32->size = be32_to_cpu(tmp[1]); + tag_mem32->start = be32_to_cpu(tmp[0]); + + fclose(fp); + return 0; +} + +static +int atag_arm_load(struct kexec_info *info, unsigned long base, + const char *command_line, off_t command_line_len, const char *initrd) +{ + struct tag *saved_tags = atag_read_tags(); + char *buf; + off_t len; + struct tag *params; + + buf = xmalloc(getpagesize()); + memset(buf, 0xff, getpagesize()); + params = (struct tag *)buf; + + if (saved_tags) { + // Copy tags + saved_tags = (struct tag *) saved_tags; + while(byte_size(saved_tags)) { + switch (saved_tags->hdr.tag) { + case ATAG_INITRD: + case ATAG_INITRD2: + case ATAG_CMDLINE: + case ATAG_NONE: + // skip these tags + break; + default: + // copy all other tags + memcpy(params, saved_tags, byte_size(saved_tags)); + params = tag_next(params); + } + saved_tags = tag_next(saved_tags); + } + } else { + params->hdr.size = 2; + params->hdr.tag = ATAG_CORE; + params = tag_next(params); + + if (!create_mem32_tag(¶ms->u.mem)) { + params->hdr.size = 4; + params->hdr.tag = ATAG_MEM; + params = tag_next(params); + } + } + + if (initrd) { + params->hdr.size = tag_size(tag_initrd); + params->hdr.tag = ATAG_INITRD2; + params->u.initrd.start = initrd_base; + params->u.initrd.size = initrd_size; + params = tag_next(params); + } + + if (command_line) { + params->hdr.size = (sizeof(struct tag_header) + command_line_len + 3) >> 2; + params->hdr.tag = ATAG_CMDLINE; + memcpy(params->u.cmdline.cmdline, command_line, + command_line_len); + params->u.cmdline.cmdline[command_line_len - 1] = '\0'; + params = tag_next(params); + } + + params->hdr.size = 0; + params->hdr.tag = ATAG_NONE; + + len = 
((char *)params - buf) + sizeof(struct tag_header); + + add_segment(info, buf, len, base, len); + + return 0; +} + +static int setup_dtb_prop(char **bufp, off_t *sizep, int parentoffset, + const char *node_name, const char *prop_name, + const void *val, int len) +{ + char *dtb_buf; + off_t dtb_size; + int off; + int prop_len = 0; + const struct fdt_property *prop; + + if ((bufp == NULL) || (sizep == NULL) || (*bufp == NULL)) + die("Internal error\n"); + + dtb_buf = *bufp; + dtb_size = *sizep; + + /* check if the subnode has already exist */ + off = fdt_subnode_offset(dtb_buf, parentoffset, node_name); + if (off == -FDT_ERR_NOTFOUND) { + dtb_size += fdt_node_len(node_name); + fdt_set_totalsize(dtb_buf, dtb_size); + dtb_buf = xrealloc(dtb_buf, dtb_size); + off = fdt_add_subnode(dtb_buf, parentoffset, node_name); + } + + if (off < 0) { + fprintf(stderr, "FDT: Error adding %s node.\n", node_name); + return -1; + } + + prop = fdt_get_property(dtb_buf, off, prop_name, &prop_len); + if ((prop == NULL) && (prop_len != -FDT_ERR_NOTFOUND)) { + die("FDT: fdt_get_property"); + } else if (prop == NULL) { + /* prop_len == -FDT_ERR_NOTFOUND */ + /* prop doesn't exist */ + dtb_size += fdt_prop_len(prop_name, len); + } else { + if (prop_len < len) + dtb_size += FDT_TAGALIGN(len - prop_len); + } + + if (fdt_totalsize(dtb_buf) < dtb_size) { + fdt_set_totalsize(dtb_buf, dtb_size); + dtb_buf = xrealloc(dtb_buf, dtb_size); + } + + if (fdt_setprop(dtb_buf, off, prop_name, + val, len) != 0) { + fprintf(stderr, "FDT: Error setting %s/%s property.\n", + node_name, prop_name); + return -1; + } + *bufp = dtb_buf; + *sizep = dtb_size; + return 0; +} + +static const struct zimage_tag *find_extension_tag(const char *buf, off_t len, + uint32_t tag_id) +{ + const struct zimage_header *hdr = (const struct zimage_header *)buf; + const struct zimage_tag *tag; + uint32_t offset, size; + uint32_t max = len - sizeof(struct tag_header); + + if (len < sizeof(*hdr) || + hdr->magic != ZIMAGE_MAGIC || + 
hdr->magic2 != ZIMAGE_MAGIC2) + return NULL; + + for (offset = hdr->extension_tag_offset; + (tag = (void *)(buf + offset)) != NULL && + offset < max && + (size = le32_to_cpu(byte_size(tag))) != 0 && + offset + size < len; + offset += size) { + dbgprintf(" offset 0x%08x tag 0x%08x size %u\n", + offset, le32_to_cpu(tag->hdr.tag), size); + if (tag->hdr.tag == tag_id) + return tag; + } + + return NULL; +} + +static int get_cells_size(void *fdt, uint32_t *address_cells, + uint32_t *size_cells) +{ + int nodeoffset; + const uint32_t *prop = NULL; + int prop_len; + + /* default values */ + *address_cells = 1; + *size_cells = 1; + + /* under root node */ + nodeoffset = fdt_path_offset(fdt, "/"); + if (nodeoffset < 0) + return -1; + + prop = fdt_getprop(fdt, nodeoffset, "#address-cells", &prop_len); + if (prop) { + if (prop_len != sizeof(*prop)) + return -1; + + *address_cells = fdt32_to_cpu(*prop); + } + + prop = fdt_getprop(fdt, nodeoffset, "#size-cells", &prop_len); + if (prop) { + if (prop_len != sizeof(*prop)) + return -1; + + *size_cells = fdt32_to_cpu(*prop); + } + + dbgprintf("%s: #address-cells:%d #size-cells:%d\n", __func__, + *address_cells, *size_cells); + return 0; +} + +static bool cells_size_fitted(uint32_t address_cells, uint32_t size_cells, + struct memory_range *range) +{ + dbgprintf("%s: %llx-%llx\n", __func__, range->start, range->end); + + /* if *_cells >= 2, cells can hold 64-bit values anyway */ + if ((address_cells == 1) && (range->start >= (1ULL << 32))) + return false; + + if ((size_cells == 1) && + ((range->end - range->start + 1) >= (1ULL << 32))) + return false; + + return true; +} + +static void fill_property(void *buf, uint64_t val, uint32_t cells) +{ + uint32_t val32; + int i; + + if (cells == 1) { + val32 = cpu_to_fdt32((uint32_t)val); + memcpy(buf, &val32, sizeof(uint32_t)); + } else { + for (i = 0; + i < (cells * sizeof(uint32_t) - sizeof(uint64_t)); i++) + *(char *)buf++ = 0; + + val = cpu_to_fdt64(val); + memcpy(buf, &val, 
sizeof(uint64_t)); + } +} + +static int setup_dtb_prop_range(char **bufp, off_t *sizep, int parentoffset, + const char *node_name, const char *prop_name, + struct memory_range *range, + uint32_t address_cells, uint32_t size_cells) +{ + void *buf, *prop; + size_t buf_size; + int result; + + buf_size = (address_cells + size_cells) * sizeof(uint32_t); + prop = buf = xmalloc(buf_size); + + fill_property(prop, range->start, address_cells); + prop += address_cells * sizeof(uint32_t); + + fill_property(prop, range->end - range->start + 1, size_cells); + prop += size_cells * sizeof(uint32_t); + + result = setup_dtb_prop(bufp, sizep, parentoffset, node_name, + prop_name, buf, buf_size); + + free(buf); + + return result; +} + +int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + unsigned long page_size = getpagesize(); + unsigned long base, kernel_base; + unsigned int atag_offset = 0x1000; /* 4k offset from memory start */ + unsigned int extra_size = 0x8000; /* TEXT_OFFSET */ + uint32_t address_cells, size_cells; + const struct zimage_tag *tag; + size_t kernel_buf_size; + size_t kernel_mem_size; + const char *command_line; + char *modified_cmdline = NULL; + off_t command_line_len; + const char *ramdisk; + const char *ramdisk_buf; + int opt; + int use_atags; + int result; + char *dtb_buf; + off_t dtb_length; + char *dtb_file; + off_t dtb_offset; + char *end; + + /* See options.h -- add any more there, too. 
*/ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_APPEND }, + { "append", 1, 0, OPT_APPEND }, + { "initrd", 1, 0, OPT_RAMDISK }, + { "ramdisk", 1, 0, OPT_RAMDISK }, + { "dtb", 1, 0, OPT_DTB }, + { "atags", 0, 0, OPT_ATAGS }, + { "image-size", 1, 0, OPT_IMAGE_SIZE }, + { "page-offset", 1, 0, OPT_PAGE_OFFSET }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + /* + * Parse the command line arguments + */ + command_line = 0; + command_line_len = 0; + ramdisk = 0; + ramdisk_buf = 0; + initrd_size = 0; + use_atags = 0; + dtb_file = NULL; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + command_line = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_DTB: + dtb_file = optarg; + break; + case OPT_ATAGS: + use_atags = 1; + break; + case OPT_IMAGE_SIZE: + kexec_arm_image_size = strtoul(optarg, &end, 0); + break; + case OPT_PAGE_OFFSET: + user_page_offset = strtoull(optarg, &end, 0); + break; + } + } + + if (use_atags && dtb_file) { + fprintf(stderr, "You can only use ATAGs if you don't specify a " + "dtb file.\n"); + return -1; + } + + if (!use_atags && !dtb_file) { + int f; + + f = have_sysfs_fdt(); + if (f) + dtb_file = SYSFS_FDT; + } + + if (command_line) { + command_line_len = strlen(command_line) + 1; + if (command_line_len > COMMAND_LINE_SIZE) + command_line_len = COMMAND_LINE_SIZE; + } + if (ramdisk) + ramdisk_buf = slurp_file_mmap(ramdisk, &initrd_size); + + if (dtb_file) + dtb_buf = slurp_file(dtb_file, &dtb_length); + + if (len > sizeof(struct zimage_header)) { + const struct zimage_header *hdr; + off_t size; + + hdr = (const struct zimage_header *)buf; + + dbgprintf("zImage header: 0x%08x 0x%08x 0x%08x\n", + hdr->magic, hdr->start, hdr->end); + + if (hdr->magic == ZIMAGE_MAGIC) { + size = le32_to_cpu(hdr->end) - 
le32_to_cpu(hdr->start); + + dbgprintf("zImage size 0x%llx, file size 0x%llx\n", + (unsigned long long)size, + (unsigned long long)len); + + if (size > len) { + fprintf(stderr, + "zImage is truncated - file 0x%llx vs header 0x%llx\n", + (unsigned long long)len, + (unsigned long long)size); + return -1; + } + if (size < len) + len = size; + } + } + + /* Handle android images, 2048 is the minimum page size */ + if (len > 2048 && !strncmp(buf, "ANDROID!", 8)) { + const struct android_image *aimg = (const void *)buf; + uint32_t page_size = le32_to_cpu(aimg->page_size); + uint32_t kernel_size = le32_to_cpu(aimg->kernel_size); + uint32_t ramdisk_size = le32_to_cpu(aimg->ramdisk_size); + uint32_t stage2_size = le32_to_cpu(aimg->stage2_size); + off_t aimg_size = page_size + _ALIGN(kernel_size, page_size) + + _ALIGN(ramdisk_size, page_size) + stage2_size; + + if (len < aimg_size) { + fprintf(stderr, "Android image size is incorrect\n"); + return -1; + } + + /* Get the kernel */ + buf = buf + page_size; + len = kernel_size; + + /* And the ramdisk if none was given on the command line */ + if (!ramdisk && ramdisk_size) { + initrd_size = ramdisk_size; + ramdisk_buf = buf + _ALIGN(kernel_size, page_size); + } + + /* Likewise for the command line */ + if (!command_line && aimg->command_line[0]) { + command_line = aimg->command_line; + if (command_line[sizeof(aimg->command_line) - 1]) + command_line_len = sizeof(aimg->command_line); + else + command_line_len = strlen(command_line) + 1; + } + } + + /* + * Save the length of the compressed kernel image w/o the appended DTB. + * This will be required later on when the kernel image contained + * in the zImage will be loaded into a kernel memory segment. + * And we want to load ONLY the compressed kernel image from the zImage + * and discard the appended DTB. + */ + kernel_buf_size = len; + + /* + * Always extend the zImage by four bytes to ensure that an appended + * DTB image always sees an initialised value after _edata. 
+ */ + kernel_mem_size = len + 4; + + /* + * Check for a kernel size extension, and set or validate the + * image size. This is the total space needed to avoid the + * boot kernel BSS, so other data (such as initrd) does not get + * overwritten. + */ + tag = find_extension_tag(buf, len, ZIMAGE_TAG_KRNL_SIZE); + + /* + * The zImage length does not include its stack (4k) or its + * malloc space (64k). Include this. + */ + len += 0x11000; + + dbgprintf("zImage requires 0x%08llx bytes\n", (unsigned long long)len); + + if (tag) { + uint32_t *p = (void *)buf + le32_to_cpu(tag->u.krnl_size.size_ptr); + uint32_t edata_size = le32_to_cpu(get_unaligned(p)); + uint32_t bss_size = le32_to_cpu(tag->u.krnl_size.bss_size); + uint32_t kernel_size = edata_size + bss_size; + + dbgprintf("Decompressed kernel sizes:\n"); + dbgprintf(" text+data 0x%08lx bss 0x%08lx total 0x%08lx\n", + (unsigned long)edata_size, + (unsigned long)bss_size, + (unsigned long)kernel_size); + + /* + * While decompressing, the zImage is placed past _edata + * of the decompressed kernel. Ensure we account for that. + */ + if (kernel_size < edata_size + len) + kernel_size = edata_size + len; + + dbgprintf("Resulting kernel space: 0x%08lx\n", + (unsigned long)kernel_size); + + if (kexec_arm_image_size == 0) + kexec_arm_image_size = kernel_size; + else if (kexec_arm_image_size < kernel_size) { + fprintf(stderr, + "Kernel size is too small, increasing to 0x%lx\n", + (unsigned long)kernel_size); + kexec_arm_image_size = kernel_size; + } + } + + /* + * If the user didn't specify the size of the image, and we don't + * have the extension tables, assume the maximum kernel compression + * ratio is 4. Note that we must include space for the compressed + * image here as well. + */ + if (!kexec_arm_image_size) + kexec_arm_image_size = len * 5; + + /* + * If we are loading a dump capture kernel, we need to update kernel + * command line and also add some additional segments. 
+ */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + uint64_t start, end; + + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset(modified_cmdline, '\0', COMMAND_LINE_SIZE); + + if (command_line) { + (void) strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + + if (load_crashdump_segments(info, modified_cmdline) < 0) { + free(modified_cmdline); + return -1; + } + + command_line = modified_cmdline; + command_line_len = strlen(command_line) + 1; + + /* + * We put the dump capture kernel at the start of crashkernel + * reserved memory. + */ + if (parse_iomem_single(CRASH_KERNEL_BOOT, &start, &end) && + parse_iomem_single(CRASH_KERNEL, &start, &end)) { + /* + * No crash kernel memory reserved. We cannot do more + * but just bail out. + */ + return ENOCRASHKERNEL; + } + base = start; + } else { + base = locate_hole(info, len + extra_size, 0, 0, + ULONG_MAX, INT_MAX); + } + + if (base == ULONG_MAX) + return -1; + + kernel_base = base + extra_size; + + /* + * Calculate the minimum address of the initrd, which must be + * above the memory used by the zImage while it runs. This + * needs to be page-size aligned. + */ + initrd_base = kernel_base + _ALIGN(kexec_arm_image_size, page_size); + + dbgprintf("%-6s: address=0x%08lx size=0x%08lx\n", "Kernel", + (unsigned long)kernel_base, + (unsigned long)kexec_arm_image_size); + + if (ramdisk_buf) { + /* + * Find a hole to place the initrd. The crash kernel use + * fixed address, so no check is ok. 
+ */ + if (!(info->kexec_flags & KEXEC_ON_CRASH)) { + initrd_base = locate_hole(info, initrd_size, page_size, + initrd_base, + ULONG_MAX, INT_MAX); + if (initrd_base == ULONG_MAX) + return -1; + } + + dbgprintf("%-6s: address=0x%08lx size=0x%08lx\n", "Initrd", + (unsigned long)initrd_base, + (unsigned long)initrd_size); + + add_segment(info, ramdisk_buf, initrd_size, initrd_base, + initrd_size); + } + + if (use_atags) { + /* + * use ATAGs from /proc/atags + */ + if (atag_arm_load(info, base + atag_offset, + command_line, command_line_len, + ramdisk_buf) == -1) + return -1; + } else { + /* + * Read a user-specified DTB file. + */ + if (dtb_file) { + if (fdt_check_header(dtb_buf) != 0) { + fprintf(stderr, "Invalid FDT buffer.\n"); + return -1; + } + + if (command_line) { + /* + * Error should have been reported so + * directly return -1 + */ + if (setup_dtb_prop(&dtb_buf, &dtb_length, 0, "chosen", + "bootargs", command_line, + strlen(command_line) + 1)) + return -1; + } + } else { + /* + * Extract the DTB from /proc/device-tree. 
+ */ + create_flatten_tree(&dtb_buf, &dtb_length, command_line); + } + + /* + * Add the initrd parameters to the dtb + */ + if (ramdisk_buf) { + unsigned long start, end; + + start = cpu_to_be32((unsigned long)(initrd_base)); + end = cpu_to_be32((unsigned long)(initrd_base + initrd_size)); + + if (setup_dtb_prop(&dtb_buf, &dtb_length, 0, "chosen", + "linux,initrd-start", &start, + sizeof(start))) + return -1; + if (setup_dtb_prop(&dtb_buf, &dtb_length, 0, "chosen", + "linux,initrd-end", &end, + sizeof(end))) + return -1; + } + + if (info->kexec_flags & KEXEC_ON_CRASH) { + /* Determine #address-cells and #size-cells */ + result = get_cells_size(dtb_buf, &address_cells, + &size_cells); + if (result) { + fprintf(stderr, "Cannot determine cells-size.\n"); + return -1; + } + + if (!cells_size_fitted(address_cells, size_cells, + &elfcorehdr_mem)) { + fprintf(stderr, "elfcorehdr doesn't fit cells-size.\n"); + return -1; + } + + if (!cells_size_fitted(address_cells, size_cells, + &crash_kernel_mem)) { + fprintf(stderr, "kexec: usable memory range doesn't fit cells-size.\n"); + return -1; + } + + /* Add linux,elfcorehdr */ + if (setup_dtb_prop_range(&dtb_buf, &dtb_length, 0, + "chosen", "linux,elfcorehdr", + &elfcorehdr_mem, + address_cells, size_cells)) + return -1; + + /* Add linux,usable-memory-range */ + if (setup_dtb_prop_range(&dtb_buf, &dtb_length, 0, + "chosen", + "linux,usable-memory-range", + &crash_kernel_mem, + address_cells, size_cells)) + return -1; + } + + /* + * The dtb must also be placed above the memory used by + * the zImage. We don't care about its position wrt the + * ramdisk, but we might as well place it after the initrd. + * We leave a buffer page between the initrd and the dtb. + */ + dtb_offset = initrd_base + initrd_size + page_size; + dtb_offset = _ALIGN_DOWN(dtb_offset, page_size); + + /* + * Find a hole to place the dtb above the initrd. + * Crash kernel use fixed address, no check is ok. 
+ */ + if (!(info->kexec_flags & KEXEC_ON_CRASH)) { + dtb_offset = locate_hole(info, dtb_length, page_size, + dtb_offset, ULONG_MAX, INT_MAX); + if (dtb_offset == ULONG_MAX) + return -1; + } + + dbgprintf("%-6s: address=0x%08lx size=0x%08lx\n", "DT", + (unsigned long)dtb_offset, (unsigned long)dtb_length); + + add_segment(info, dtb_buf, dtb_length, dtb_offset, dtb_length); + } + + add_segment(info, buf, kernel_buf_size, kernel_base, kernel_mem_size); + + info->entry = (void*)kernel_base; + + return 0; +} diff --git a/kexec/arch/arm/phys_to_virt.c b/kexec/arch/arm/phys_to_virt.c new file mode 100644 index 0000000..46a4f68 --- /dev/null +++ b/kexec/arch/arm/phys_to_virt.c @@ -0,0 +1,22 @@ +#include "../../kexec.h" +#include "../../crashdump.h" +#include "phys_to_virt.h" + +uint64_t phys_offset; + +/** + * phys_to_virt() - translate physical address to virtual address + * @paddr: physical address to translate + * + * For ARM we have following equation to translate from virtual address to + * physical: + * paddr = vaddr - PAGE_OFFSET + PHYS_OFFSET + * + * See also: + * http://lists.arm.linux.org.uk/lurker/message/20010723.185051.94ce743c.en.html + */ +unsigned long +phys_to_virt(struct crash_elf_info *elf_info, unsigned long long paddr) +{ + return paddr + elf_info->page_offset - phys_offset; +} diff --git a/kexec/arch/arm/phys_to_virt.h b/kexec/arch/arm/phys_to_virt.h new file mode 100644 index 0000000..b3147dd --- /dev/null +++ b/kexec/arch/arm/phys_to_virt.h @@ -0,0 +1,8 @@ +#ifndef PHYS_TO_VIRT_H +#define PHYS_TO_VIRT_H + +#include <stdint.h> + +extern uint64_t phys_offset; + +#endif diff --git a/kexec/arch/arm64/Makefile b/kexec/arch/arm64/Makefile new file mode 100644 index 0000000..59212f1 --- /dev/null +++ b/kexec/arch/arm64/Makefile @@ -0,0 +1,50 @@ + +arm64_FS2DT += kexec/fs2dt.c +arm64_FS2DT_INCLUDE += \ + -include $(srcdir)/kexec/arch/arm64/crashdump-arm64.h \ + -include $(srcdir)/kexec/arch/arm64/kexec-arm64.h + +arm64_DT_OPS += kexec/dt-ops.c + 
+arm64_MEM_REGIONS = kexec/mem_regions.c + +arm64_CPPFLAGS += -I $(srcdir)/kexec/ + +arm64_KEXEC_SRCS += \ + kexec/arch/arm64/crashdump-arm64.c \ + kexec/arch/arm64/kexec-arm64.c \ + kexec/arch/arm64/kexec-elf-arm64.c \ + kexec/arch/arm64/kexec-uImage-arm64.c \ + kexec/arch/arm64/kexec-image-arm64.c \ + kexec/arch/arm64/kexec-vmlinuz-arm64.c + +arm64_UIMAGE = kexec/kexec-uImage.c + +arm64_ARCH_REUSE_INITRD = +arm64_ADD_SEGMENT = +arm64_VIRT_TO_PHYS = +arm64_PHYS_TO_VIRT = + +dist += $(arm64_KEXEC_SRCS) \ + kexec/arch/arm64/include/arch/options.h \ + kexec/arch/arm64/crashdump-arm64.h \ + kexec/arch/arm64/image-header.h \ + kexec/arch/arm64/iomem.h \ + kexec/arch/arm64/kexec-arm64.h \ + kexec/arch/arm64/Makefile + +ifdef HAVE_LIBFDT + +LIBS += -lfdt + +else + +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +libfdt_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) + +arm64_CPPFLAGS += -I$(srcdir)/kexec/libfdt + +arm64_KEXEC_SRCS += $(libfdt_SRCS) + +endif diff --git a/kexec/arch/arm64/crashdump-arm64.c b/kexec/arch/arm64/crashdump-arm64.c new file mode 100644 index 0000000..3098315 --- /dev/null +++ b/kexec/arch/arm64/crashdump-arm64.c @@ -0,0 +1,248 @@ +/* + * ARM64 crashdump. + * partly derived from arm implementation + * + * Copyright (c) 2014-2017 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <linux/elf.h> + +#include "kexec.h" +#include "crashdump.h" +#include "crashdump-arm64.h" +#include "iomem.h" +#include "kexec-arm64.h" +#include "kexec-elf.h" +#include "mem_regions.h" + +/* memory ranges of crashed kernel */ +static struct memory_ranges system_memory_rgns; + +/* memory range reserved for crashkernel */ +struct memory_range crash_reserved_mem[CRASH_MAX_RESERVED_RANGES]; +struct memory_ranges usablemem_rgns = { + .size = 0, + .max_size = CRASH_MAX_RESERVED_RANGES, + .ranges = crash_reserved_mem, +}; + +struct memory_range elfcorehdr_mem; + +static struct crash_elf_info elf_info = { + .class = ELFCLASS64, +#if (__BYTE_ORDER == __LITTLE_ENDIAN) + .data = ELFDATA2LSB, +#else + .data = ELFDATA2MSB, +#endif + .machine = EM_AARCH64, +}; + +/* + * iomem_range_callback() - callback called for each iomem region + * @data: not used + * @nr: not used + * @str: name of the memory region + * @base: start address of the memory region + * @length: size of the memory region + * + * This function is called once for each memory region found in /proc/iomem. + * It locates system RAM and crashkernel reserved memory and places these to + * variables, respectively, system_memory_rgns and usablemem_rgns. 
+ */ + +static int iomem_range_callback(void *UNUSED(data), int UNUSED(nr), + char *str, unsigned long long base, + unsigned long long length) +{ + if (strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)) == 0) + return mem_regions_alloc_and_add(&usablemem_rgns, + base, length, RANGE_RAM); + else if (strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0) + return mem_regions_alloc_and_add(&system_memory_rgns, + base, length, RANGE_RAM); + else if (strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) == 0) { + + unsigned long long kva_text = get_kernel_sym("_text"); + unsigned long long kva_stext = get_kernel_sym("_stext"); + unsigned long long kva_text_end = get_kernel_sym("__init_begin"); + + /* + * old: kernel_code.start = __pa_symbol(_text); + * new: kernel_code.start = __pa_symbol(_stext); + * + * For compatibility, deduce by comparing the gap "__init_begin - _stext" + * and the res size of "Kernel code" in /proc/iomem + */ + if (kva_text_end - kva_stext == length) + elf_info.kern_paddr_start = base - (kva_stext - kva_text); + else + elf_info.kern_paddr_start = base; + } + else if (strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) == 0) + elf_info.kern_size = base + length - elf_info.kern_paddr_start; + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + return usablemem_rgns.size; +} + +/* + * crash_get_memory_ranges() - read system physical memory + * + * Function reads through system physical memory and stores found memory + * regions in system_memory_rgns. + * Regions are sorted in ascending order. + * + * Returns 0 in case of success and a negative value otherwise. + */ +static int crash_get_memory_ranges(void) +{ + int i; + + /* + * First read all memory regions that can be considered as + * system memory including the crash area. 
+ */ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + /* allow one or two regions for crash dump kernel */ + if (!usablemem_rgns.size) + return -EINVAL; + + dbgprint_mem_range("Reserved memory range", + usablemem_rgns.ranges, usablemem_rgns.size); + + for (i = 0; i < usablemem_rgns.size; i++) { + if (mem_regions_alloc_and_exclude(&system_memory_rgns, + &crash_reserved_mem[i])) { + fprintf(stderr, "Cannot allocate memory for ranges\n"); + return -ENOMEM; + } + } + + /* + * Make sure that the memory regions are sorted. + */ + mem_regions_sort(&system_memory_rgns); + + dbgprint_mem_range("Coredump memory ranges", + system_memory_rgns.ranges, system_memory_rgns.size); + + /* + * For additional kernel code/data segment. + * kern_paddr_start/kern_size are determined in iomem_range_callback + */ + elf_info.kern_vaddr_start = get_kernel_sym("_text"); + if (!elf_info.kern_vaddr_start) + elf_info.kern_vaddr_start = UINT64_MAX; + + return 0; +} + +/* + * load_crashdump_segments() - load the elf core header + * @info: kexec info structure + * + * This function creates and loads an additional segment of elf core header + * which is used to construct /proc/vmcore on crash dump kernel. + * + * Return 0 in case of success and -1 in case of error. + */ + +int load_crashdump_segments(struct kexec_info *info) +{ + unsigned long elfcorehdr; + unsigned long bufsz; + void *buf; + int err; + + /* + * First fetch all the memory (RAM) ranges that we are going to + * pass to the crash dump kernel during panic. 
+ */ + + err = crash_get_memory_ranges(); + + if (err) + return EFAILED; + + get_page_offset((unsigned long *)&elf_info.page_offset); + dbgprintf("%s: page_offset: %016llx\n", __func__, + elf_info.page_offset); + + err = crash_create_elf64_headers(info, &elf_info, + system_memory_rgns.ranges, system_memory_rgns.size, + &buf, &bufsz, ELF_CORE_HEADER_ALIGN); + + if (err) + return EFAILED; + + elfcorehdr = add_buffer_phys_virt(info, buf, bufsz, bufsz, 0, + crash_reserved_mem[usablemem_rgns.size - 1].start, + crash_reserved_mem[usablemem_rgns.size - 1].end, + -1, 0); + + elfcorehdr_mem.start = elfcorehdr; + elfcorehdr_mem.end = elfcorehdr + bufsz - 1; + + dbgprintf("%s: elfcorehdr 0x%llx-0x%llx\n", __func__, + elfcorehdr_mem.start, elfcorehdr_mem.end); + + return 0; +} + +/* + * e_entry and p_paddr are actually in virtual address space. + * Those values will be translated to physical addresses by using + * virt_to_phys() in add_segment(). + * So let's fix up those values for later use so the memory base + * (arm64_mem.phys_offset) will be correctly replaced with + * crash_reserved_mem[usablemem_rgns.size - 1].start. 
+ */ +void fixup_elf_addrs(struct mem_ehdr *ehdr) +{ + struct mem_phdr *phdr; + int i; + + ehdr->e_entry += -arm64_mem.phys_offset + + crash_reserved_mem[usablemem_rgns.size - 1].start; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type != PT_LOAD) + continue; + phdr->p_paddr += + (-arm64_mem.phys_offset + + crash_reserved_mem[usablemem_rgns.size - 1].start); + } +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + if (!usablemem_rgns.size) + return -1; + + *start = crash_reserved_mem[usablemem_rgns.size - 1].start; + *end = crash_reserved_mem[usablemem_rgns.size - 1].end; + + return 0; +} diff --git a/kexec/arch/arm64/crashdump-arm64.h b/kexec/arch/arm64/crashdump-arm64.h new file mode 100644 index 0000000..82fa69b --- /dev/null +++ b/kexec/arch/arm64/crashdump-arm64.h @@ -0,0 +1,29 @@ +/* + * ARM64 crashdump. + * + * Copyright (c) 2014-2017 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef CRASHDUMP_ARM64_H +#define CRASHDUMP_ARM64_H + +#include "kexec.h" + +#define CRASH_MAX_MEMORY_RANGES 32768 + +/* crash dump kernel support at most two regions, low_region and high region. 
*/ +#define CRASH_MAX_RESERVED_RANGES 2 + +extern struct memory_ranges usablemem_rgns; +extern struct memory_range crash_reserved_mem[]; +extern struct memory_range elfcorehdr_mem; + +extern int load_crashdump_segments(struct kexec_info *info); +extern void fixup_elf_addrs(struct mem_ehdr *ehdr); + +#endif /* CRASHDUMP_ARM64_H */ diff --git a/kexec/arch/arm64/image-header.h b/kexec/arch/arm64/image-header.h new file mode 100644 index 0000000..26bb02f --- /dev/null +++ b/kexec/arch/arm64/image-header.h @@ -0,0 +1,147 @@ +/* + * ARM64 binary image header. + */ + +#if !defined(__ARM64_IMAGE_HEADER_H) +#define __ARM64_IMAGE_HEADER_H + +#include <endian.h> +#include <stdint.h> + +/** + * struct arm64_image_header - arm64 kernel image header. + * + * @pe_sig: Optional PE format 'MZ' signature. + * @branch_code: Reserved for instructions to branch to stext. + * @text_offset: The image load offset in LSB byte order. + * @image_size: An estimated size of the memory image size in LSB byte order. + * @flags: Bit flags in LSB byte order: + * Bit 0: Image byte order: 1=MSB. + * Bit 1-2: Kernel page size: 1=4K, 2=16K, 3=64K. + * Bit 3: Image placement: 0=low. + * @reserved_1: Reserved. + * @magic: Magic number, "ARM\x64". + * @pe_header: Optional offset to a PE format header. 
/**
 * struct arm64_image_header - arm64 kernel image header.
 *
 * @pe_sig: Optional PE format 'MZ' signature.
 * @branch_code: Reserved for instructions to branch to stext.
 * @text_offset: The image load offset in LSB byte order.
 * @image_size: An estimated size of the memory image size in LSB byte order.
 * @flags: Bit flags in LSB byte order:
 *   Bit 0:   Image byte order: 1=MSB.
 *   Bit 1-2: Kernel page size: 1=4K, 2=16K, 3=64K.
 *   Bit 3:   Image placement: 0=low.
 * @reserved_1: Reserved.
 * @magic: Magic number, "ARM\x64".
 * @pe_header: Optional offset to a PE format header.
 **/

struct arm64_image_header {
	uint8_t pe_sig[2];
	uint16_t branch_code[3];
	uint64_t text_offset;
	uint64_t image_size;
	uint64_t flags;
	uint64_t reserved_1[3];
	uint8_t magic[4];
	uint32_t pe_header;
};

/* Expected byte patterns and the masks for the @flags field. */
static const uint8_t arm64_image_magic[4] = {'A', 'R', 'M', 0x64U};
static const uint8_t arm64_image_pe_sig[2] = {'M', 'Z'};
static const uint8_t arm64_pe_machtype[6] = {'P','E', 0x0, 0x0, 0x64, 0xAA};
static const uint64_t arm64_image_flag_be = (1UL << 0);
static const uint64_t arm64_image_flag_page_size = (3UL << 1);
static const uint64_t arm64_image_flag_placement = (1UL << 3);

/**
 * enum arm64_header_page_size - Values of the page-size field (flags bits 1-2).
 */

enum arm64_header_page_size {
	arm64_header_page_size_invalid = 0,
	arm64_header_page_size_4k,
	arm64_header_page_size_16k,
	arm64_header_page_size_64k
};

/**
 * arm64_header_check_magic - Helper to check the arm64 image header.
 *
 * Returns 1 if every byte of @h->magic matches arm64_image_magic,
 * 0 otherwise or when @h is NULL.
 */

static inline int arm64_header_check_magic(const struct arm64_image_header *h)
{
	unsigned int pos;

	if (!h)
		return 0;

	for (pos = 0; pos < sizeof(arm64_image_magic); pos++) {
		if (h->magic[pos] != arm64_image_magic[pos])
			return 0;
	}

	return 1;
}

/**
 * arm64_header_check_pe_sig - Helper to check the arm64 image header.
 *
 * Returns 1 if the 'MZ' signature is found, 0 otherwise or when @h is NULL.
 */

static inline int arm64_header_check_pe_sig(const struct arm64_image_header *h)
{
	unsigned int pos;

	if (!h)
		return 0;

	for (pos = 0; pos < sizeof(arm64_image_pe_sig); pos++) {
		if (h->pe_sig[pos] != arm64_image_pe_sig[pos])
			return 0;
	}

	return 1;
}

/**
 * arm64_header_check_msb - Helper to check the arm64 image header.
 *
 * Returns non-zero if the image was built as big endian.
 */
+ */ + +static inline int arm64_header_check_msb(const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return (le64toh(h->flags) & arm64_image_flag_be) >> 0; +} + +/** + * arm64_header_page_size + */ + +static inline enum arm64_header_page_size arm64_header_page_size( + const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return (le64toh(h->flags) & arm64_image_flag_page_size) >> 1; +} + +/** + * arm64_header_placement + * + * Returns non-zero if the image has no physical placement restrictions. + */ + +static inline int arm64_header_placement(const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return (le64toh(h->flags) & arm64_image_flag_placement) >> 3; +} + +static inline uint64_t arm64_header_text_offset( + const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return le64toh(h->text_offset); +} + +static inline uint64_t arm64_header_image_size( + const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return le64toh(h->image_size); +} + +#endif diff --git a/kexec/arch/arm64/include/arch/options.h b/kexec/arch/arm64/include/arch/options.h new file mode 100644 index 0000000..8c695f3 --- /dev/null +++ b/kexec/arch/arm64/include/arch/options.h @@ -0,0 +1,43 @@ +#if !defined(KEXEC_ARCH_ARM64_OPTIONS_H) +#define KEXEC_ARCH_ARM64_OPTIONS_H + +#define OPT_APPEND ((OPT_MAX)+0) +#define OPT_DTB ((OPT_MAX)+1) +#define OPT_INITRD ((OPT_MAX)+2) +#define OPT_REUSE_CMDLINE ((OPT_MAX)+3) +#define OPT_SERIAL ((OPT_MAX)+4) +#define OPT_ARCH_MAX ((OPT_MAX)+5) + +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "append", 1, NULL, OPT_APPEND }, \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "dtb", 1, NULL, OPT_DTB }, \ + { "initrd", 1, NULL, OPT_INITRD }, \ + { "serial", 1, NULL, OPT_SERIAL }, \ + { "ramdisk", 1, NULL, OPT_INITRD }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR /* Only accept long arch options. 
*/ +#define KEXEC_ALL_OPTIONS KEXEC_ARCH_OPTIONS +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +static const char arm64_opts_usage[] __attribute__ ((unused)) = +" --append=STRING Set the kernel command line to STRING.\n" +" --command-line=STRING Set the kernel command line to STRING.\n" +" --dtb=FILE Use FILE as the device tree blob.\n" +" --initrd=FILE Use FILE as the kernel initial ramdisk.\n" +" --serial=STRING Name of console used for purgatory printing. (e.g. ttyAMA0)\n" +" --ramdisk=FILE Use FILE as the kernel initial ramdisk.\n" +" --reuse-cmdline Use kernel command line from running system.\n"; + +struct arm64_opts { + const char *command_line; + const char *dtb; + const char *initrd; + const char *console; +}; + +extern struct arm64_opts arm64_opts; + +#endif diff --git a/kexec/arch/arm64/iomem.h b/kexec/arch/arm64/iomem.h new file mode 100644 index 0000000..d4864bb --- /dev/null +++ b/kexec/arch/arm64/iomem.h @@ -0,0 +1,10 @@ +#ifndef IOMEM_H +#define IOMEM_H + +#define SYSTEM_RAM "System RAM\n" +#define KERNEL_CODE "Kernel code\n" +#define KERNEL_DATA "Kernel data\n" +#define CRASH_KERNEL "Crash kernel\n" +#define IOMEM_RESERVED "reserved\n" + +#endif diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c new file mode 100644 index 0000000..4a67b0d --- /dev/null +++ b/kexec/arch/arm64/kexec-arm64.c @@ -0,0 +1,1365 @@ +/* + * ARM64 kexec. 
+ */ + +#define _GNU_SOURCE + +#include <assert.h> +#include <errno.h> +#include <getopt.h> +#include <inttypes.h> +#include <libfdt.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <linux/elf-em.h> +#include <elf.h> +#include <elf_info.h> + +#include <unistd.h> +#include <syscall.h> +#include <errno.h> +#include <linux/random.h> + +#include "kexec.h" +#include "kexec-arm64.h" +#include "crashdump.h" +#include "crashdump-arm64.h" +#include "dt-ops.h" +#include "fs2dt.h" +#include "iomem.h" +#include "kexec-syscall.h" +#include "mem_regions.h" +#include "arch/options.h" + +#define ROOT_NODE_ADDR_CELLS_DEFAULT 1 +#define ROOT_NODE_SIZE_CELLS_DEFAULT 1 + +#define PROP_ADDR_CELLS "#address-cells" +#define PROP_SIZE_CELLS "#size-cells" +#define PROP_ELFCOREHDR "linux,elfcorehdr" +#define PROP_USABLE_MEM_RANGE "linux,usable-memory-range" + +#define PAGE_OFFSET_36 ((0xffffffffffffffffUL) << 36) +#define PAGE_OFFSET_39 ((0xffffffffffffffffUL) << 39) +#define PAGE_OFFSET_42 ((0xffffffffffffffffUL) << 42) +#define PAGE_OFFSET_47 ((0xffffffffffffffffUL) << 47) +#define PAGE_OFFSET_48 ((0xffffffffffffffffUL) << 48) + +/* Global flag which indicates that we have tried reading + * PHYS_OFFSET from 'kcore' already. + */ +static bool try_read_phys_offset_from_kcore = false; + +/* Machine specific details. */ +static int va_bits = -1; +static unsigned long page_offset; + +/* Global varables the core kexec routines expect. 
*/ + +unsigned char reuse_initrd; + +off_t initrd_base; +off_t initrd_size; + +const struct arch_map_entry arches[] = { + { "aarch64", KEXEC_ARCH_ARM64 }, + { "aarch64_be", KEXEC_ARCH_ARM64 }, + { NULL, 0 }, +}; + +struct file_type file_type[] = { + {"vmlinux", elf_arm64_probe, elf_arm64_load, elf_arm64_usage}, + {"Image", image_arm64_probe, image_arm64_load, image_arm64_usage}, + {"uImage", uImage_arm64_probe, uImage_arm64_load, uImage_arm64_usage}, + {"vmlinuz", pez_arm64_probe, pez_arm64_load, pez_arm64_usage}, +}; + +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +/* arm64 global varables. */ + +struct arm64_opts arm64_opts; +struct arm64_mem arm64_mem = { + .phys_offset = arm64_mem_ngv, + .vp_offset = arm64_mem_ngv, +}; + +uint64_t get_phys_offset(void) +{ + assert(arm64_mem.phys_offset != arm64_mem_ngv); + return arm64_mem.phys_offset; +} + +uint64_t get_vp_offset(void) +{ + assert(arm64_mem.vp_offset != arm64_mem_ngv); + return arm64_mem.vp_offset; +} + +/** + * arm64_process_image_header - Process the arm64 image header. + * + * Make a guess that KERNEL_IMAGE_SIZE will be enough for older kernels. + */ + +int arm64_process_image_header(const struct arm64_image_header *h) +{ +#if !defined(KERNEL_IMAGE_SIZE) +# define KERNEL_IMAGE_SIZE MiB(16) +#endif + + if (!arm64_header_check_magic(h)) + return EFAILED; + + if (h->image_size) { + arm64_mem.text_offset = arm64_header_text_offset(h); + arm64_mem.image_size = arm64_header_image_size(h); + } else { + /* For 3.16 and older kernels. 
*/ + arm64_mem.text_offset = 0x80000; + arm64_mem.image_size = KERNEL_IMAGE_SIZE; + fprintf(stderr, + "kexec: %s: Warning: Kernel image size set to %lu MiB.\n" + " Please verify compatability with lodaed kernel.\n", + __func__, KERNEL_IMAGE_SIZE / 1024UL / 1024UL); + } + + return 0; +} + +void arch_usage(void) +{ + printf(arm64_opts_usage); +} + +int arch_process_options(int argc, char **argv) +{ + static const char short_options[] = KEXEC_OPT_STR ""; + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { 0 } + }; + int opt; + char *cmdline = NULL; + const char *append = NULL; + int do_kexec_file_syscall = 0; + + for (opt = 0; opt != -1; ) { + opt = getopt_long(argc, argv, short_options, options, 0); + + switch (opt) { + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + cmdline = get_command_line(); + break; + case OPT_DTB: + arm64_opts.dtb = optarg; + break; + case OPT_INITRD: + arm64_opts.initrd = optarg; + break; + case OPT_KEXEC_FILE_SYSCALL: + do_kexec_file_syscall = 1; + case OPT_SERIAL: + arm64_opts.console = optarg; + break; + default: + break; /* Ignore core and unknown options. */ + } + } + + arm64_opts.command_line = concat_cmdline(cmdline, append); + + dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__, + arm64_opts.command_line); + dbgprintf("%s:%d: initrd: %s\n", __func__, __LINE__, + arm64_opts.initrd); + dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__, + (do_kexec_file_syscall && arm64_opts.dtb ? "(ignored)" : + arm64_opts.dtb)); + dbgprintf("%s:%d: console: %s\n", __func__, __LINE__, + arm64_opts.console); + + if (do_kexec_file_syscall) + arm64_opts.dtb = NULL; + + return 0; +} + +/** + * find_purgatory_sink - Find a sink for purgatory output. 
/**
 * find_purgatory_sink - Find a sink for purgatory output.
 *
 * @console: tty name (e.g. ttyAMA0), or NULL for "no sink".
 *
 * Reads /sys/class/tty/<console>/iomem_base and parses it as a hexadecimal
 * address.
 *
 * Returns the parsed address, or 0 when @console is NULL, the tty does not
 * exist, the file cannot be read, or its content is not a hex number.
 */

static uint64_t find_purgatory_sink(const char *console)
{
	int fd, ret;
	ssize_t nread;
	char device[255], mem[255];
	struct stat sb;
	/* Room for "0x" + 16 hex digits + newline + NUL, with slack. */
	char buffer[32];
	uint64_t iomem = 0x0;

	if (!console)
		return 0;

	ret = snprintf(device, sizeof(device), "/sys/class/tty/%s", console);
	if (ret < 0 || (size_t)ret >= sizeof(device)) {
		fprintf(stderr, "snprintf failed: %s\n", strerror(errno));
		return 0;
	}

	if (stat(device, &sb) || !S_ISDIR(sb.st_mode)) {
		fprintf(stderr, "kexec: %s: No valid console found for %s\n",
			__func__, device);
		return 0;
	}

	ret = snprintf(mem, sizeof(mem), "%s%s", device, "/iomem_base");
	if (ret < 0 || (size_t)ret >= sizeof(mem)) {
		fprintf(stderr, "snprintf failed: %s\n", strerror(errno));
		return 0;
	}

	printf("console memory read from %s\n", mem);

	fd = open(mem, O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "kexec: %s: No able to open %s\n",
			__func__, mem);
		return 0;
	}

	/* Leave one byte free so the data can always be NUL-terminated. */
	nread = read(fd, buffer, sizeof(buffer) - 1);
	close(fd);
	if (nread < 0) {
		fprintf(stderr, "kexec: %s: not able to read fd\n", __func__);
		return 0;
	}
	buffer[nread] = '\0';

	/* An unparsable value yields 0, i.e. "no sink". */
	if (sscanf(buffer, "%" SCNx64, &iomem) != 1)
		iomem = 0;
	printf("console memory is at %#" PRIx64 "\n", iomem);

	return iomem;
}

/**
 * struct dtb - Info about a binary device tree.
 *
 * @buf: Device tree data.
 * @size: Device tree data size.
 * @name: Shorthand name of this dtb for messages.
 * @path: Filesystem path.
 */

struct dtb {
	char *buf;
	off_t size;
	const char *name;
	const char *path;
};

/**
 * dump_reservemap - Dump the dtb's reservemap.
 *
 * Prints every memory reservation entry through dbgprintf() and stops at
 * the first entry that cannot be read or has a size of zero.
 */

static void dump_reservemap(const struct dtb *dtb)
{
	int i;

	for (i = 0; ; i++) {
		uint64_t address = 0;
		uint64_t size = 0;

		/* Don't trust address/size when libfdt reports an error. */
		if (fdt_get_mem_rsv(dtb->buf, i, &address, &size))
			break;

		if (!size)
			break;

		dbgprintf("%s: %s {%" PRIx64 ", %" PRIx64 "}\n", __func__,
			dtb->name, address, size);
	}
}

/**
 * set_bootargs - Set the dtb's bootargs.
 */
+ */ + +static int set_bootargs(struct dtb *dtb, const char *command_line) +{ + int result; + + if (!command_line || !command_line[0]) + return 0; + + result = dtb_set_bootargs(&dtb->buf, &dtb->size, command_line); + + if (result) { + fprintf(stderr, + "kexec: Set device tree bootargs failed.\n"); + return EFAILED; + } + + return 0; +} + +/** + * read_proc_dtb - Read /proc/device-tree. + */ + +static int read_proc_dtb(struct dtb *dtb) +{ + int result; + struct stat s; + static const char path[] = "/proc/device-tree"; + + result = stat(path, &s); + + if (result) { + dbgprintf("%s: %s\n", __func__, strerror(errno)); + return EFAILED; + } + + dtb->path = path; + create_flatten_tree((char **)&dtb->buf, &dtb->size, NULL); + + return 0; +} + +/** + * read_sys_dtb - Read /sys/firmware/fdt. + */ + +static int read_sys_dtb(struct dtb *dtb) +{ + int result; + struct stat s; + static const char path[] = "/sys/firmware/fdt"; + + result = stat(path, &s); + + if (result) { + dbgprintf("%s: %s\n", __func__, strerror(errno)); + return EFAILED; + } + + dtb->path = path; + dtb->buf = slurp_file(path, &dtb->size); + + return 0; +} + +/** + * read_1st_dtb - Read the 1st stage kernel's dtb. 
+ */ + +static int read_1st_dtb(struct dtb *dtb) +{ + int result; + + dtb->name = "dtb_sys"; + result = read_sys_dtb(dtb); + + if (!result) + goto on_success; + + dtb->name = "dtb_proc"; + result = read_proc_dtb(dtb); + + if (!result) + goto on_success; + + dbgprintf("%s: not found\n", __func__); + return EFAILED; + +on_success: + dbgprintf("%s: found %s\n", __func__, dtb->path); + return 0; +} + +static int get_cells_size(void *fdt, uint32_t *address_cells, + uint32_t *size_cells) +{ + int nodeoffset; + const uint32_t *prop = NULL; + int prop_len; + + /* default values */ + *address_cells = ROOT_NODE_ADDR_CELLS_DEFAULT; + *size_cells = ROOT_NODE_SIZE_CELLS_DEFAULT; + + /* under root node */ + nodeoffset = fdt_path_offset(fdt, "/"); + if (nodeoffset < 0) + goto on_error; + + prop = fdt_getprop(fdt, nodeoffset, PROP_ADDR_CELLS, &prop_len); + if (prop) { + if (prop_len == sizeof(*prop)) + *address_cells = fdt32_to_cpu(*prop); + else + goto on_error; + } + + prop = fdt_getprop(fdt, nodeoffset, PROP_SIZE_CELLS, &prop_len); + if (prop) { + if (prop_len == sizeof(*prop)) + *size_cells = fdt32_to_cpu(*prop); + else + goto on_error; + } + + dbgprintf("%s: #address-cells:%d #size-cells:%d\n", __func__, + *address_cells, *size_cells); + return 0; + +on_error: + return EFAILED; +} + +static bool cells_size_fitted(uint32_t address_cells, uint32_t size_cells, + struct memory_range *range) +{ + dbgprintf("%s: %llx-%llx\n", __func__, range->start, range->end); + + /* if *_cells >= 2, cells can hold 64-bit values anyway */ + if ((address_cells == 1) && (range->start >= (1ULL << 32))) + return false; + + if ((size_cells == 1) && + ((range->end - range->start + 1) >= (1ULL << 32))) + return false; + + return true; +} + +static void fill_property(void *buf, uint64_t val, uint32_t cells) +{ + uint32_t val32; + int i; + + if (cells == 1) { + val32 = cpu_to_fdt32((uint32_t)val); + memcpy(buf, &val32, sizeof(uint32_t)); + } else { + for (i = 0; + i < (cells * sizeof(uint32_t) - 
sizeof(uint64_t)); i++) + *(char *)buf++ = 0; + + val = cpu_to_fdt64(val); + memcpy(buf, &val, sizeof(uint64_t)); + } +} + +static int fdt_setprop_ranges(void *fdt, int nodeoffset, const char *name, + struct memory_range *ranges, int nr_ranges, bool reverse, + uint32_t address_cells, uint32_t size_cells) +{ + void *buf, *prop; + size_t buf_size; + int i, result; + struct memory_range *range; + + buf_size = (address_cells + size_cells) * sizeof(uint32_t) * nr_ranges; + prop = buf = xmalloc(buf_size); + if (!buf) + return -ENOMEM; + + for (i = 0; i < nr_ranges; i++) { + if (reverse) + range = ranges + (nr_ranges - 1 - i); + else + range = ranges + i; + + fill_property(prop, range->start, address_cells); + prop += address_cells * sizeof(uint32_t); + + fill_property(prop, range->end - range->start + 1, size_cells); + prop += size_cells * sizeof(uint32_t); + } + + result = fdt_setprop(fdt, nodeoffset, name, buf, buf_size); + + free(buf); + + return result; +} + +/** + * setup_2nd_dtb - Setup the 2nd stage kernel's dtb. 
+ */ + +static int setup_2nd_dtb(struct dtb *dtb, char *command_line, int on_crash) +{ + uint32_t address_cells, size_cells; + uint64_t fdt_val64; + uint64_t *prop; + char *new_buf = NULL; + int len, range_len; + int nodeoffset; + int new_size; + int i, result, kaslr_seed; + + result = fdt_check_header(dtb->buf); + + if (result) { + fprintf(stderr, "kexec: Invalid 2nd device tree.\n"); + return EFAILED; + } + + result = set_bootargs(dtb, command_line); + if (result) { + fprintf(stderr, "kexec: cannot set bootargs.\n"); + result = -EINVAL; + goto on_error; + } + + /* determine #address-cells and #size-cells */ + result = get_cells_size(dtb->buf, &address_cells, &size_cells); + if (result) { + fprintf(stderr, "kexec: cannot determine cells-size.\n"); + result = -EINVAL; + goto on_error; + } + + if (!cells_size_fitted(address_cells, size_cells, + &elfcorehdr_mem)) { + fprintf(stderr, "kexec: elfcorehdr doesn't fit cells-size.\n"); + result = -EINVAL; + goto on_error; + } + + for (i = 0; i < usablemem_rgns.size; i++) { + if (!cells_size_fitted(address_cells, size_cells, + &crash_reserved_mem[i])) { + fprintf(stderr, "kexec: usable memory range doesn't fit cells-size.\n"); + result = -EINVAL; + goto on_error; + } + } + + /* duplicate dt blob */ + range_len = sizeof(uint32_t) * (address_cells + size_cells); + new_size = fdt_totalsize(dtb->buf) + + fdt_prop_len(PROP_ELFCOREHDR, range_len) + + fdt_prop_len(PROP_USABLE_MEM_RANGE, range_len * usablemem_rgns.size); + + new_buf = xmalloc(new_size); + result = fdt_open_into(dtb->buf, new_buf, new_size); + if (result) { + dbgprintf("%s: fdt_open_into failed: %s\n", __func__, + fdt_strerror(result)); + result = -ENOSPC; + goto on_error; + } + + /* fixup 'kaslr-seed' with a random value, if supported */ + nodeoffset = fdt_path_offset(new_buf, "/chosen"); + prop = fdt_getprop_w(new_buf, nodeoffset, + "kaslr-seed", &len); + if (!prop || len != sizeof(uint64_t)) { + dbgprintf("%s: no kaslr-seed found\n", + __func__); + /* for kexec 
warm reboot case, we don't need to fixup + * other dtb properties + */ + if (!on_crash) { + dump_reservemap(dtb); + if (new_buf) + free(new_buf); + + return result; + } + } else { + kaslr_seed = fdt64_to_cpu(*prop); + + /* kaslr_seed must be wiped clean by primary + * kernel during boot + */ + if (kaslr_seed != 0) { + dbgprintf("%s: kaslr-seed is not wiped to 0.\n", + __func__); + result = -EINVAL; + goto on_error; + } + + /* + * Invoke the getrandom system call with + * GRND_NONBLOCK, to make sure we + * have a valid random seed to pass to the + * secondary kernel. + */ + result = syscall(SYS_getrandom, &fdt_val64, + sizeof(fdt_val64), + GRND_NONBLOCK); + + if(result == -1) { + fprintf(stderr, "%s: Reading random bytes failed.\n", + __func__); + + /* Currently on some arm64 platforms this + * 'getrandom' system call fails while booting + * the platform. + * + * In case, this happens at best we can set + * the 'kaslr_seed' as 0, indicating that the + * 2nd kernel will be booted with a 'nokaslr' + * like behaviour. + */ + fdt_val64 = 0UL; + dbgprintf("%s: Disabling KASLR in secondary kernel.\n", + __func__); + } + + nodeoffset = fdt_path_offset(new_buf, "/chosen"); + result = fdt_setprop_inplace(new_buf, + nodeoffset, "kaslr-seed", + &fdt_val64, sizeof(fdt_val64)); + if (result) { + dbgprintf("%s: fdt_setprop failed: %s\n", + __func__, fdt_strerror(result)); + result = -EINVAL; + goto on_error; + } + } + + if (on_crash) { + /* add linux,elfcorehdr */ + nodeoffset = fdt_path_offset(new_buf, "/chosen"); + result = fdt_setprop_ranges(new_buf, nodeoffset, + PROP_ELFCOREHDR, &elfcorehdr_mem, 1, false, + address_cells, size_cells); + if (result) { + dbgprintf("%s: fdt_setprop failed: %s\n", __func__, + fdt_strerror(result)); + result = -EINVAL; + goto on_error; + } + + /* + * add linux,usable-memory-range + * + * crash dump kernel support one or two regions, to make + * compatibility with existing user-space and older kdump, the + * low region is always the last one. 
+ */ + nodeoffset = fdt_path_offset(new_buf, "/chosen"); + result = fdt_setprop_ranges(new_buf, nodeoffset, + PROP_USABLE_MEM_RANGE, + usablemem_rgns.ranges, usablemem_rgns.size, true, + address_cells, size_cells); + if (result) { + dbgprintf("%s: fdt_setprop failed: %s\n", __func__, + fdt_strerror(result)); + result = -EINVAL; + goto on_error; + } + } + + fdt_pack(new_buf); + dtb->buf = new_buf; + dtb->size = fdt_totalsize(new_buf); + + dump_reservemap(dtb); + + return result; + +on_error: + fprintf(stderr, "kexec: %s failed.\n", __func__); + if (new_buf) + free(new_buf); + + return result; +} + +unsigned long arm64_locate_kernel_segment(struct kexec_info *info) +{ + unsigned long hole; + + if (info->kexec_flags & KEXEC_ON_CRASH) { + unsigned long hole_end; + + hole = (crash_reserved_mem[usablemem_rgns.size - 1].start < mem_min ? + mem_min : crash_reserved_mem[usablemem_rgns.size - 1].start); + hole = _ALIGN_UP(hole, MiB(2)); + hole_end = hole + arm64_mem.text_offset + arm64_mem.image_size; + + if ((hole_end > mem_max) || + (hole_end > crash_reserved_mem[usablemem_rgns.size - 1].end)) { + dbgprintf("%s: Crash kernel out of range\n", __func__); + hole = ULONG_MAX; + } + } else { + hole = locate_hole(info, + arm64_mem.text_offset + arm64_mem.image_size, + MiB(2), 0, ULONG_MAX, 1); + + if (hole == ULONG_MAX) + dbgprintf("%s: locate_hole failed\n", __func__); + } + + return hole; +} + +/** + * arm64_load_other_segments - Prepare the dtb, initrd and purgatory segments. 
+ */ + +int arm64_load_other_segments(struct kexec_info *info, + unsigned long image_base) +{ + int result; + unsigned long dtb_base; + unsigned long hole_min; + unsigned long hole_max; + unsigned long initrd_end; + uint64_t purgatory_sink; + char *initrd_buf = NULL; + struct dtb dtb; + char command_line[COMMAND_LINE_SIZE] = ""; + + if (arm64_opts.command_line) { + if (strlen(arm64_opts.command_line) > + sizeof(command_line) - 1) { + fprintf(stderr, + "Kernel command line too long for kernel!\n"); + return EFAILED; + } + + strncpy(command_line, arm64_opts.command_line, + sizeof(command_line) - 1); + command_line[sizeof(command_line) - 1] = 0; + } + + purgatory_sink = find_purgatory_sink(arm64_opts.console); + + dbgprintf("%s:%d: purgatory sink: 0x%" PRIx64 "\n", __func__, __LINE__, + purgatory_sink); + + if (arm64_opts.dtb) { + dtb.name = "dtb_user"; + dtb.buf = slurp_file(arm64_opts.dtb, &dtb.size); + } else { + result = read_1st_dtb(&dtb); + + if (result) { + fprintf(stderr, + "kexec: Error: No device tree available.\n"); + return EFAILED; + } + } + + result = setup_2nd_dtb(&dtb, command_line, + info->kexec_flags & KEXEC_ON_CRASH); + + if (result) + return EFAILED; + + /* Put the other segments after the image. */ + + hole_min = image_base + arm64_mem.image_size; + if (info->kexec_flags & KEXEC_ON_CRASH) + hole_max = crash_reserved_mem[usablemem_rgns.size - 1].end; + else + hole_max = ULONG_MAX; + + if (arm64_opts.initrd) { + initrd_buf = slurp_file(arm64_opts.initrd, &initrd_size); + + if (!initrd_buf) + fprintf(stderr, "kexec: Empty ramdisk file.\n"); + else { + /* Put the initrd after the kernel. */ + + initrd_base = add_buffer_phys_virt(info, initrd_buf, + initrd_size, initrd_size, 0, + hole_min, hole_max, 1, 0); + + initrd_end = initrd_base + initrd_size; + + /* Check limits as specified in booting.txt. + * The kernel may have as little as 32 GB of address space to map + * system memory and both kernel and initrd must be 1GB aligend. 
+ */ + + if (_ALIGN_UP(initrd_end, GiB(1)) - _ALIGN_DOWN(image_base, GiB(1)) > GiB(32)) { + fprintf(stderr, "kexec: Error: image + initrd too big.\n"); + return EFAILED; + } + + dbgprintf("initrd: base %lx, size %lxh (%ld)\n", + initrd_base, initrd_size, initrd_size); + + result = dtb_set_initrd((char **)&dtb.buf, + &dtb.size, initrd_base, + initrd_base + initrd_size); + + if (result) + return EFAILED; + } + } + + if (!initrd_buf) { + /* Don't reuse the initrd addresses from 1st DTB */ + dtb_clear_initrd((char **)&dtb.buf, &dtb.size); + } + + /* Check size limit as specified in booting.txt. */ + + if (dtb.size > MiB(2)) { + fprintf(stderr, "kexec: Error: dtb too big.\n"); + return EFAILED; + } + + dtb_base = add_buffer_phys_virt(info, dtb.buf, dtb.size, dtb.size, + 0, hole_min, hole_max, 1, 0); + + /* dtb_base is valid if we got here. */ + + dbgprintf("dtb: base %lx, size %lxh (%ld)\n", dtb_base, dtb.size, + dtb.size); + + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + hole_min, hole_max, 1, 0); + + info->entry = (void *)elf_rel_get_addr(&info->rhdr, "purgatory_start"); + + elf_rel_set_symbol(&info->rhdr, "arm64_sink", &purgatory_sink, + sizeof(purgatory_sink)); + + elf_rel_set_symbol(&info->rhdr, "arm64_kernel_entry", &image_base, + sizeof(image_base)); + + elf_rel_set_symbol(&info->rhdr, "arm64_dtb_addr", &dtb_base, + sizeof(dtb_base)); + + return 0; +} + +/** + * virt_to_phys - For processing elf file values. + */ + +unsigned long virt_to_phys(unsigned long v) +{ + unsigned long p; + + p = v - get_vp_offset() + get_phys_offset(); + + return p; +} + +/** + * phys_to_virt - For crashdump setup. + */ + +unsigned long phys_to_virt(struct crash_elf_info *elf_info, + unsigned long long p) +{ + unsigned long v; + + v = p - get_phys_offset() + elf_info->page_offset; + + return v; +} + +/** + * add_segment - Use virt_to_phys when loading elf files. 
+ */ + +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} + +static inline void set_phys_offset(int64_t v, char *set_method) +{ + if (arm64_mem.phys_offset == arm64_mem_ngv + || v < arm64_mem.phys_offset) { + arm64_mem.phys_offset = v; + dbgprintf("%s: phys_offset : %016lx (method : %s)\n", + __func__, arm64_mem.phys_offset, + set_method); + } +} + +/** + * get_va_bits - Helper for getting VA_BITS + */ + +static int get_va_bits(void) +{ + unsigned long long stext_sym_addr; + + /* + * if already got from kcore + */ + if (va_bits != -1) + goto out; + + + /* For kernel older than v4.19 */ + fprintf(stderr, "Warning, can't get the VA_BITS from kcore\n"); + stext_sym_addr = get_kernel_sym("_stext"); + + if (stext_sym_addr == 0) { + fprintf(stderr, "Can't get the symbol of _stext.\n"); + return -1; + } + + /* Derive va_bits as per arch/arm64/Kconfig */ + if ((stext_sym_addr & PAGE_OFFSET_36) == PAGE_OFFSET_36) { + va_bits = 36; + } else if ((stext_sym_addr & PAGE_OFFSET_39) == PAGE_OFFSET_39) { + va_bits = 39; + } else if ((stext_sym_addr & PAGE_OFFSET_42) == PAGE_OFFSET_42) { + va_bits = 42; + } else if ((stext_sym_addr & PAGE_OFFSET_47) == PAGE_OFFSET_47) { + va_bits = 47; + } else if ((stext_sym_addr & PAGE_OFFSET_48) == PAGE_OFFSET_48) { + va_bits = 48; + } else { + fprintf(stderr, + "Cannot find a proper _stext for calculating VA_BITS\n"); + return -1; + } + +out: + dbgprintf("va_bits : %d\n", va_bits); + + return 0; +} + +/** + * get_page_offset - Helper for getting PAGE_OFFSET + */ + +int get_page_offset(unsigned long *page_offset) +{ + unsigned long long text_sym_addr, kernel_va_mid; + int ret; + + text_sym_addr = get_kernel_sym("_text"); + if (text_sym_addr == 0) { + fprintf(stderr, "Can't get the symbol of _text to calculate page_offset.\n"); + return -1; + } + + ret = get_va_bits(); + if (ret < 0) + return ret; + + /* Since kernel 5.4, 
kernel image is put above + * UINT64_MAX << (va_bits - 1) + */ + kernel_va_mid = UINT64_MAX << (va_bits - 1); + /* older kernel */ + if (text_sym_addr < kernel_va_mid) + *page_offset = UINT64_MAX << (va_bits - 1); + else + *page_offset = UINT64_MAX << va_bits; + + dbgprintf("page_offset : %lx\n", *page_offset); + + return 0; +} + +static void arm64_scan_vmcoreinfo(char *pos) +{ + const char *str; + + str = "NUMBER(VA_BITS)="; + if (memcmp(str, pos, strlen(str)) == 0) + va_bits = strtoul(pos + strlen(str), NULL, 10); +} + +/** + * get_phys_offset_from_vmcoreinfo_pt_note - Helper for getting PHYS_OFFSET (and va_bits) + * from VMCOREINFO note inside 'kcore'. + */ + +static int get_phys_offset_from_vmcoreinfo_pt_note(long *phys_offset) +{ + int fd, ret = 0; + + if ((fd = open("/proc/kcore", O_RDONLY)) < 0) { + fprintf(stderr, "Can't open (%s).\n", "/proc/kcore"); + return EFAILED; + } + + arch_scan_vmcoreinfo = arm64_scan_vmcoreinfo; + ret = read_phys_offset_elf_kcore(fd, phys_offset); + + close(fd); + return ret; +} + +/** + * get_phys_base_from_pt_load - Helper for getting PHYS_OFFSET + * from PT_LOADs inside 'kcore'. 
+ */ + +int get_phys_base_from_pt_load(long *phys_offset) +{ + int i, fd, ret; + unsigned long long phys_start; + unsigned long long virt_start; + + ret = get_page_offset(&page_offset); + if (ret < 0) + return ret; + + if ((fd = open("/proc/kcore", O_RDONLY)) < 0) { + fprintf(stderr, "Can't open (%s).\n", "/proc/kcore"); + return EFAILED; + } + + read_elf(fd); + + for (i = 0; get_pt_load(i, + &phys_start, NULL, &virt_start, NULL); + i++) { + if (virt_start != NOT_KV_ADDR + && virt_start >= page_offset + && phys_start != NOT_PADDR) + *phys_offset = phys_start - + (virt_start & ~page_offset); + } + + close(fd); + return 0; +} + +static bool to_be_excluded(char *str, unsigned long long start, unsigned long long end) +{ + if (!strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL))) { + uint64_t load_start, load_end; + + if (!get_crash_kernel_load_range(&load_start, &load_end) && + (load_start == start) && (load_end == end)) + return false; + + return true; + } + + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) || + !strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) || + !strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA))) + return false; + else + return true; +} + +/** + * get_memory_ranges - Try to get the memory ranges from + * /proc/iomem. + */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + long phys_offset = -1; + FILE *fp; + const char *iomem = proc_iomem(); + char line[MAX_LINE], *str; + unsigned long long start, end; + int n, consumed; + struct memory_ranges memranges; + struct memory_range *last, excl_range; + int ret; + + if (!try_read_phys_offset_from_kcore) { + /* Since kernel version 4.19, 'kcore' contains + * a new PT_NOTE which carries the VMCOREINFO + * information. + * If the same is available, one should prefer the + * same to retrieve 'PHYS_OFFSET' value exported by + * the kernel as this is now the standard interface + * exposed by kernel for sharing machine specific + * details with the userland. 
+ */ + ret = get_phys_offset_from_vmcoreinfo_pt_note(&phys_offset); + if (!ret) { + if (phys_offset != -1) + set_phys_offset(phys_offset, + "vmcoreinfo pt_note"); + } else { + /* If we are running on a older kernel, + * try to retrieve the 'PHYS_OFFSET' value + * exported by the kernel in the 'kcore' + * file by reading the PT_LOADs and determining + * the correct combination. + */ + ret = get_phys_base_from_pt_load(&phys_offset); + if (!ret) + if (phys_offset != -1) + set_phys_offset(phys_offset, + "pt_load"); + } + + try_read_phys_offset_from_kcore = true; + } + + fp = fopen(iomem, "r"); + if (!fp) + die("Cannot open %s\n", iomem); + + memranges.ranges = NULL; + memranges.size = memranges.max_size = 0; + + while (fgets(line, sizeof(line), fp) != 0) { + n = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed); + if (n != 2) + continue; + str = line + consumed; + + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) { + ret = mem_regions_alloc_and_add(&memranges, + start, end - start + 1, RANGE_RAM); + if (ret) { + fprintf(stderr, + "Cannot allocate memory for ranges\n"); + fclose(fp); + return -ENOMEM; + } + + dbgprintf("%s:+[%d] %016llx - %016llx\n", __func__, + memranges.size - 1, + memranges.ranges[memranges.size - 1].start, + memranges.ranges[memranges.size - 1].end); + } else if (to_be_excluded(str, start, end)) { + if (!memranges.size) + continue; + + /* + * Note: mem_regions_exclude() doesn't guarantee + * that the ranges are sorted out, but as long as + * we cope with /proc/iomem, we only operate on + * the last entry and so it is safe. 
+ */ + + /* The last System RAM range */ + last = &memranges.ranges[memranges.size - 1]; + + if (last->end < start) + /* New resource outside of System RAM */ + continue; + if (end < last->start) + /* Already excluded by parent resource */ + continue; + + excl_range.start = start; + excl_range.end = end; + ret = mem_regions_alloc_and_exclude(&memranges, &excl_range); + if (ret) { + fprintf(stderr, + "Cannot allocate memory for ranges (exclude)\n"); + fclose(fp); + return -ENOMEM; + } + dbgprintf("%s:- %016llx - %016llx\n", + __func__, start, end); + } + } + + fclose(fp); + + *range = memranges.ranges; + *ranges = memranges.size; + + /* As a fallback option, we can try determining the PHYS_OFFSET + * value from the '/proc/iomem' entries as well. + * + * But note that this can be flaky, as on certain arm64 + * platforms, it has been noticed that due to a hole at the + * start of physical ram exposed to kernel + * (i.e. it doesn't start from address 0), the kernel still + * calculates the 'memstart_addr' kernel variable as 0. + * + * Whereas the SYSTEM_RAM or IOMEM_RESERVED range in + * '/proc/iomem' would carry a first entry whose start address + * is non-zero (as the physical ram exposed to the kernel + * starts from a non-zero address). + * + * In such cases, if we rely on '/proc/iomem' entries to + * calculate the phys_offset, then we will have mismatch + * between the user-space and kernel space 'PHYS_OFFSET' + * value. 
+	 */
+	if (memranges.size)
+		set_phys_offset(memranges.ranges[0].start, "iomem");
+
+	dbgprint_mem_range("System RAM ranges;",
+			   memranges.ranges, memranges.size);
+
+	return 0;
+}
+
+/* No compat trampoline is set up here: always returns 0. */
+int arch_compat_trampoline(struct kexec_info *info)
+{
+	return 0;
+}
+
+/* Returns non-zero only when the ELF header's e_machine is EM_AARCH64. */
+int machine_verify_elf_rel(struct mem_ehdr *ehdr)
+{
+	return (ehdr->e_machine == EM_AARCH64);
+}
+
+/* Relocation type numbers handled by machine_apply_elf_rel(). */
+enum aarch64_rel_type {
+	R_AARCH64_NONE = 0,
+	R_AARCH64_ABS64 = 257,
+	R_AARCH64_PREL32 = 261,
+	R_AARCH64_MOVW_UABS_G0_NC = 264,
+	R_AARCH64_MOVW_UABS_G1_NC = 266,
+	R_AARCH64_MOVW_UABS_G2_NC = 268,
+	R_AARCH64_MOVW_UABS_G3 =269,
+	R_AARCH64_LD_PREL_LO19 = 273,
+	R_AARCH64_ADR_PREL_LO21 = 274,
+	R_AARCH64_ADR_PREL_PG_HI21 = 275,
+	R_AARCH64_ADD_ABS_LO12_NC = 277,
+	R_AARCH64_JUMP26 = 282,
+	R_AARCH64_CALL26 = 283,
+	R_AARCH64_LDST64_ABS_LO12_NC = 286,
+	R_AARCH64_LDST128_ABS_LO12_NC = 299
+};
+
+/*
+ * get_bits - extract bits [end:start] (inclusive) of a 64-bit value.
+ *
+ * 'value' is taken as uint64_t on purpose: the MOVW_UABS_G2_NC and
+ * MOVW_UABS_G3 relocations ask for bits [47:32] and [63:48].  With a 32-bit
+ * parameter the argument was truncated before the shift and the shift count
+ * reached or exceeded the width of the type, which is undefined behaviour.
+ *
+ * The field width (end + 1 - start) must be smaller than 64 and the result
+ * must fit in 32 bits; the callers in this file only request 16-bit fields.
+ */
+static uint32_t get_bits(uint64_t value, int start, int end)
+{
+	uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1;
+
+	return (uint32_t)((value >> start) & mask);
+}
+
+void machine_apply_elf_rel(struct mem_ehdr *ehdr, struct mem_sym *UNUSED(sym),
+	unsigned long r_type, void *ptr, unsigned long address,
+	unsigned long value)
+{
+	uint64_t *loc64;
+	uint32_t *loc32;
+	uint64_t *location = (uint64_t *)ptr;
+	uint64_t data = *location;
+	uint64_t imm;
+	const char *type = NULL;
+
+	switch((enum aarch64_rel_type)r_type) {
+	case R_AARCH64_ABS64:
+		type = "ABS64";
+		loc64 = ptr;
+		*loc64 = cpu_to_elf64(ehdr, value);
+		break;
+	case R_AARCH64_PREL32:
+		type = "PREL32";
+		loc32 = ptr;
+		*loc32 = cpu_to_elf32(ehdr, value - address);
+		break;
+
+	/* Set a MOV[KZ] immediate field to bits [15:0] of X. No overflow check */
+	case R_AARCH64_MOVW_UABS_G0_NC:
+		type = "MOVW_UABS_G0_NC";
+		loc32 = ptr;
+		imm = get_bits(value, 0, 15);
+		*loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
+		break;
+	/* Set a MOV[KZ] immediate field to bits [31:16] of X. 
No overflow check */ + case R_AARCH64_MOVW_UABS_G1_NC: + type = "MOVW_UABS_G1_NC"; + loc32 = ptr; + imm = get_bits(value, 16, 31); + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5)); + break; + /* Set a MOV[KZ] immediate field to bits [47:32] of X. No overflow check */ + case R_AARCH64_MOVW_UABS_G2_NC: + type = "MOVW_UABS_G2_NC"; + loc32 = ptr; + imm = get_bits(value, 32, 47); + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5)); + break; + /* Set a MOV[KZ] immediate field to bits [63:48] of X */ + case R_AARCH64_MOVW_UABS_G3: + type = "MOVW_UABS_G3"; + loc32 = ptr; + imm = get_bits(value, 48, 63); + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5)); + break; + + case R_AARCH64_LD_PREL_LO19: + type = "LD_PREL_LO19"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + (((value - address) << 3) & 0xffffe0)); + break; + case R_AARCH64_ADR_PREL_LO21: + if (value & 3) + die("%s: ERROR Unaligned value: %lx\n", __func__, + value); + type = "ADR_PREL_LO21"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + (((value - address) << 3) & 0xffffe0)); + break; + case R_AARCH64_ADR_PREL_PG_HI21: + type = "ADR_PREL_PG_HI21"; + imm = ((value & ~0xfff) - (address & ~0xfff)) >> 12; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + ((imm & 3) << 29) + ((imm & 0x1ffffc) << (5 - 2))); + break; + case R_AARCH64_ADD_ABS_LO12_NC: + type = "ADD_ABS_LO12_NC"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + ((value & 0xfff) << 10)); + break; + case R_AARCH64_JUMP26: + type = "JUMP26"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + (((value - address) >> 2) & 0x3ffffff)); + break; + case R_AARCH64_CALL26: + type = "CALL26"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + (((value - address) >> 2) & 0x3ffffff)); + break; + /* encode imm field with bits [11:3] of value */ + case R_AARCH64_LDST64_ABS_LO12_NC: + if (value & 7) + die("%s: ERROR Unaligned value: %lx\n", __func__, + value); + type = 
"LDST64_ABS_LO12_NC"; + loc32 = ptr; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + + ((value & 0xff8) << (10 - 3))); + break; + + /* encode imm field with bits [11:4] of value */ + case R_AARCH64_LDST128_ABS_LO12_NC: + if (value & 15) + die("%s: ERROR Unaligned value: %lx\n", __func__, + value); + type = "LDST128_ABS_LO12_NC"; + loc32 = ptr; + imm = value & 0xff0; + *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << (10 - 4))); + break; + default: + die("%s: ERROR Unknown type: %lu\n", __func__, r_type); + break; + } + + dbgprintf("%s: %s %016lx->%016lx\n", __func__, type, data, *location); +} + +void arch_reuse_initrd(void) +{ + reuse_initrd = 1; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} diff --git a/kexec/arch/arm64/kexec-arm64.h b/kexec/arch/arm64/kexec-arm64.h new file mode 100644 index 0000000..95fb5c2 --- /dev/null +++ b/kexec/arch/arm64/kexec-arm64.h @@ -0,0 +1,84 @@ +/* + * ARM64 kexec. + */ + +#if !defined(KEXEC_ARM64_H) +#define KEXEC_ARM64_H + +#include <stdbool.h> +#include <sys/types.h> + +#include "image-header.h" +#include "kexec.h" + +#define KEXEC_SEGMENT_MAX 64 + +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 16 +#define COMMAND_LINE_SIZE 2048 /* from kernel */ + +#define KiB(x) ((x) * 1024UL) +#define MiB(x) (KiB(x) * 1024UL) +#define GiB(x) (MiB(x) * 1024UL) + +#define ULONGLONG_MAX (~0ULL) + +/* + * Incorrect address + */ +#define NOT_KV_ADDR (0x0) +#define NOT_PADDR (ULONGLONG_MAX) + +int elf_arm64_probe(const char *kernel_buf, off_t kernel_size); +int elf_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info); +void elf_arm64_usage(void); + +int image_arm64_probe(const char *kernel_buf, off_t kernel_size); +int image_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info); +void image_arm64_usage(void); + +int uImage_arm64_probe(const char *buf, off_t len); +int uImage_arm64_load(int argc, char 
**argv, const char *buf, off_t len, + struct kexec_info *info); +void uImage_arm64_usage(void); + +int pez_arm64_probe(const char *kernel_buf, off_t kernel_size); +int pez_arm64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void pez_arm64_usage(void); + + +extern off_t initrd_base; +extern off_t initrd_size; + +/** + * struct arm64_mem - Memory layout info. + */ + +struct arm64_mem { + int64_t phys_offset; + uint64_t text_offset; + uint64_t image_size; + uint64_t vp_offset; +}; + +#define arm64_mem_ngv UINT64_MAX +extern struct arm64_mem arm64_mem; + +uint64_t get_phys_offset(void); +uint64_t get_vp_offset(void); +int get_page_offset(unsigned long *offset); + +static inline void reset_vp_offset(void) +{ + arm64_mem.vp_offset = arm64_mem_ngv; +} + +int arm64_process_image_header(const struct arm64_image_header *h); +unsigned long arm64_locate_kernel_segment(struct kexec_info *info); +int arm64_load_other_segments(struct kexec_info *info, + unsigned long image_base); + +#endif diff --git a/kexec/arch/arm64/kexec-elf-arm64.c b/kexec/arch/arm64/kexec-elf-arm64.c new file mode 100644 index 0000000..e14f8e9 --- /dev/null +++ b/kexec/arch/arm64/kexec-elf-arm64.c @@ -0,0 +1,170 @@ +/* + * ARM64 kexec elf support. 
+ */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdlib.h> +#include <linux/elf.h> + +#include "arch/options.h" +#include "crashdump-arm64.h" +#include "kexec-arm64.h" +#include "kexec-elf.h" +#include "kexec-syscall.h" + +int elf_arm64_probe(const char *kernel_buf, off_t kernel_size) +{ + struct mem_ehdr ehdr; + int result; + + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + + if (result < 0) { + dbgprintf("%s: Not an ELF executable.\n", __func__); + goto on_exit; + } + + if (ehdr.e_machine != EM_AARCH64) { + dbgprintf("%s: Not an AARCH64 ELF executable.\n", __func__); + result = -1; + goto on_exit; + } + + result = 0; +on_exit: + free_elf_info(&ehdr); + return result; +} + +int elf_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info) +{ + const struct arm64_image_header *header = NULL; + unsigned long kernel_segment; + struct mem_ehdr ehdr; + int result; + int i; + + if (info->file_mode) { + fprintf(stderr, + "ELF executable is not supported in kexec_file\n"); + + return EFAILED; + } + + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + + if (result < 0) { + dbgprintf("%s: build_elf_exec_info failed\n", __func__); + goto exit; + } + + /* Find and process the arm64 image header. */ + + for (i = 0; i < ehdr.e_phnum; i++) { + struct mem_phdr *phdr = &ehdr.e_phdr[i]; + unsigned long header_offset; + + if (phdr->p_type != PT_LOAD) + continue; + + /* + * When CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET=y the image header + * could be offset in the elf segment. The linker script sets + * ehdr.e_entry to the start of text. 
+ */ + + header_offset = ehdr.e_entry - phdr->p_vaddr; + + header = (const struct arm64_image_header *)( + kernel_buf + phdr->p_offset + header_offset); + + if (!arm64_process_image_header(header)) { + dbgprintf("%s: e_entry: %016llx\n", __func__, + ehdr.e_entry); + dbgprintf("%s: p_vaddr: %016llx\n", __func__, + phdr->p_vaddr); + dbgprintf("%s: header_offset: %016lx\n", __func__, + header_offset); + + break; + } + } + + if (i == ehdr.e_phnum) { + dbgprintf("%s: Valid arm64 header not found\n", __func__); + result = EFAILED; + goto exit; + } + + kernel_segment = arm64_locate_kernel_segment(info); + + if (kernel_segment == ULONG_MAX) { + dbgprintf("%s: Kernel segment is not allocated\n", __func__); + result = EFAILED; + goto exit; + } + + arm64_mem.vp_offset = _ALIGN_DOWN(ehdr.e_entry, MiB(2)); + if (!(info->kexec_flags & KEXEC_ON_CRASH)) + arm64_mem.vp_offset -= kernel_segment - get_phys_offset(); + + dbgprintf("%s: kernel_segment: %016lx\n", __func__, kernel_segment); + dbgprintf("%s: text_offset: %016lx\n", __func__, + arm64_mem.text_offset); + dbgprintf("%s: image_size: %016lx\n", __func__, + arm64_mem.image_size); + dbgprintf("%s: phys_offset: %016lx\n", __func__, + arm64_mem.phys_offset); + dbgprintf("%s: vp_offset: %016lx\n", __func__, + arm64_mem.vp_offset); + dbgprintf("%s: PE format: %s\n", __func__, + (arm64_header_check_pe_sig(header) ? 
"yes" : "no")); + + /* create and initialize elf core header segment */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info); + if (result) { + dbgprintf("%s: Creating eflcorehdr failed.\n", + __func__); + goto exit; + } + } + + /* load the kernel */ + if (info->kexec_flags & KEXEC_ON_CRASH) + /* + * offset addresses in elf header in order to load + * vmlinux (elf_exec) into crash kernel's memory + */ + fixup_elf_addrs(&ehdr); + + result = elf_exec_load(&ehdr, info); + + if (result) { + dbgprintf("%s: elf_exec_load failed\n", __func__); + goto exit; + } + + /* load additional data */ + result = arm64_load_other_segments(info, kernel_segment + + arm64_mem.text_offset); + +exit: + reset_vp_offset(); + free_elf_info(&ehdr); + if (result) + fprintf(stderr, "kexec: Bad elf image file, load failed.\n"); + return result; +} + +void elf_arm64_usage(void) +{ + printf( +" An ARM64 ELF image, big or little endian.\n" +" Typically vmlinux or a stripped version of vmlinux.\n\n"); +} diff --git a/kexec/arch/arm64/kexec-image-arm64.c b/kexec/arch/arm64/kexec-image-arm64.c new file mode 100644 index 0000000..a196747 --- /dev/null +++ b/kexec/arch/arm64/kexec-image-arm64.c @@ -0,0 +1,119 @@ +/* + * ARM64 kexec binary image support. 
+ */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include "crashdump-arm64.h" +#include "image-header.h" +#include "kexec.h" +#include "kexec-arm64.h" +#include "kexec-syscall.h" +#include "arch/options.h" + +int image_arm64_probe(const char *kernel_buf, off_t kernel_size) +{ + const struct arm64_image_header *h; + + if (kernel_size < sizeof(struct arm64_image_header)) { + dbgprintf("%s: No arm64 image header.\n", __func__); + return -1; + } + + h = (const struct arm64_image_header *)(kernel_buf); + + if (!arm64_header_check_magic(h)) { + dbgprintf("%s: Bad arm64 image header.\n", __func__); + return -1; + } + + return 0; +} + +int image_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info) +{ + const struct arm64_image_header *header; + unsigned long kernel_segment; + int result; + + if (info->file_mode) { + if (arm64_opts.initrd) { + info->initrd_fd = open(arm64_opts.initrd, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, + "Could not open initrd file %s:%s\n", + arm64_opts.initrd, strerror(errno)); + result = EFAILED; + goto exit; + } + } + + if (arm64_opts.command_line) { + info->command_line = (char *)arm64_opts.command_line; + info->command_line_len = + strlen(arm64_opts.command_line) + 1; + } + + return 0; + } + + header = (const struct arm64_image_header *)(kernel_buf); + + if (arm64_process_image_header(header)) + return EFAILED; + + kernel_segment = arm64_locate_kernel_segment(info); + + if (kernel_segment == ULONG_MAX) { + dbgprintf("%s: Kernel segment is not allocated\n", __func__); + result = EFAILED; + goto exit; + } + + dbgprintf("%s: kernel_segment: %016lx\n", __func__, kernel_segment); + dbgprintf("%s: text_offset: %016lx\n", __func__, + arm64_mem.text_offset); + dbgprintf("%s: image_size: %016lx\n", __func__, + arm64_mem.image_size); + dbgprintf("%s: phys_offset: %016lx\n", __func__, + arm64_mem.phys_offset); + dbgprintf("%s: vp_offset: 
%016lx\n", __func__, + arm64_mem.vp_offset); + dbgprintf("%s: PE format: %s\n", __func__, + (arm64_header_check_pe_sig(header) ? "yes" : "no")); + + /* create and initialize elf core header segment */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info); + if (result) { + dbgprintf("%s: Creating eflcorehdr failed.\n", + __func__); + goto exit; + } + } + + /* load the kernel */ + add_segment_phys_virt(info, kernel_buf, kernel_size, + kernel_segment + arm64_mem.text_offset, + arm64_mem.image_size, 0); + + /* load additional data */ + result = arm64_load_other_segments(info, kernel_segment + + arm64_mem.text_offset); + +exit: + if (result) + fprintf(stderr, "kexec: load failed.\n"); + return result; +} + +void image_arm64_usage(void) +{ + printf( +" An ARM64 binary image, compressed or not, big or little endian.\n" +" Typically an Image file.\n\n"); +} diff --git a/kexec/arch/arm64/kexec-uImage-arm64.c b/kexec/arch/arm64/kexec-uImage-arm64.c new file mode 100644 index 0000000..c466913 --- /dev/null +++ b/kexec/arch/arm64/kexec-uImage-arm64.c @@ -0,0 +1,52 @@ +/* + * uImage support added by David Woodhouse <dwmw2@infradead.org> + */ +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <image.h> +#include <kexec-uImage.h> +#include "../../kexec.h" +#include "kexec-arm64.h" + +int uImage_arm64_probe(const char *buf, off_t len) +{ + int ret; + + ret = uImage_probe_kernel(buf, len, IH_ARCH_ARM64); + + /* 0 - valid uImage. + * -1 - uImage is corrupted. + * 1 - image is not a uImage. 
+ */ + if (!ret) + return 0; + else + return -1; +} + +int uImage_arm64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct Image_info img; + int ret; + + if (info->file_mode) { + fprintf(stderr, + "uImage is not supported in kexec_file\n"); + + return EFAILED; + } + + ret = uImage_load(buf, len, &img); + if (ret) + return ret; + + return image_arm64_load(argc, argv, img.buf, img.len, info); +} + +void uImage_arm64_usage(void) +{ + printf( +" An ARM64 U-boot uImage file, compressed or not, big or little endian.\n\n"); +} diff --git a/kexec/arch/arm64/kexec-vmlinuz-arm64.c b/kexec/arch/arm64/kexec-vmlinuz-arm64.c new file mode 100644 index 0000000..c0ee47c --- /dev/null +++ b/kexec/arch/arm64/kexec-vmlinuz-arm64.c @@ -0,0 +1,110 @@ +/* + * ARM64 PE compressed Image (vmlinuz, ZBOOT) support. + * + * Several distros use 'make zinstall' rule inside + * 'arch/arm64/boot/Makefile' to install the arm64 + * ZBOOT compressed file inside the boot destination + * directory (for e.g. /boot). + * + * Currently we cannot use kexec_file_load() to load vmlinuz + * PE images that self decompress. + * + * To support ZBOOT, we should: + * a). Copy the compressed contents of vmlinuz to a temporary file. + * b). Decompress (gunzip-decompress) the contents inside the + * temporary file. + * c). Validate the resulting image and write it back to the + * temporary file. + * d). Pass the 'fd' of the temporary file to the kernel space. + * + * Note this, module doesn't provide a _load() function instead + * relying on image_arm64_load() to load the resulting decompressed + * image. + * + * So basically the kernel space still gets a decompressed + * kernel image to load via kexec-tools. 
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include "kexec-arm64.h"
+#include <kexec-pe-zboot.h>
+#include "arch/options.h"
+
+/* Descriptor filled in by pez_prepare(); handed over in pez_arm64_load(). */
+static int kernel_fd = -1;
+
+/*
+ * Probe for an arm64 PE/ZBOOT (vmlinuz) image.
+ *
+ * Returns:
+ *   0     : pez_prepare() succeeded and the file behind kernel_fd starts
+ *           with a valid arm64 image header.
+ *  -1     : error / invalid format (not a valid PE + compressed ZBOOT
+ *           image).
+ *   other : non-zero status passed through unchanged from pez_prepare().
+ *
+ * kernel_buf stays owned by the caller; it is only read here, never freed.
+ */
+int pez_arm64_probe(const char *kernel_buf, off_t kernel_size)
+{
+	int ret = -1;
+	const struct arm64_image_header *h;
+	char *buf;
+	off_t buf_sz;
+
+	buf = (char *)kernel_buf;
+	buf_sz = kernel_size;
+	if (!buf)
+		return -1;
+	h = (const struct arm64_image_header *)buf;
+
+	dbgprintf("%s: PROBE.\n", __func__);
+	if (buf_sz < sizeof(struct arm64_image_header)) {
+		dbgprintf("%s: Not large enough to be a PE image.\n", __func__);
+		return -1;
+	}
+	if (!arm64_header_check_pe_sig(h)) {
+		dbgprintf("%s: Not an PE image.\n", __func__);
+		return -1;
+	}
+
+	if (buf_sz < sizeof(struct arm64_image_header) + h->pe_header) {
+		dbgprintf("%s: PE image offset larger than image.\n", __func__);
+		return -1;
+	}
+
+	if (memcmp(&buf[h->pe_header],
+		   arm64_pe_machtype, sizeof(arm64_pe_machtype))) {
+		dbgprintf("%s: PE header doesn't match machine type.\n", __func__);
+		return -1;
+	}
+
+	ret = pez_prepare(buf, buf_sz, &kernel_fd);
+
+	if (!ret) {
+		/* validate the arm64 specific header */
+		struct arm64_image_header hdr_check;
+		if (read(kernel_fd, &hdr_check, sizeof(hdr_check)) != sizeof(hdr_check))
+			goto bad_header;
+
+		/* rewind so the next reader of kernel_fd starts at offset 0 */
+		lseek(kernel_fd, 0, SEEK_SET);
+
+		if (!arm64_header_check_magic(&hdr_check)) {
+			dbgprintf("%s: Bad arm64 image header.\n", __func__);
+			goto bad_header;
+		}
+	}
+
+	return ret;
+bad_header:
+	/*
+	 * Only the descriptor obtained from pez_prepare() belongs to this
+	 * file.  'buf' is just a cast of the caller's kernel_buf, so it must
+	 * not be freed here.  Reset kernel_fd so that pez_arm64_load() can
+	 * never store an already-closed descriptor in info->kernel_fd.
+	 */
+	close(kernel_fd);
+	kernel_fd = -1;
+	return -1;
+}
+
+/*
+ * Publish the descriptor prepared by pez_arm64_probe() in info->kernel_fd
+ * and let image_arm64_load() do the rest.
+ */
+int pez_arm64_load(int argc, char **argv, const char *buf, off_t len,
+			struct kexec_info *info)
+{
+	info->kernel_fd = kernel_fd;
+	return image_arm64_load(argc, argv, buf, len, info);
+}
+
+void pez_arm64_usage(void)
+{
+	printf(
+" An ARM64 vmlinuz, PE image of a compressed, little endian.\n"
+" kernel, built with ZBOOT 
enabled.\n\n"); +} diff --git a/kexec/arch/cris/Makefile b/kexec/arch/cris/Makefile new file mode 100644 index 0000000..4982f3e --- /dev/null +++ b/kexec/arch/cris/Makefile @@ -0,0 +1,13 @@ +cris_KEXEC_SRCS = kexec/arch/cris/kexec-cris.c +cris_KEXEC_SRCS += kexec/arch/cris/kexec-elf-cris.c +cris_KEXEC_SRCS += kexec/arch/cris/cris-setup-simple.S +cris_KEXEC_SRCS += kexec/arch/cris/kexec-elf-rel-cris.c + +cris_ADD_BUFFER = +cris_ADD_SEGMENT = +cris_VIRT_TO_PHYS = + +dist += kexec/arch/cris/Makefile $(cris_KEXEC_SRCS) \ + kexec/arch/cris/kexec-cris.h \ + kexec/arch/cris/include/arch/options.h + diff --git a/kexec/arch/cris/cris-setup-simple.S b/kexec/arch/cris/cris-setup-simple.S new file mode 100644 index 0000000..764f188 --- /dev/null +++ b/kexec/arch/cris/cris-setup-simple.S @@ -0,0 +1,31 @@ +/* + * cris-setup-simple.S - code to execute before stepping into the new kernel. + * Copyright (C) 2008 AXIS Communications AB + * Written by Edgar E. Iglesias + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + + .data + .globl cris_trampoline +cris_trampoline: + .balign 4 + lapc cris_regframe, $sp + moveq 0, $r0 + move $r0, $pid + + movem [$sp+], $r14 + jump $r0 + nop + + .globl cris_regframe +cris_regframe: + .balign 4 + .fill 16, 4, 0 +cris_trampoline_end: + + .globl cris_trampoline_size +cris_trampoline_size: + .long cris_trampoline_end - cris_trampoline + diff --git a/kexec/arch/cris/include/arch/options.h b/kexec/arch/cris/include/arch/options.h new file mode 100644 index 0000000..1c1a029 --- /dev/null +++ b/kexec/arch/cris/include/arch/options.h @@ -0,0 +1,35 @@ +#ifndef KEXEC_ARCH_CRIS_OPTIONS_H +#define KEXEC_ARCH_CRIS_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) +#define OPT_APPEND (OPT_ARCH_MAX+0) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. 
+ */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + {"append", 1, 0, OPT_APPEND}, + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_CRIS_OPTIONS_H */ diff --git a/kexec/arch/cris/kexec-cris.c b/kexec/arch/cris/kexec-cris.c new file mode 100644 index 0000000..3b69709 --- /dev/null +++ b/kexec/arch/cris/kexec-cris.c @@ -0,0 +1,111 @@ +/* + * kexec-cris.c + * Copyright (C) 2008 AXIS Communications AB + * Written by Edgar E. Iglesias + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-cris.h" +#include <arch/options.h> + +#define MAX_MEMORY_RANGES 64 +#define MAX_LINE 160 +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +/* Return a sorted list of memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + int memory_ranges = 0; + + memory_range[memory_ranges].start = 0x40000000; + memory_range[memory_ranges].end = 0x41000000; + memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + + memory_range[memory_ranges].start = 0xc0000000; + memory_range[memory_ranges].end = 0xc1000000; + memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + + *range = memory_range; + *ranges = memory_ranges; + return 0; +} + +struct file_type file_type[] = { + {"elf-cris", elf_cris_probe, elf_cris_load, elf_cris_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + return 0; +} + +const struct arch_map_entry arches[] = { + { "cris", KEXEC_ARCH_CRIS }, + { "crisv32", KEXEC_ARCH_CRIS }, + { 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + 
+void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +int is_crashkernel_mem_reserved(void) +{ + return 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + /* Crash kernel region size is not exposed by the system */ + return -1; +} + +unsigned long virt_to_phys(unsigned long addr) +{ + return (addr) & 0x7fffffff; +} + +/* + * add_segment() should convert base to a physical address on cris, + * while the default is just to work with base as is */ +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} + +/* + * add_buffer() should convert base to a physical address on cris, + * while the default is just to work with base as is */ +unsigned long add_buffer(struct kexec_info *info, const void *buf, + unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, + unsigned long buf_max, int buf_end) +{ + return add_buffer_phys_virt(info, buf, bufsz, memsz, buf_align, + buf_min, buf_max, buf_end, 1); +} + diff --git a/kexec/arch/cris/kexec-cris.h b/kexec/arch/cris/kexec-cris.h new file mode 100644 index 0000000..7ee9945 --- /dev/null +++ b/kexec/arch/cris/kexec-cris.h @@ -0,0 +1,9 @@ +#ifndef KEXEC_CRIS_H +#define KEXEC_CRIS_H + +int elf_cris_probe(const char *buf, off_t len); +int elf_cris_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_cris_usage(void); + +#endif /* KEXEC_CRIS_H */ diff --git a/kexec/arch/cris/kexec-elf-cris.c b/kexec/arch/cris/kexec-elf-cris.c new file mode 100644 index 0000000..7e251e6 --- /dev/null +++ b/kexec/arch/cris/kexec-elf-cris.c @@ -0,0 +1,133 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2008 AXIS Communications AB + * Written by Edgar E. 
Iglesias
+ *
+ * Based on x86 implementation,
+ * Copyright (C) 2003-2005  Eric Biederman (ebiederm@xmission.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <elf.h>
+#include "../../kexec.h"
+#include "../../kexec-syscall.h"
+#include "../../kexec-elf.h"
+#include "../../kexec-elf-boot.h"
+#include <arch/options.h>
+#include "kexec-cris.h"
+
+/*
+ * Returns 0 when buf parses as an ELF executable whose e_machine is EM_CRIS,
+ * a negative value otherwise.
+ */
+int elf_cris_probe(const char *buf, off_t len)
+{
+	struct mem_ehdr ehdr;
+	int result;
+	result = build_elf_exec_info(buf, len, &ehdr, 0);
+	if (result < 0)
+		goto out;
+
+	/* Verify the architecture specific bits */
+	if (ehdr.e_machine != EM_CRIS) {
+		result = -1;
+		goto out;
+	}
+
+	result = 0;
+ out:
+	free_elf_info(&ehdr);
+	return result;
+}
+
+void elf_cris_usage(void)
+{
+	printf(" --append=STRING Set the kernel command line to STRING\n"
+	       );
+}
+
+#define CRAMFS_MAGIC 0x28cd3d45
+#define JHEAD_MAGIC 0x1FF528A6
+#define JHEAD_SIZE 8
+#define RAM_INIT_MAGIC 0x56902387
+#define COMMAND_LINE_MAGIC 0x87109563
+#define NAND_BOOT_MAGIC 0x9a9db001
+
+/*
+ * Load a CRIS ELF kernel and the trampoline that jumps into it.
+ *
+ * Parses the loader options, loads the ELF segments, fills in the register
+ * frame consumed by cris_trampoline and registers a copy of the trampoline
+ * as the kexec entry point.
+ *
+ * Returns 0 on success, or the negative status of elf_exec_build_load()
+ * when the image cannot be parsed or loaded.
+ */
+int elf_cris_load(int argc, char **argv, const char *buf, off_t len,
+		  struct kexec_info *info)
+{
+	struct mem_ehdr ehdr;
+	char *command_line;
+	unsigned int *trampoline_buf;
+	unsigned long trampoline_base;
+	int opt;
+	int result;
+	extern void cris_trampoline(void);
+	extern unsigned long cris_trampoline_size;
+	extern struct regframe_t {
+		unsigned int regs[16];
+	} cris_regframe;
+
+	/* See options.h -- add any more there, too. */
+	static const struct option options[] = {
+		KEXEC_ARCH_OPTIONS
+		{"append", 1, 0, OPT_APPEND},
+		{ 0, 0, 0, 0 },
+	};
+
+	static const char short_options[] = KEXEC_OPT_STR "";
+
+	/*
+	 * Parse the command line arguments
+	 */
+	/* NOTE(review): command_line is recorded but not consumed below. */
+	command_line = 0;
+	while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) {
+		switch(opt) {
+		default:
+			/* Ignore core options */
+			if (opt < OPT_ARCH_MAX) {
+				break;
+			}
+			/* fall through */
+		case OPT_APPEND:
+			command_line = optarg;
+			break;
+		}
+	}
+
+	/*
+	 * Load the ELF executable.  ehdr is only meaningful when this
+	 * succeeds, so do not go on to build the trampoline from it
+	 * after a failure.
+	 */
+	result = elf_exec_build_load(info, &ehdr, buf, len, 0);
+	if (result < 0)
+		return result;
+
+	cris_regframe.regs[0] = virt_to_phys(ehdr.e_entry);
+	cris_regframe.regs[8] = RAM_INIT_MAGIC;
+	cris_regframe.regs[12] = NAND_BOOT_MAGIC;
+
+	trampoline_buf = xmalloc(cris_trampoline_size);
+	trampoline_base = add_buffer_virt(info,
+					  trampoline_buf,
+					  cris_trampoline_size,
+					  cris_trampoline_size,
+					  4, 0, elf_max_addr(&ehdr), 1);
+	memcpy(trampoline_buf,
+	       cris_trampoline, cris_trampoline_size);
+	info->entry = (void *)trampoline_base;
+	return 0;
+}
diff --git a/kexec/arch/cris/kexec-elf-rel-cris.c b/kexec/arch/cris/kexec-elf-rel-cris.c
new file mode 100644
index 0000000..255cc2c
--- /dev/null
+++ b/kexec/arch/cris/kexec-elf-rel-cris.c
@@ -0,0 +1,43 @@
+/*
+ * kexec-elf-rel-cris.c - kexec Elf relocation routines
+ * Copyright (C) 2008 AXIS Communications AB
+ * Written by Edgar E. Iglesias
+ *
+ * derived from ../ppc/kexec-elf-rel-ppc.c
+ * Copyright (C) 2004 Albert Herranz
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details. 
+*/ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS32) { + return 0; + } + if (ehdr->e_machine != EM_CRIS) { + return 0; + } + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + switch(r_type) { + + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } + return; +} diff --git a/kexec/arch/hppa/Makefile b/kexec/arch/hppa/Makefile new file mode 100644 index 0000000..e98f6da --- /dev/null +++ b/kexec/arch/hppa/Makefile @@ -0,0 +1,13 @@ +# +# kexec hppa (linux booting linux) +# +hppa_KEXEC_SRCS = kexec/arch/hppa/kexec-hppa.c +hppa_KEXEC_SRCS += kexec/arch/hppa/kexec-elf-hppa.c +hppa_KEXEC_SRCS += kexec/arch/hppa/kexec-elf-rel-hppa.c + +hppa_ADD_SEGMENT = +hppa_VIRT_TO_PHYS = + +dist += kexec/arch/hppa/Makefile $(hppa_KEXEC_SRCS) \ + kexec/arch/hppa/kexec-hppa.h \ + kexec/arch/hppa/include/arch/options.h diff --git a/kexec/arch/hppa/include/arch/options.h b/kexec/arch/hppa/include/arch/options.h new file mode 100644 index 0000000..e9deb51 --- /dev/null +++ b/kexec/arch/hppa/include/arch/options.h @@ -0,0 +1,32 @@ +#ifndef KEXEC_ARCH_HPPA_OPTIONS_H +#define KEXEC_ARCH_HPPA_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) + +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, 0, OPT_APPEND }, \ + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, \ + { "append", 1, 0, OPT_APPEND }, \ + { "initrd", 1, 0, OPT_RAMDISK }, \ + { "ramdisk", 1, 0, OPT_RAMDISK }, + + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR "" + +/* See the other architectures for details of these; HPPA has no + * loader-specific options yet. 
+ */
+#define OPT_ARCH_MAX (OPT_MAX+0)
+
+#define OPT_APPEND (OPT_ARCH_MAX+0)
+#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+1)
+#define OPT_RAMDISK (OPT_ARCH_MAX+2)
+
+#define MAX_MEMORY_RANGES 16
+#endif /* KEXEC_ARCH_HPPA_OPTIONS_H */
diff --git a/kexec/arch/hppa/kexec-elf-hppa.c b/kexec/arch/hppa/kexec-elf-hppa.c
new file mode 100644
index 0000000..474a919
--- /dev/null
+++ b/kexec/arch/hppa/kexec-elf-hppa.c
@@ -0,0 +1,159 @@
+/*
+ * kexec-elf-hppa.c - kexec Elf loader for hppa
+ *
+ * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+*/
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <elf.h>
+#include <boot/elf_boot.h>
+#include <ip_checksum.h>
+#include "../../kexec.h"
+#include "../../kexec-elf.h"
+#include "../../kexec-syscall.h"
+#include "kexec-hppa.h"
+#include <arch/options.h>
+
+#define PAGE_SIZE 4096
+
+extern unsigned long phys_offset;
+
+/*
+ * Probe whether buf (len bytes) holds an ELF executable for PA-RISC.
+ *
+ * Returns 0 when the image parses and its e_machine is EM_PARISC.
+ * Otherwise returns a negative value: the build_elf_exec_info() error,
+ * or -1 for an image built for a different architecture.
+ *
+ * Side effect: on success the global phys_offset is set to the ELF
+ * entry address masked with 0xf0000000.
+ */
+int elf_hppa_probe(const char *buf, off_t len)
+{
+	struct mem_ehdr ehdr;
+	int result;
+	result = build_elf_exec_info(buf, len, &ehdr, 0);
+	if (result < 0)
+		goto out;
+
+	/* Verify the architecture specific bits */
+	if (ehdr.e_machine != EM_PARISC) {
+		/* for a different architecture */
+		fprintf(stderr, "Not for this architecture.\n");
+		result = -1;
+		goto out;
+	}
+
+	/* Update the global only once the image is known to be PA-RISC,
+	 * so a rejected image cannot leave a bogus offset behind. */
+	phys_offset = ehdr.e_entry & 0xf0000000;
+	result = 0;
+ out:
+	free_elf_info(&ehdr);
+	return result;
+}
+
+void elf_hppa_usage(void)
+{
+	printf(" --command-line=STRING Set the kernel command line to STRING\n"
+		" --append=STRING Set the kernel command line to STRING\n"
+		" --reuse-cmdline Use kernel command line from running system.\n"
+		" --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n"
+		" --initrd=FILE Use FILE as the kernel's 
initial ramdisk.\n"
+		);
+}
+
+/*
+ * Load an hppa ELF kernel image (buf, len bytes) into info.
+ *
+ * Options handled here: --command-line/--append (both OPT_APPEND),
+ * --reuse-cmdline and --ramdisk/--initrd.
+ *
+ * When info->file_mode is set, only the command line and an open
+ * descriptor for the ramdisk are recorded in info; nothing is parsed.
+ * Otherwise the ELF image is parsed, the first PAGE_SIZE bytes are
+ * stripped from every PT_LOAD segment that starts at file offset zero,
+ * the segments are loaded, info->entry is set to the physical entry
+ * address, and the ramdisk (if any) is added at the top of
+ * info->memory_range[0].
+ *
+ * Returns 0 on success and -1 when the ramdisk cannot be opened in file
+ * mode. ELF parse/load failures and a failed ramdisk read call die().
+ */
+int elf_hppa_load(int argc, char **argv, const char *buf, off_t len,
+	struct kexec_info *info)
+{
+	struct mem_ehdr ehdr;
+	char *cmdline = NULL, *ramdisk = NULL;
+	int opt, result, i;
+	unsigned long ramdisk_addr = 0;
+	off_t ramdisk_size = 0;
+
+	static const struct option options[] = {
+		KEXEC_ALL_OPTIONS
+		{ 0, 0, NULL, 0 },
+	};
+
+	static const char short_options[] = KEXEC_ALL_OPT_STR;
+
+	while ((opt = getopt_long(argc, argv, short_options, options, 0)) !=
+		-1) {
+		switch (opt) {
+		default:
+			/* Ignore core options */
+			if (opt < OPT_ARCH_MAX)
+				break;
+			/* values >= OPT_ARCH_MAX fall through to OPT_APPEND */
+		case OPT_APPEND:
+			cmdline = strdup(optarg);
+			break;
+		case OPT_REUSE_CMDLINE:
+			cmdline = get_command_line();
+			break;
+		case OPT_RAMDISK:
+			ramdisk = optarg;
+			break;
+		}
+	}
+
+	if (info->file_mode) {
+		if (cmdline) {
+			info->command_line = cmdline;
+			info->command_line_len = strlen(cmdline) + 1;
+		}
+
+		if (ramdisk) {
+			info->initrd_fd = open(ramdisk, O_RDONLY);
+			if (info->initrd_fd == -1) {
+				fprintf(stderr, "Could not open initrd file "
+					"%s:%s\n", ramdisk, strerror(errno));
+				return -1;
+			}
+		}
+		return 0;
+	}
+
+	result = build_elf_exec_info(buf, len, &ehdr, 0);
+	if (result < 0)
+		die("ELF exec parse failed\n");
+
+	/* Fixup PT_LOAD segments that include the ELF header (offset zero) */
+	for (i = 0; i < ehdr.e_phnum; i++) {
+		struct mem_phdr *phdr;
+		phdr = &ehdr.e_phdr[i];
+		if (phdr->p_type != PT_LOAD || phdr->p_offset)
+			continue;
+
+		dbgprintf("Removing ELF header from segment %d\n", i);
+		phdr->p_paddr += PAGE_SIZE;
+		phdr->p_vaddr += PAGE_SIZE;
+		phdr->p_filesz -= PAGE_SIZE;
+		phdr->p_memsz -= PAGE_SIZE;
+		phdr->p_offset += PAGE_SIZE;
+		phdr->p_data += PAGE_SIZE;
+	}
+
+	/* Load the ELF data */
+	result = elf_exec_load(&ehdr, info);
+	if (result < 0)
+		die("ELF exec load failed\n");
+
+	info->entry = (void *)virt_to_phys(ehdr.e_entry);
+
+
+	/* Load ramdisk */
+	if (ramdisk) {
+		void *initrd = slurp_decompress_file(ramdisk, &ramdisk_size);
+
+		/* Check the ramdisk buffer itself (not the kernel image in
+		 * buf) before its size is used to place it. */
+		if (!initrd)
+			die("Ramdisk load failed\n");
+		/* Store ramdisk at top of first memory chunk */
+		ramdisk_addr = _ALIGN_DOWN(info->memory_range[0].end -
+				ramdisk_size + 1, PAGE_SIZE);
+		add_buffer(info, initrd, ramdisk_size, ramdisk_size,
+			PAGE_SIZE, ramdisk_addr, info->memory_range[0].end,
+			1);
+	}
+
+	return 0;
+}
diff --git a/kexec/arch/hppa/kexec-elf-rel-hppa.c b/kexec/arch/hppa/kexec-elf-rel-hppa.c
new file mode 100644
index 0000000..661b67b
--- /dev/null
+++ b/kexec/arch/hppa/kexec-elf-rel-hppa.c
@@ -0,0 +1,37 @@
+/*
+ * kexec-elf-rel-hppa.c - kexec Elf relocation routines
+ *
+ * Copyright (C) 2019 Sven Schnelle <svens@stackframe.org>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+*/
+
+#include <stdio.h>
+#include <elf.h>
+#include "../../kexec.h"
+#include "../../kexec-elf.h"
+
+int machine_verify_elf_rel(struct mem_ehdr *ehdr)
+{
+	if (ehdr->ei_data != ELFDATA2MSB)
+		return 0;
+	if (ehdr->e_machine != EM_PARISC)
+		return 0;
+	return 1;
+}
+
+void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr),
+			   struct mem_sym *UNUSED(sym),
+			   unsigned long r_type,
+			   void *UNUSED(location),
+			   unsigned long UNUSED(address),
+			   unsigned long UNUSED(value))
+{
+	switch (r_type) {
+	default:
+		die("Unknown rela relocation: %lu\n", r_type);
+		break;
+	}
+	return;
+}
diff --git a/kexec/arch/hppa/kexec-hppa.c b/kexec/arch/hppa/kexec-hppa.c
new file mode 100644
index 0000000..77c9739
--- /dev/null
+++ b/kexec/arch/hppa/kexec-hppa.c
@@ -0,0 +1,148 @@
+/*
+ * kexec-hppa.c - kexec for hppa
+ *
+ * Copyright (C) 2019 Sven Schnelle <svens@stackframe.org>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-hppa.h" +#include <arch/options.h> + +#define SYSTEM_RAM "System RAM\n" +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +static struct memory_range memory_range[MAX_MEMORY_RANGES]; +unsigned long phys_offset; + +/* Return a sorted list of available memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + const char *iomem = proc_iomem(); + int memory_ranges = 0; + char line[512]; + FILE *fp; + + fp = fopen(iomem, "r"); + + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + + while(fgets(line, sizeof(line), fp) != 0) { + unsigned long long start, end; + char *str; + int type; + int consumed; + int count; + + + count = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed); + + if (count != 2) + continue; + + str = line + consumed; + + if (memcmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0) { + type = RANGE_RAM; + } else if (memcmp(str, "reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } else { + continue; + } + + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + if (++memory_ranges >= MAX_MEMORY_RANGES) + break; + } + fclose(fp); + *range = memory_range; + *ranges = memory_ranges; + + dbgprint_mem_range("MEMORY RANGES", *range, *ranges); + return 0; +} + +struct file_type file_type[] = { + {"elf-hppa", elf_hppa_probe, elf_hppa_load, elf_hppa_usage}, +}; +int file_types = ARRAY_SIZE(file_type); + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + + opterr = 0; /* Don't complain 
about unrecognized options here */ + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != + -1) { + switch (opt) { + default: + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + { "parisc64", KEXEC_ARCH_HPPA }, + { "parisc", KEXEC_ARCH_HPPA }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +int is_crashkernel_mem_reserved(void) +{ + return 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + /* Crash kernel region size is not exposed by the system */ + return -1; +} + +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} + +unsigned long virt_to_phys(unsigned long addr) +{ + return addr - phys_offset; +} diff --git a/kexec/arch/hppa/kexec-hppa.h b/kexec/arch/hppa/kexec-hppa.h new file mode 100644 index 0000000..485e5b6 --- /dev/null +++ b/kexec/arch/hppa/kexec-hppa.h @@ -0,0 +1,9 @@ +#ifndef KEXEC_HPPA_H +#define KEXEC_HPPA_H + +int elf_hppa_probe(const char *buf, off_t len); +int elf_hppa_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_hppa_usage(void); + +#endif /* KEXEC_HPPA_H */ diff --git a/kexec/arch/i386/Makefile b/kexec/arch/i386/Makefile new file mode 100644 index 0000000..f486103 --- /dev/null +++ b/kexec/arch/i386/Makefile @@ -0,0 +1,20 @@ +# +# kexec i386 (linux booting linux) +# +i386_KEXEC_SRCS = kexec/arch/i386/kexec-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-x86-common.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-elf-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-elf-rel-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-bzImage.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-multiboot-x86.c 
+i386_KEXEC_SRCS += kexec/arch/i386/kexec-mb2-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-beoboot-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-nbi.c +i386_KEXEC_SRCS += kexec/arch/i386/x86-linux-setup.c +i386_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c + +dist += kexec/arch/i386/Makefile $(i386_KEXEC_SRCS) \ + kexec/arch/i386/crashdump-x86.h \ + kexec/arch/i386/kexec-x86.h \ + kexec/arch/i386/x86-linux-setup.h \ + kexec/arch/i386/include/arch/options.h diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c new file mode 100644 index 0000000..df1f24c --- /dev/null +++ b/kexec/arch/i386/crashdump-x86.c @@ -0,0 +1,1049 @@ +/* + * kexec: Linux boots Linux + * + * Created by: Vivek Goyal (vgoyal@in.ibm.com) + * old x86_64 version Created by: Murali M Chakravarthy (muralim@in.ibm.com) + * Copyright (C) IBM Corporation, 2005. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define _XOPEN_SOURCE 600 +#define _BSD_SOURCE +#define _DEFAULT_SOURCE + +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <dirent.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../firmware_memmap.h" +#include "../../crashdump.h" +#include "kexec-x86.h" +#include "crashdump-x86.h" +#include "../../kexec-xen.h" +#include "x86-linux-setup.h" +#include <x86/x86-linux.h> + +extern struct arch_options_t arch_options; + +static int get_kernel_page_offset(struct kexec_info *UNUSED(info), + struct crash_elf_info *elf_info) +{ + + if (elf_info->machine == EM_X86_64) { + /* get_kernel_vaddr_and_size will override this */ + elf_info->page_offset = X86_64_PAGE_OFFSET; + } + else if (elf_info->machine == EM_386) { + elf_info->page_offset = X86_PAGE_OFFSET; + } + + return 0; +} + +#define X86_64_KERN_VADDR_ALIGN 0x100000 /* 1MB */ + +/* Read kernel physical load addr from the file returned by proc_iomem() + * (Kernel Code) and store in kexec_info */ +static int get_kernel_paddr(struct kexec_info *UNUSED(info), + struct crash_elf_info *elf_info) +{ + uint64_t start; + + if (elf_info->machine != EM_X86_64) + return 0; + + if (xen_present()) /* Kernel not entity mapped under Xen */ + return 0; + + if (parse_iomem_single("Kernel code\n", &start, NULL) == 0) { + elf_info->kern_paddr_start = start; + dbgprintf("kernel load physical addr start = 0x%016Lx\n", + (unsigned long long)start); + return 0; + } + + fprintf(stderr, "Cannot determine kernel physical load addr\n"); + return -1; +} + +/* Retrieve info regarding virtual address kernel has been compiled for and + * size of the kernel from /proc/kcore. Current /proc/kcore parsing from + * from kexec-tools fails because of malformed elf notes. 
A kernel patch has + * been submitted. For the folks using older kernels, this function + * hard codes the values to remain backward compatible. Once things stablize + * we should get rid of backward compatible code. */ + +static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info), + struct crash_elf_info *elf_info) +{ + int result; + const char kcore[] = "/proc/kcore"; + char *buf; + struct mem_ehdr ehdr; + struct mem_phdr *phdr, *end_phdr; + int align; + off_t size; + uint32_t elf_flags = 0; + uint64_t stext_sym; + const unsigned long long pud_mask = ~((1 << 30) - 1); + unsigned long long vaddr, lowest_vaddr = 0; + + if (elf_info->machine != EM_X86_64) + return 0; + + if (xen_present()) /* Kernel not entity mapped under Xen */ + return 0; + + align = getpagesize(); + buf = slurp_file_len(kcore, KCORE_ELF_HEADERS_SIZE, &size); + if (!buf) { + fprintf(stderr, "Cannot read %s: %s\n", kcore, strerror(errno)); + return -1; + } + + /* Don't perform checks to make sure stated phdrs and shdrs are + * actually present in the core file. It is not practical + * to read the GB size file into a user space buffer, Given the + * fact that we don't use any info from that. + */ + elf_flags |= ELF_SKIP_FILESZ_CHECK; + result = build_elf_core_info(buf, size, &ehdr, elf_flags); + if (result < 0) { + /* Perhaps KCORE_ELF_HEADERS_SIZE is too small? */ + fprintf(stderr, "ELF core (kcore) parse failed\n"); + return -1; + } + + end_phdr = &ehdr.e_phdr[ehdr.e_phnum]; + + /* Search for the real PAGE_OFFSET when KASLR memory randomization + * is enabled */ + for(phdr = ehdr.e_phdr; phdr != end_phdr; phdr++) { + if (phdr->p_type == PT_LOAD) { + vaddr = phdr->p_vaddr & pud_mask; + if (lowest_vaddr == 0 || lowest_vaddr > vaddr) + lowest_vaddr = vaddr; + } + } + if (lowest_vaddr != 0) + elf_info->page_offset = lowest_vaddr; + + /* Traverse through the Elf headers and find the region where + * _stext symbol is located in. 
That's where kernel is mapped */ + stext_sym = get_kernel_sym("_stext"); + for(phdr = ehdr.e_phdr; stext_sym && phdr != end_phdr; phdr++) { + if (phdr->p_type == PT_LOAD) { + unsigned long long saddr = phdr->p_vaddr; + unsigned long long eaddr = phdr->p_vaddr + phdr->p_memsz; + unsigned long long size; + + /* Look for kernel text mapping header. */ + if (saddr <= stext_sym && eaddr > stext_sym) { + saddr = _ALIGN_DOWN(saddr, X86_64_KERN_VADDR_ALIGN); + elf_info->kern_vaddr_start = saddr; + size = eaddr - saddr; + /* Align size to page size boundary. */ + size = _ALIGN(size, align); + elf_info->kern_size = size; + dbgprintf("kernel vaddr = 0x%llx size = 0x%llx\n", + saddr, size); + return 0; + } + } + } + + /* If failed to retrieve kernel text mapping through + * /proc/kallsyms, Traverse through the Elf headers again and + * find the region where kernel is mapped using hard-coded + * kernel mapping boundries */ + for(phdr = ehdr.e_phdr; phdr != end_phdr; phdr++) { + if (phdr->p_type == PT_LOAD) { + unsigned long long saddr = phdr->p_vaddr; + unsigned long long eaddr = phdr->p_vaddr + phdr->p_memsz; + unsigned long long size; + + /* Look for kernel text mapping header. */ + if ((saddr >= X86_64__START_KERNEL_map) && + (eaddr <= X86_64__START_KERNEL_map + X86_64_KERNEL_TEXT_SIZE)) { + saddr = _ALIGN_DOWN(saddr, X86_64_KERN_VADDR_ALIGN); + elf_info->kern_vaddr_start = saddr; + size = eaddr - saddr; + /* Align size to page size boundary. */ + size = _ALIGN(size, align); + elf_info->kern_size = size; + dbgprintf("kernel vaddr = 0x%llx size = 0x%llx\n", + saddr, size); + return 0; + } + } + } + + fprintf(stderr, "Can't find kernel text map area from kcore\n"); + return -1; +} + +/* Forward Declaration. */ +static void segregate_lowmem_region(int *nr_ranges, unsigned long lowmem_limit); +static int exclude_region(int *nr_ranges, uint64_t start, uint64_t end); + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. 
+ * A separate program header is created for backup region */ +static struct memory_range crash_memory_range[CRASH_MAX_MEMORY_RANGES]; + +/* Memory region reserved for storing panic kernel and other data. */ +#define CRASH_RESERVED_MEM_NR 8 +static struct memory_range crash_reserved_mem[CRASH_RESERVED_MEM_NR]; +static int crash_reserved_mem_nr; + +/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to + * create Elf headers. Keeping it separate from get_memory_ranges() as + * requirements are different in the case of normal kexec and crashdumps. + * + * Normal kexec needs to look at all of available physical memory irrespective + * of the fact how much of it is being used by currently running kernel. + * Crashdumps need to have access to memory regions actually being used by + * running kernel. Expecting a different file/data structure than /proc/iomem + * to look into down the line. May be something like /proc/kernelmem or may + * be zone data structures exported from kernel. + */ +static int get_crash_memory_ranges(struct memory_range **range, int *ranges, + int kexec_flags, unsigned long lowmem_limit) +{ + const char *iomem = proc_iomem(); + int memory_ranges = 0, gart = 0, i; + char line[MAX_LINE]; + FILE *fp; + unsigned long long start, end; + uint64_t gart_start = 0, gart_end = 0; + + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + + while(fgets(line, sizeof(line), fp) != 0) { + char *str; + int type, consumed, count; + + if (memory_ranges >= CRASH_MAX_MEMORY_RANGES) + break; + count = sscanf(line, "%llx-%llx : %n", + &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + dbgprintf("%016llx-%016llx : %s", + start, end, str); + /* + * We want to dump any System RAM -- memory regions currently + * used by the kernel. In the usual case, this is "System RAM" + * on the top level. 
However, we can also have "System RAM + * (virtio_mem)" below virtio devices or "System RAM (kmem)" + * below "Persistent Memory". + */ + if (strstr(str, "System RAM")) { + type = RANGE_RAM; + } else if (memcmp(str, "ACPI Tables\n", 12) == 0) { + /* + * ACPI Tables area need to be passed to new + * kernel with appropriate memmap= option. This + * is needed so that x86_64 kernel creates linear + * mapping for this region which is required for + * initializing acpi tables in second kernel. + */ + type = RANGE_ACPI; + } else if(memcmp(str,"ACPI Non-volatile Storage\n",26) == 0 ) { + type = RANGE_ACPI_NVS; + } else if(memcmp(str,"Persistent Memory (legacy)\n",27) == 0 ) { + type = RANGE_PRAM; + } else if(memcmp(str,"Persistent Memory\n",18) == 0 ) { + type = RANGE_PMEM; + } else if(memcmp(str,"reserved\n",9) == 0 ) { + type = RANGE_RESERVED; + } else if (memcmp(str, "Reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } else if (memcmp(str, "GART\n", 5) == 0) { + gart_start = start; + gart_end = end; + gart = 1; + continue; + } else { + continue; + } + + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = type; + + segregate_lowmem_region(&memory_ranges, lowmem_limit); + + memory_ranges++; + } + fclose(fp); + if (kexec_flags & KEXEC_PRESERVE_CONTEXT) { + for (i = 0; i < memory_ranges; i++) { + if (crash_memory_range[i].end > 0x0009ffff) { + crash_reserved_mem[0].start = \ + crash_memory_range[i].start; + break; + } + } + if (crash_reserved_mem[0].start >= mem_max) { + fprintf(stderr, "Too small mem_max: 0x%llx.\n", + mem_max); + return -1; + } + crash_reserved_mem[0].end = mem_max; + crash_reserved_mem[0].type = RANGE_RAM; + crash_reserved_mem_nr = 1; + } + + for (i = 0; i < crash_reserved_mem_nr; i++) + if (exclude_region(&memory_ranges, crash_reserved_mem[i].start, + crash_reserved_mem[i].end) < 0) + return -1; + + if (gart) { + /* exclude GART region if the system has one */ + if 
(exclude_region(&memory_ranges, gart_start, gart_end) < 0) + return -1; + } + *range = crash_memory_range; + *ranges = memory_ranges; + + return 0; +} + +#ifdef HAVE_LIBXENCTRL +static int get_crash_memory_ranges_xen(struct memory_range **range, + int *ranges, unsigned long lowmem_limit) +{ + struct e820entry *e820entries; + int j, rc, ret = -1; + unsigned int i; + xc_interface *xc; + + xc = xc_interface_open(NULL, NULL, 0); + + if (!xc) { + fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); + return -1; + } + + e820entries = xmalloc(sizeof(*e820entries) * CRASH_MAX_MEMORY_RANGES); + + rc = xc_get_machine_memory_map(xc, e820entries, CRASH_MAX_MEMORY_RANGES); + + if (rc < 0) { + fprintf(stderr, "%s: xc_get_machine_memory_map: %s\n", __func__, strerror(-rc)); + goto err; + } + + for (i = 0, j = 0; i < rc && j < CRASH_MAX_MEMORY_RANGES; ++i, ++j) { + crash_memory_range[j].start = e820entries[i].addr; + crash_memory_range[j].end = e820entries[i].addr + e820entries[i].size - 1; + crash_memory_range[j].type = xen_e820_to_kexec_type(e820entries[i].type); + segregate_lowmem_region(&j, lowmem_limit); + } + + *range = crash_memory_range; + *ranges = j; + + qsort(*range, *ranges, sizeof(struct memory_range), compare_ranges); + + for (i = 0; i < crash_reserved_mem_nr; i++) + if (exclude_region(ranges, crash_reserved_mem[i].start, + crash_reserved_mem[i].end) < 0) + goto err; + + ret = 0; + +err: + xc_interface_close(xc); + free(e820entries); + return ret; +} +#else +static int get_crash_memory_ranges_xen(struct memory_range **range, + int *ranges, unsigned long lowmem_limit) +{ + return 0; +} +#endif /* HAVE_LIBXENCTRL */ + +static void segregate_lowmem_region(int *nr_ranges, unsigned long lowmem_limit) +{ + unsigned long long end, start; + unsigned type; + + start = crash_memory_range[*nr_ranges].start; + end = crash_memory_range[*nr_ranges].end; + type = crash_memory_range[*nr_ranges].type; + + if (!(lowmem_limit && lowmem_limit > start && lowmem_limit 
< end)) + return; + + crash_memory_range[*nr_ranges].end = lowmem_limit - 1; + + if (*nr_ranges >= CRASH_MAX_MEMORY_RANGES - 1) + return; + + ++*nr_ranges; + + crash_memory_range[*nr_ranges].start = lowmem_limit; + crash_memory_range[*nr_ranges].end = end; + crash_memory_range[*nr_ranges].type = type; +} + +/* Removes crash reserve region from list of memory chunks for whom elf program + * headers have to be created. Assuming crash reserve region to be a single + * continuous area fully contained inside one of the memory chunks */ +static int exclude_region(int *nr_ranges, uint64_t start, uint64_t end) +{ + int i, j, tidx = -1; + struct memory_range temp_region = {0, 0, 0}; + + + for (i = 0; i < (*nr_ranges); i++) { + unsigned long long mstart, mend; + mstart = crash_memory_range[i].start; + mend = crash_memory_range[i].end; + if (start < mend && end > mstart) { + if (start != mstart && end != mend) { + /* Split memory region */ + crash_memory_range[i].end = start - 1; + temp_region.start = end + 1; + temp_region.end = mend; + temp_region.type = RANGE_RAM; + tidx = i+1; + } else if (start != mstart) + crash_memory_range[i].end = start - 1; + else + crash_memory_range[i].start = end + 1; + } + } + /* Insert split memory region, if any. */ + if (tidx >= 0) { + if (*nr_ranges == CRASH_MAX_MEMORY_RANGES) { + /* No space to insert another element. */ + fprintf(stderr, "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + return -1; + } + for (j = (*nr_ranges - 1); j >= tidx; j--) + crash_memory_range[j+1] = crash_memory_range[j]; + crash_memory_range[tidx] = temp_region; + (*nr_ranges)++; + } + return 0; +} + +/* Adds a segment from list of memory regions which new kernel can use to + * boot. Segment start and end should be aligned to 1K boundary. 
*/ +static int add_memmap(struct memory_range *memmap_p, int *nr_memmap, + unsigned long long addr, size_t size, int type) +{ + int i, j, nr_entries = 0, tidx = 0, align = 1024; + unsigned long long mstart, mend; + + /* Shrink to 1KiB alignment if needed. */ + if (type == RANGE_RAM && ((addr%align) || (size%align))) { + unsigned long long end = addr + size; + + printf("%s: RAM chunk %#llx - %#llx unaligned\n", __func__, addr, end); + addr = _ALIGN_UP(addr, align); + end = _ALIGN_DOWN(end, align); + if (addr >= end) + return -1; + size = end - addr; + printf("%s: RAM chunk shrunk to %#llx - %#llx\n", __func__, addr, end); + } + + /* Make sure at least one entry in list is free. */ + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (!mstart && !mend) + break; + else + nr_entries++; + } + if (nr_entries == CRASH_MAX_MEMMAP_NR) + return -1; + + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (mstart == 0 && mend == 0) + break; + if (mstart <= (addr+size-1) && mend >=addr) + /* Overlapping region. */ + return -1; + else if (addr > mend) + tidx = i+1; + } + /* Insert the memory region. */ + for (j = nr_entries-1; j >= tidx; j--) + memmap_p[j+1] = memmap_p[j]; + memmap_p[tidx].start = addr; + memmap_p[tidx].end = addr + size - 1; + memmap_p[tidx].type = type; + *nr_memmap = nr_entries + 1; + + dbgprint_mem_range("Memmap after adding segment", memmap_p, *nr_memmap); + + return 0; +} + +/* Removes a segment from list of memory regions which new kernel can use to + * boot. Segment start and end should be aligned to 1K boundary. */ +static int delete_memmap(struct memory_range *memmap_p, int *nr_memmap, + unsigned long long addr, size_t size) +{ + int i, j, nr_entries = 0, tidx = -1, operation = 0, align = 1024; + unsigned long long mstart, mend; + struct memory_range temp_region; + + /* Do alignment check. 
*/ + if ((addr%align) || (size%align)) + return -1; + + /* Make sure at least one entry in list is free. */ + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (!mstart && !mend) + break; + else + nr_entries++; + } + if (nr_entries == CRASH_MAX_MEMMAP_NR) + /* List if full */ + return -1; + + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + mstart = memmap_p[i].start; + mend = memmap_p[i].end; + if (mstart == 0 && mend == 0) + /* Did not find the segment in the list. */ + return -1; + if (mstart <= addr && mend >= (addr + size - 1)) { + if (mstart == addr && mend == (addr + size - 1)) { + /* Exact match. Delete region */ + operation = -1; + tidx = i; + break; + } + if (mstart != addr && mend != (addr + size - 1)) { + /* Split in two */ + memmap_p[i].end = addr - 1; + temp_region.start = addr + size; + temp_region.end = mend; + temp_region.type = memmap_p[i].type; + operation = 1; + tidx = i; + break; + } + + /* No addition/deletion required. Adjust the existing.*/ + if (mstart != addr) { + memmap_p[i].end = addr - 1; + break; + } else { + memmap_p[i].start = addr + size; + break; + } + } + } + if ((operation == 1) && tidx >=0) { + /* Insert the split memory region. */ + for (j = nr_entries-1; j > tidx; j--) + memmap_p[j+1] = memmap_p[j]; + memmap_p[tidx+1] = temp_region; + *nr_memmap = nr_entries + 1; + } + if ((operation == -1) && tidx >=0) { + /* Delete the exact match memory region. 
*/ + for (j = i+1; j < CRASH_MAX_MEMMAP_NR; j++) + memmap_p[j-1] = memmap_p[j]; + memmap_p[j-1].start = memmap_p[j-1].end = 0; + *nr_memmap = nr_entries - 1; + } + + dbgprint_mem_range("Memmap after deleting segment", memmap_p, *nr_memmap); + + return 0; +} + +static void cmdline_add_memmap_internal(char *cmdline, unsigned long startk, + unsigned long endk, int type) +{ + int cmdlen, len; + char str_mmap[256], str_tmp[20]; + + strcpy (str_mmap, " memmap="); + ultoa((endk-startk), str_tmp); + strcat (str_mmap, str_tmp); + + if (type == RANGE_RAM) + strcat (str_mmap, "K@"); + else if (type == RANGE_RESERVED) + strcat (str_mmap, "K$"); + else if (type == RANGE_ACPI || type == RANGE_ACPI_NVS) + strcat (str_mmap, "K#"); + else if (type == RANGE_PRAM) + strcat (str_mmap, "K!"); + + ultoa(startk, str_tmp); + strcat (str_mmap, str_tmp); + strcat (str_mmap, "K"); + len = strlen(str_mmap); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str_mmap); +} + +/* Adds the appropriate memmap= options to command line, indicating the + * memory regions the new kernel can use to boot into. 
*/ +static int cmdline_add_memmap(char *cmdline, struct memory_range *memmap_p) +{ + int i, cmdlen, len; + unsigned long min_sizek = 100; + char str_mmap[256]; + + /* Exact map */ + strcpy(str_mmap, " memmap=exactmap"); + len = strlen(str_mmap); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str_mmap); + + for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { + unsigned long startk, endk, type; + + startk = memmap_p[i].start/1024; + endk = (memmap_p[i].end + 1)/1024; + type = memmap_p[i].type; + + /* Only adding memory regions of RAM and ACPI and Persistent Mem */ + if (type != RANGE_RAM && + type != RANGE_ACPI && + type != RANGE_ACPI_NVS && + type != RANGE_PRAM) + continue; + + if (type == RANGE_ACPI || type == RANGE_ACPI_NVS) + endk = _ALIGN_UP(memmap_p[i].end + 1, 1024)/1024; + + if (!startk && !endk) + /* All regions traversed. */ + break; + + /* A RAM region is not worth adding if region size < 100K. + * It eats up precious command line length. */ + if (type == RANGE_RAM && (endk - startk) < min_sizek) + continue; + /* And do not add e820 reserved region either */ + cmdline_add_memmap_internal(cmdline, startk, endk, type); + } + + dbgprintf("Command line after adding memmap\n"); + dbgprintf("%s\n", cmdline); + + return 0; +} + +/* Adds the elfcorehdr= command line parameter to command line. */ +static int cmdline_add_elfcorehdr(char *cmdline, unsigned long addr) +{ + int cmdlen, len, align = 1024; + char str[30], *ptr; + + /* Passing in elfcorehdr=xxxK format. Saves space required in cmdline. 
+	 * Ensure 1K alignment*/
+	if (addr%align)
+		return -1;
+	addr = addr/align;
+	ptr = str;
+	strcpy(str, " elfcorehdr=");
+	ptr += strlen(str);
+	ultoa(addr, ptr);
+	strcat(str, "K");
+	len = strlen(str);
+	cmdlen = strlen(cmdline) + len;
+	if (cmdlen > (COMMAND_LINE_SIZE - 1))
+		die("Command line overflow\n");
+	strcat(cmdline, str);
+
+	dbgprintf("Command line after adding elfcorehdr\n");
+	dbgprintf("%s\n", cmdline);
+
+	return 0;
+}
+
+
+/*
+ * This routine is specific to i386 architecture to maintain the
+ * backward compatibility, other architectures can use the per
+ * cpu version get_crash_notes_per_cpu() directly.
+ *
+ * If /sys/kernel/crash_notes can be opened, its first line is parsed as a
+ * hexadecimal virtual address; *addr is set to x86__pa() of that address
+ * plus cpu * MAX_NOTE_BYTES and *len to MAX_NOTE_BYTES, and 0 is returned.
+ * A file that cannot be read or parsed is fatal (die()). If the file
+ * cannot be opened, the result of get_crash_notes_per_cpu() is returned.
+ */
+static int get_crash_notes(int cpu, uint64_t *addr, uint64_t *len)
+{
+	const char *crash_notes = "/sys/kernel/crash_notes";
+	char line[MAX_LINE];
+	FILE *fp;
+	unsigned long vaddr;
+
+	fp = fopen(crash_notes, "r");
+	if (fp) {
+		/* A failed read must not fall through: vaddr would be
+		 * used uninitialised below. */
+		if (fgets(line, sizeof(line), fp) == 0 ||
+		    sscanf(line, "%lx", &vaddr) != 1)
+			die("Cannot parse %s: %s\n", crash_notes,
+			    strerror(errno));
+
+		*addr = x86__pa(vaddr + (cpu * MAX_NOTE_BYTES));
+		*len = MAX_NOTE_BYTES;
+
+		dbgprintf("crash_notes addr = %llx\n",
+			  (unsigned long long)*addr);
+
+		fclose(fp);
+		return 0;
+	} else
+		return get_crash_notes_per_cpu(cpu, addr, len);
+}
+
+/* Pick the ELF core class: ELF64 for x86_64, for an empty range list, or
+ * when the last range ends above 4 GiB; ELF32 otherwise. */
+static enum coretype get_core_type(struct crash_elf_info *elf_info,
+				   struct memory_range *range, int ranges)
+{
+	if ((elf_info->machine) == EM_X86_64)
+		return CORE_TYPE_ELF64;
+	else {
+		/* fall back to default */
+		if (ranges == 0)
+			return CORE_TYPE_ELF64;
+
+		if (range[ranges - 1].end > 0xFFFFFFFFUL)
+			return CORE_TYPE_ELF64;
+		else
+			return CORE_TYPE_ELF32;
+	}
+}
+
+/* Returns 1 if /sys/firmware/efi/runtime-map can be opened, else 0. */
+static int sysfs_efi_runtime_map_exist(void)
+{
+	DIR *dir;
+
+	dir = opendir("/sys/firmware/efi/runtime-map");
+	if (!dir)
+		return 0;
+
+	closedir(dir);
+	return 1;
+}
+
+/* Appends 'acpi_rsdp=' commandline for efi boot crash dump.
+ * Does nothing when get_acpi_rsdp() returns 0; dies if the result would
+ * not fit in COMMAND_LINE_SIZE. */
+static void cmdline_add_efi(char *cmdline)
+{
+	uint64_t acpi_rsdp;
+	char acpi_rsdp_buf[MAX_LINE];
+
+	acpi_rsdp = get_acpi_rsdp();
+
+	if (!acpi_rsdp)
+		return;
+
+	/* uint64_t is wider than unsigned long on i386, so "%lx" would
+	 * mismatch the argument; print it as unsigned long long. */
+	sprintf(acpi_rsdp_buf, " acpi_rsdp=0x%llx",
+		(unsigned long long)acpi_rsdp);
+	if (strlen(cmdline) + strlen(acpi_rsdp_buf) > (COMMAND_LINE_SIZE - 1))
+		die("Command line overflow\n");
+
+	strcat(cmdline, acpi_rsdp_buf);
+}
+
+/* Record in info the source region to back up: the first RANGE_RAM entry
+ * ending at or below 0xa0000, or the BACKUP_SRC_* defaults if none. */
+static void get_backup_area(struct kexec_info *info,
+				struct memory_range *range, int ranges)
+{
+	int i;
+
+	/* Look for first 640 KiB RAM region. */
+	for (i = 0; i < ranges; ++i) {
+		if (range[i].type != RANGE_RAM || range[i].end > 0xa0000)
+			continue;
+
+		info->backup_src_start = range[i].start;
+		info->backup_src_size = range[i].end - range[i].start + 1;
+
+		dbgprintf("%s: %016llx-%016llx : System RAM\n", __func__,
+						range[i].start, range[i].end);
+
+		return;
+	}
+
+	/* First 640 KiB RAM region not found. Assume defaults. */
+	info->backup_src_start = BACKUP_SRC_START;
+	info->backup_src_size = BACKUP_SRC_END - BACKUP_SRC_START + 1;
+}
+
+/* Loads additional segments in case of a panic kernel is being loaded.
+ * One segment for backup region, another segment for storing elf headers
+ * for crash memory image.
+ *
+ * Also appends the acpi_rsdp=/elfcorehdr=/memmap= parameters to mod_cmdline
+ * and stores the second kernel's usable ranges in info->crash_range.
+ * Returns 0 on success; -1, ENOCRASHKERNEL or EFAILED on failure.
+ */
+int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline,
+				unsigned long max_addr, unsigned long min_base)
+{
+	void *tmp;
+	unsigned long sz, bufsz, memsz, elfcorehdr;
+	int nr_ranges = 0, nr_memmap = 0, align = 1024, i;
+	struct memory_range *mem_range, *memmap_p;
+	struct crash_elf_info elf_info;
+	unsigned kexec_arch;
+
+	/* Constant parts of the elf_info */
+	memset(&elf_info, 0, sizeof(elf_info));
+	elf_info.data = ELFDATA2LSB;
+
+	/* Get the architecture of the running kernel */
+	kexec_arch = info->kexec_flags & KEXEC_ARCH_MASK;
+	if (kexec_arch == KEXEC_ARCH_DEFAULT)
+		kexec_arch = KEXEC_ARCH_NATIVE;
+
+	/* Get the elf architecture of the running kernel */
+	switch(kexec_arch) {
+	case KEXEC_ARCH_X86_64:
+		elf_info.machine = EM_X86_64;
+		break;
+	case KEXEC_ARCH_386:
+		elf_info.machine = EM_386;
+		elf_info.lowmem_limit = X86_MAXMEM;
+		elf_info.get_note_info = get_crash_notes;
+		break;
+	default:
+		fprintf(stderr, "unsupported crashdump architecture: %04x\n",
+			kexec_arch);
+		return -1;
+	}
+
+	if (xen_present()) {
+		if (get_crash_memory_ranges_xen(&mem_range, &nr_ranges,
+						elf_info.lowmem_limit) < 0)
+			return -1;
+	} else
+		if (get_crash_memory_ranges(&mem_range, &nr_ranges,
+					    info->kexec_flags,
+					    elf_info.lowmem_limit) < 0)
+			return -1;
+
+	get_backup_area(info, mem_range, nr_ranges);
+
+	dbgprint_mem_range("CRASH MEMORY RANGES", mem_range, nr_ranges);
+
+	/*
+	 * if the core type has not been set on command line, set it here
+	 * automatically
+	 */
+	if (arch_options.core_header_type == CORE_TYPE_UNDEF) {
+		arch_options.core_header_type =
+			get_core_type(&elf_info, mem_range, nr_ranges);
+	}
+	/* Get the elf class... */
+	elf_info.class = ELFCLASS32;
+	if (arch_options.core_header_type == CORE_TYPE_ELF64) {
+		elf_info.class = ELFCLASS64;
+	}
+
+	if (get_kernel_page_offset(info, &elf_info))
+		return -1;
+
+	if (get_kernel_paddr(info, &elf_info))
+		return -1;
+
+	if (get_kernel_vaddr_and_size(info, &elf_info))
+		return -1;
+
+	/* Memory regions which panic kernel can safely use to boot into */
+	sz = (sizeof(struct memory_range) * CRASH_MAX_MEMMAP_NR);
+	memmap_p = xmalloc(sz);
+	memset(memmap_p, 0, sz);
+	add_memmap(memmap_p, &nr_memmap, info->backup_src_start, info->backup_src_size, RANGE_RAM);
+	for (i = 0; i < crash_reserved_mem_nr; i++) {
+		sz = crash_reserved_mem[i].end - crash_reserved_mem[i].start +1;
+		if (add_memmap(memmap_p, &nr_memmap, crash_reserved_mem[i].start,
+				sz, RANGE_RAM) < 0) {
+			free(memmap_p);
+			return ENOCRASHKERNEL;
+		}
+	}
+
+	/* Create a backup region segment to store backup data */
+	if (!(info->kexec_flags & KEXEC_PRESERVE_CONTEXT)) {
+		sz = _ALIGN(info->backup_src_size, align);
+		tmp = xmalloc(sz);
+		memset(tmp, 0, sz);
+		info->backup_start = add_buffer(info, tmp, sz, sz, align,
+						0, max_addr, -1);
+		dbgprintf("Created backup segment at 0x%lx\n",
+			  info->backup_start);
+		/* The backup segment must not be handed to the second kernel
+		 * as usable memory. */
+		if (delete_memmap(memmap_p, &nr_memmap, info->backup_start, sz) < 0) {
+			free(tmp);
+			free(memmap_p);
+			return EFAILED;
+		}
+	}
+
+	/* Create elf header segment and store crash image data. */
+	if (arch_options.core_header_type == CORE_TYPE_ELF64) {
+		if (crash_create_elf64_headers(info, &elf_info, mem_range,
+						nr_ranges, &tmp, &bufsz,
+						ELF_CORE_HEADER_ALIGN) < 0) {
+			free(memmap_p);
+			return EFAILED;
+		}
+	}
+	else {
+		if (crash_create_elf32_headers(info, &elf_info, mem_range,
+						nr_ranges, &tmp, &bufsz,
+						ELF_CORE_HEADER_ALIGN) < 0) {
+			free(memmap_p);
+			return EFAILED;
+		}
+	}
+	/* the size of the elf headers allocated is returned in 'bufsz' */
+
+	/* Hack: With some ld versions (GNU ld version 2.14.90.0.4 20030523),
+	 * vmlinux program headers show a gap of two pages between bss segment
+	 * and data segment but effectively kernel considers it as bss segment
+	 * and overwrites any data placed there. Hence bloat the memsz of
+	 * elf core header segment to 16K to avoid being placed in such gaps.
+	 * This is a makeshift solution until it is fixed in kernel.
+	 */
+	if (bufsz < (16*1024)) {
+		/* bufsize is big enough for all the PT_NOTE's and PT_LOAD's */
+		memsz = 16*1024;
+		/* memsz will be the size of the memory hole we look for */
+	} else {
+		memsz = bufsz;
+	}
+	elfcorehdr = add_buffer(info, tmp, bufsz, memsz, align, min_base,
+							max_addr, -1);
+	dbgprintf("Created elf header segment at 0x%lx\n", elfcorehdr);
+	if (delete_memmap(memmap_p, &nr_memmap, elfcorehdr, memsz) < 0) {
+		free(memmap_p);
+		return -1;
+	}
+	if (!bzImage_support_efi_boot || arch_options.noefi ||
+	    !sysfs_efi_runtime_map_exist())
+		cmdline_add_efi(mod_cmdline);
+	/* Without elfcorehdr= the capture kernel cannot find the dump
+	 * headers, so a rejected (misaligned) address is a hard failure
+	 * rather than something to ignore. */
+	if (cmdline_add_elfcorehdr(mod_cmdline, elfcorehdr) < 0) {
+		free(memmap_p);
+		return -1;
+	}
+
+	/* Inform second kernel about the presence of ACPI tables, and of
+	 * reserved and persistent-memory ranges. */
+	for (i = 0; i < nr_ranges; i++) {
+		unsigned long start, end, size, type;
+		if ( !( mem_range[i].type == RANGE_ACPI
+			|| mem_range[i].type == RANGE_ACPI_NVS
+			|| mem_range[i].type == RANGE_RESERVED
+			|| mem_range[i].type == RANGE_PMEM
+			|| mem_range[i].type == RANGE_PRAM))
+			continue;
+		start = mem_range[i].start;
+		end = mem_range[i].end;
+		type = mem_range[i].type;
+		size = end - start + 1;
+		add_memmap(memmap_p, &nr_memmap, start, size, type);
+	}
+
+	if (arch_options.pass_memmap_cmdline)
+		cmdline_add_memmap(mod_cmdline, memmap_p);
+
+	/* Store 2nd kernel boot memory ranges for later reference in
+	 * x86-setup-linux.c: setup_linux_system_parameters() */
+	info->crash_range = memmap_p;
+	info->nr_crash_ranges = nr_memmap;
+
+	return 0;
+}
+
+/* On x86, the kernel may make a low reservation in addition to the
+ * normal reservation. However, the kernel refuses to load the panic
+ * kernel to low memory, so always choose the highest range.
+ *
+ * Returns -1 if no reservation has been recorded, 0 otherwise.
+ */
+int get_crash_kernel_load_range(uint64_t *start, uint64_t *end)
+{
+	if (!crash_reserved_mem_nr)
+		return -1;
+
+	*start = crash_reserved_mem[crash_reserved_mem_nr - 1].start;
+	*end = crash_reserved_mem[crash_reserved_mem_nr - 1].end;
+
+	return 0;
+}
+
+/* Callback for kexec_iomem_for_each_line(): records match number nr as a
+ * RAM range in crash_reserved_mem[].  Matches that do not fit in the
+ * table are not stored and 1 is returned for them. */
+static int crashkernel_mem_callback(void *UNUSED(data), int nr,
+				    char *UNUSED(str),
+				    unsigned long long base,
+				    unsigned long long length)
+{
+	if (nr >= CRASH_RESERVED_MEM_NR)
+		return 1;
+
+	crash_reserved_mem[nr].start = base;
+	crash_reserved_mem[nr].end   = base + length - 1;
+	crash_reserved_mem[nr].type  = RANGE_RAM;
+	return 0;
+}
+
+/* Fills crash_reserved_mem[] from Xen or from the "Crash kernel" iomem
+ * entries and returns non-zero if at least one reservation was found. */
+int is_crashkernel_mem_reserved(void)
+{
+	int ret;
+
+	if (xen_present()) {
+		uint64_t start, end;
+
+		ret = xen_get_crashkernel_region(&start, &end);
+		if (ret < 0)
+			return 0;
+
+		crash_reserved_mem[0].start = start;
+		crash_reserved_mem[0].end = end;
+		crash_reserved_mem[0].type = RANGE_RAM;
+		crash_reserved_mem_nr = 1;
+	} else {
+		ret = kexec_iomem_for_each_line("Crash kernel\n",
+					crashkernel_mem_callback, NULL);
+		/* NOTE(review): kexec_iomem_for_each_line() is defined
+		 * elsewhere; presumably it returns the number of matching
+		 * lines.  The callback stores at most CRASH_RESERVED_MEM_NR
+		 * of them, and the count is later used as an array index,
+		 * so keep it inside the table. */
+		if (ret < 0)
+			ret = 0;
+		if (ret > CRASH_RESERVED_MEM_NR)
+			ret = CRASH_RESERVED_MEM_NR;
+		crash_reserved_mem_nr = ret;
+	}
+
+	return !!crash_reserved_mem_nr;
+}
diff --git a/kexec/arch/i386/crashdump-x86.h b/kexec/arch/i386/crashdump-x86.h
new file mode 100644
index 0000000..479a549
--- /dev/null
+++ b/kexec/arch/i386/crashdump-x86.h
@@ -0,0 +1,33 @@
+#ifndef CRASHDUMP_X86_H
+#define CRASHDUMP_X86_H
+
+struct kexec_info;
+int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline,
+				unsigned long max_addr, unsigned long min_base);
+
+#define X86_PAGE_OFFSET		0xc0000000
+#define x86__pa(x)		((unsigned long)(x)-X86_PAGE_OFFSET)
+
+#define X86__VMALLOC_RESERVE	(128 << 20)
+#define X86_MAXMEM		(-X86_PAGE_OFFSET-X86__VMALLOC_RESERVE)
+
+#define X86_64__START_KERNEL_map	0xffffffff80000000ULL
+#define X86_64_PAGE_OFFSET_PRE_2_6_27	0xffff810000000000ULL
+#define X86_64_PAGE_OFFSET_PRE_4_20_0	0xffff880000000000ULL
+#define X86_64_PAGE_OFFSET	0xffff888000000000ULL
+
+#define X86_64_MAXMEM		0x3fffffffffffUL
+
+/* Kernel text size */
+#define X86_64_KERNEL_TEXT_SIZE	(512UL*1024*1024)
+
+#define CRASH_MAX_MEMMAP_NR	1024
+
+#define CRASH_MAX_MEMORY_RANGES	32768
+
+/* Backup Region, First 640K of System RAM. */
+#define BACKUP_SRC_START	0x00000000
+#define BACKUP_SRC_END		0x0009ffff
+#define BACKUP_SRC_SIZE	(BACKUP_SRC_END - BACKUP_SRC_START + 1)
+
+#endif /* CRASHDUMP_X86_H */
diff --git a/kexec/arch/i386/include/arch/options.h b/kexec/arch/i386/include/arch/options.h
new file mode 100644
index 0000000..89e0a95
--- /dev/null
+++ b/kexec/arch/i386/include/arch/options.h
@@ -0,0 +1,86 @@
+#ifndef KEXEC_ARCH_I386_OPTIONS_H
+#define KEXEC_ARCH_I386_OPTIONS_H
+
+/*
+ *************************************************************************
+ * NOTE NOTE NOTE
+ * This file is included for i386 builds *and* x86_64 builds (which build
+ * both x86_64 and i386 loaders).
+ * It contains the combined set of options used by i386 and x86_64.
+ *************************************************************************
+ */
+
+#define OPT_RESET_VGA		(OPT_MAX+0)
+#define OPT_SERIAL		(OPT_MAX+1)
+#define OPT_SERIAL_BAUD	(OPT_MAX+2)
+#define OPT_CONSOLE_VGA	(OPT_MAX+3)
+#define OPT_CONSOLE_SERIAL	(OPT_MAX+4)
+#define OPT_ELF32_CORE		(OPT_MAX+5)
+#define OPT_ELF64_CORE		(OPT_MAX+6)
+#define OPT_ARCH_MAX		(OPT_MAX+7)
+
+#define OPT_APPEND		(OPT_ARCH_MAX+0)
+#define OPT_REUSE_CMDLINE	(OPT_ARCH_MAX+1)
+#define OPT_RAMDISK		(OPT_ARCH_MAX+2)
+#define OPT_ARGS_ELF		(OPT_ARCH_MAX+3)
+#define OPT_ARGS_LINUX		(OPT_ARCH_MAX+4)
+#define OPT_ARGS_NONE		(OPT_ARCH_MAX+5)
+#define OPT_CL			(OPT_ARCH_MAX+6)
+#define OPT_MOD		(OPT_ARCH_MAX+7)
+#define OPT_VGA		(OPT_ARCH_MAX+8)
+#define OPT_REAL_MODE		(OPT_ARCH_MAX+9)
+#define OPT_ENTRY_32BIT	(OPT_ARCH_MAX+10)
+#define OPT_PASS_MEMMAP_CMDLINE	(OPT_ARCH_MAX+11)
+#define OPT_NOEFI		(OPT_ARCH_MAX+12)
+#define OPT_REUSE_VIDEO_TYPE	(OPT_ARCH_MAX+13)
+#define OPT_DTB		(OPT_ARCH_MAX+14)
+
+/* Options relevant to the architecture (excluding loader-specific ones): */
+#define KEXEC_ARCH_OPTIONS \
+	KEXEC_OPTIONS \
+	{ "reset-vga", 0, 0, OPT_RESET_VGA }, \
+	{ "serial", 1, 0, OPT_SERIAL }, \
+	{ "serial-baud", 1, 0, OPT_SERIAL_BAUD }, \
+	{ "console-vga", 0, 0, OPT_CONSOLE_VGA }, \
+	{ "console-serial", 0, 0, OPT_CONSOLE_SERIAL }, \
+	{ "elf32-core-headers", 0, 0, OPT_ELF32_CORE }, \
+	{ "elf64-core-headers", 0, 0, OPT_ELF64_CORE }, \
+	{ "pass-memmap-cmdline", 0, 0, OPT_PASS_MEMMAP_CMDLINE }, \
+	{ "noefi", 0, 0, OPT_NOEFI}, \
+	{ "reuse-video-type", 0, 0, OPT_REUSE_VIDEO_TYPE }, \
+
+#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR ""
+
+/* The following two #defines list ALL of the options added by all of the
+ * architecture's loaders.
+ * o	main() uses this complete list to scan for its options, ignoring
+ *	arch-specific/loader-specific ones.
+ * o	Then, arch_process_options() uses this complete list to scan for its
+ *	options, ignoring general/loader-specific ones.
+ * o	Then, the file_type[n].load re-scans for options, using
+ *	KEXEC_ARCH_OPTIONS plus its loader-specific options subset.
+ *	Any unrecognised options cause an error here.
+ *
+ * This is done so that main()'s/arch_process_options()'s getopt_long() calls
+ * don't choose a kernel filename from random arguments to options they don't
+ * recognise -- as they now recognise (if not act upon) all possible options.
+ */
+#define KEXEC_ALL_OPTIONS \
+	KEXEC_ARCH_OPTIONS \
+	{ "command-line", 1, NULL, OPT_APPEND }, \
+	{ "append", 1, NULL, OPT_APPEND }, \
+	{ "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \
+	{ "initrd", 1, NULL, OPT_RAMDISK }, \
+	{ "ramdisk", 1, NULL, OPT_RAMDISK }, \
+	{ "args-elf", 0, NULL, OPT_ARGS_ELF }, \
+	{ "args-linux", 0, NULL, OPT_ARGS_LINUX }, \
+	{ "args-none", 0, NULL, OPT_ARGS_NONE }, \
+	{ "module", 1, 0, OPT_MOD }, \
+	{ "real-mode", 0, NULL, OPT_REAL_MODE }, \
+	{ "entry-32bit", 0, NULL, OPT_ENTRY_32BIT }, \
+	{ "dtb", 1, NULL, OPT_DTB },
+
+#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR
+
+#endif /* KEXEC_ARCH_I386_OPTIONS_H */
+
diff --git a/kexec/arch/i386/kexec-beoboot-x86.c b/kexec/arch/i386/kexec-beoboot-x86.c
new file mode 100644
index 0000000..d949ab8
--- /dev/null
+++ b/kexec/arch/i386/kexec-beoboot-x86.c
@@ -0,0 +1,132 @@
+/*------------------------------------------------------------ -*- C -*-
+ * Eric Biederman <ebiederman@xmission.com>
+ * Erik Arjan Hendriks <hendriks@lanl.gov>
+ *
+ * 14 December 2004
+ * This file is a derivative of the beoboot image loader, modified
+ * to work with kexec.
+ *
+ * This version is derivative from the original mkbootimg.c which is
+ * Copyright (C) 2000 Scyld Computing Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+
+ *
+ *--------------------------------------------------------------------*/
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <getopt.h>
+#include <x86/x86-linux.h>
+#include <boot/beoboot.h>
+#include "../../kexec.h"
+#include "kexec-x86.h"
+#include <arch/options.h>
+
+/* Returns 0 if buf holds a beoboot image for this architecture whose
+ * embedded kernel passes bzImage_probe(), -1 otherwise. */
+int beoboot_probe(const char *buf, off_t len)
+{
+	struct beoboot_header bb_header;
+	const char *cmdline, *kernel;
+	uintmax_t remaining;
+	int result;
+	if ((uintmax_t)len < (uintmax_t)sizeof(bb_header)) {
+		return -1;
+	}
+	memcpy(&bb_header, buf, sizeof(bb_header));
+	if (memcmp(bb_header.magic, BEOBOOT_MAGIC, 4) != 0) {
+		return -1;
+	}
+	if (bb_header.arch != BEOBOOT_ARCH) {
+		return -1;
+	}
+	/* The sizes below come straight from the file.  Make sure the
+	 * command line and the kernel really lie inside the buffer before
+	 * pointing bzImage_probe() at them.
+	 */
+	remaining = (uintmax_t)len - (uintmax_t)sizeof(bb_header);
+	if ((uintmax_t)bb_header.cmdline_size > remaining) {
+		return -1;
+	}
+	remaining -= (uintmax_t)bb_header.cmdline_size;
+	if ((uintmax_t)bb_header.kernel_size > remaining) {
+		return -1;
+	}
+	/* Make certain a bzImage is packed into there.
+	 */
+	cmdline = buf + sizeof(bb_header);
+	kernel = cmdline + bb_header.cmdline_size;
+	result = bzImage_probe(kernel, bb_header.kernel_size);
+
+	return result;
+}
+
+/* Prints the loader-specific option help. */
+void beoboot_usage(void)
+{
+	printf( " --real-mode Use the kernels real mode entry point.\n"
+		);
+
+	/* No parameters are parsed */
+}
+
+#define SETUP_BASE 0x90000
+#define KERN32_BASE 0x100000 /* 1MB */
+#define INITRD_BASE 0x1000000 /* 16MB */
+
+/* Unpacks the command line, kernel and optional initrd from a beoboot
+ * image and hands them to do_bzImage_load().  Returns its result. */
+int beoboot_load(int argc, char **argv, const char *buf, off_t UNUSED(len),
+	struct kexec_info *info)
+{
+	struct beoboot_header bb_header;
+	const char *command_line, *kernel, *initrd;
+
+	int real_mode_entry;
+	int opt;
+	int result;
+
+	/* See options.h -- add any more there, too. */
+	static const struct option options[] = {
+		KEXEC_ARCH_OPTIONS
+		{ "real-mode", 0, 0, OPT_REAL_MODE },
+		{ 0, 0, 0, 0 },
+	};
+	static const char short_options[] = KEXEC_ARCH_OPT_STR "";
+
+	/*
+	 * Parse the command line arguments
+	 */
+	real_mode_entry = 0;
+	while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) {
+		switch(opt) {
+		case OPT_REAL_MODE:
+			real_mode_entry = 1;
+			break;
+		default:
+			/* Ignore everything else.  KEXEC_ARCH_OPTIONS contains
+			 * options numbered above OPT_ARCH_MAX (--noefi,
+			 * --pass-memmap-cmdline, --reuse-video-type); they used
+			 * to fall through into OPT_REAL_MODE and silently
+			 * select the real mode entry point. */
+			break;
+		}
+	}
+
+
+	/*
+	 * Parse the file
+	 */
+	memcpy(&bb_header, buf, sizeof(bb_header));
+	command_line = buf + sizeof(bb_header);
+	kernel = command_line + bb_header.cmdline_size;
+	initrd = NULL;
+	if (bb_header.flags & BEOBOOT_INITRD_PRESENT) {
+		initrd = kernel + bb_header.kernel_size;
+	}
+
+	result = do_bzImage_load(info,
+		kernel, bb_header.kernel_size,
+		command_line, bb_header.cmdline_size,
+		initrd, bb_header.initrd_size,
+		0, 0, real_mode_entry);
+
+	return result;
+}
+
diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
new file mode 100644
index 0000000..1b8f20c
--- /dev/null
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -0,0 +1,471 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Copyright (C) 2003-2010 Eric Biederman (ebiederm@xmission.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <elf.h>
+#include <boot/elf_boot.h>
+#include <ip_checksum.h>
+#include <x86/x86-linux.h>
+#include "../../kexec.h"
+#include "../../kexec-elf.h"
+#include "../../kexec-syscall.h"
+#include "kexec-x86.h"
+#include "x86-linux-setup.h"
+#include "crashdump-x86.h"
+#include <arch/options.h>
+
+static const int probe_debug = 0;
+int bzImage_support_efi_boot = 0;
+
+/* Returns 0 if buf looks like a bzImage (at least two sectors, "HdrS"
+ * magic, 0xAA55 boot signature, boot protocol >= 2.00, loaded-high flag
+ * set), -1 otherwise. */
+int bzImage_probe(const char *buf, off_t len)
+{
+	const struct x86_linux_header *header;
+	if ((uintmax_t)len < (uintmax_t)(2 * 512)) {
+		if (probe_debug) {
+			fprintf(stderr, "File is too short to be a bzImage!\n");
+		}
+		return -1;
+	}
+	header = (const struct x86_linux_header *)buf;
+	if (memcmp(header->header_magic, "HdrS", 4) != 0) {
+		if (probe_debug) {
+			fprintf(stderr, "Not a bzImage\n");
+		}
+		return -1;
+	}
+	if (header->boot_sector_magic != 0xAA55) {
+		if (probe_debug) {
+			fprintf(stderr, "No x86 boot sector present\n");
+		}
+		/* No x86 boot sector present */
+		return -1;
+	}
+	if (header->protocol_version < 0x0200) {
+		if (probe_debug) {
+			fprintf(stderr, "Must be at least protocol version 2.00\n");
+		}
+		/* Must be at least protocol version 2.00 */
+		return -1;
+	}
+	if ((header->loadflags & 1) == 0) {
+		if (probe_debug) {
+			fprintf(stderr, "zImage not a bzImage\n");
+		}
+		/* Not a bzImage */
+		return -1;
+	}
+	/* I've got a bzImage */
+	if (probe_debug) {
+		fprintf(stderr, "It's a bzImage\n");
+	}
+	return 0;
+}
+
+
+/* Prints the loader-specific option help. */
+void bzImage_usage(void)
+{
+	printf( " --real-mode Use the kernels real mode entry point.\n"
+		" --command-line=STRING Set the kernel command line to STRING.\n"
+		" --append=STRING Set the kernel command line to STRING.\n"
+		" --reuse-cmdline Use kernel command line from running system.\n"
+		" --initrd=FILE Use FILE as the kernel's initial ramdisk.\n"
+		" --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n"
+		" --dtb=FILE Use FILE as devicetree.\n"
+		);
+
+}
+
+/* Builds the segments needed to boot a bzImage: purgatory, the
+ * setup/parameter block with the command line, and the protected-mode
+ * kernel; for crash or preserve-context loads the crashdump segments are
+ * created first and their parameters appended to the command line.
+ * Returns 0 on success and -1 on failure; some failures die() instead.
+ */
+int do_bzImage_load(struct kexec_info *info,
+	const char *kernel, off_t kernel_len,
+	const char *command_line, off_t command_line_len,
+	const char *initrd, off_t initrd_len,
+	const char *dtb, off_t dtb_len,
+	int real_mode_entry)
+{
+	struct x86_linux_header setup_header;
+	struct x86_linux_param_header *real_mode;
+	int setup_sects;
+	size_t size;
+	int kern16_size;
+	unsigned long setup_base, setup_size;
+	struct entry32_regs regs32;
+	struct entry16_regs regs16;
+	unsigned int relocatable_kernel = 0;
+	unsigned long kernel32_load_addr;
+	char *modified_cmdline;
+	unsigned long cmdline_end;
+	unsigned long kern16_size_needed;
+	unsigned long heap_size = 0;
+
+	/*
+	 * Find out about the file I am about to load.
+	 */
+	if ((uintmax_t)kernel_len < (uintmax_t)(2 * 512)) {
+		return -1;
+	}
+	memcpy(&setup_header, kernel, sizeof(setup_header));
+	setup_sects = setup_header.setup_sects;
+	if (setup_sects == 0) {
+		setup_sects = 4;
+	}
+
+	/* Real-mode part: the boot sector plus setup_sects setup sectors. */
+	kern16_size = (setup_sects +1) *512;
+	if (kernel_len < kern16_size) {
+		fprintf(stderr, "BzImage truncated?\n");
+		return -1;
+	}
+
+	if (setup_header.protocol_version >= 0x0206) {
+		if ((uintmax_t)command_line_len >
+		    (uintmax_t)setup_header.cmdline_size) {
+			dbgprintf("Kernel command line too long for kernel!\n");
+			return -1;
+		}
+	} else {
+		if (command_line_len > 255) {
+			dbgprintf("WARNING: This kernel may only support 255 byte command lines\n");
+		}
+	}
+
+	if (setup_header.protocol_version >= 0x0205) {
+		relocatable_kernel = setup_header.relocatable_kernel;
+		/* Only claim relocatability when the header flag is set. */
+		if (relocatable_kernel)
+			dbgprintf("bzImage is relocatable\n");
+	}
+
+	/* Can't use bzImage for crash dump purposes with real mode entry */
+	if((info->kexec_flags & KEXEC_ON_CRASH) && real_mode_entry) {
+		fprintf(stderr, "Can't use bzImage for crash dump purposes"
+				" with real mode entry\n");
+		return -1;
+	}
+
+	if((info->kexec_flags & KEXEC_ON_CRASH) && !relocatable_kernel) {
+		fprintf(stderr, "BzImage is not relocatable. Can't be used"
+				" as capture kernel.\n");
+		return -1;
+	}
+
+	/* Need to append some command line parameters internally in case of
+	 * taking crash dumps.
+	 */
+	if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
+		modified_cmdline = xmalloc(COMMAND_LINE_SIZE);
+		memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE);
+		if (command_line) {
+			strncpy(modified_cmdline, command_line,
+					COMMAND_LINE_SIZE);
+			modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0';
+		}
+
+		/* If panic kernel is being loaded, additional segments need
+		 * to be created. load_crashdump_segments will take care of
+		 * loading the segments as high in memory as possible, hence
+		 * in turn as far away as possible from kernel to avoid being
+		 * stomped by the kernel.
+		 */
+		if (load_crashdump_segments(info, modified_cmdline, -1, 0) < 0) {
+			/* Nothing references the scratch buffer yet. */
+			free(modified_cmdline);
+			return -1;
+		}
+
+		/* Use new command line buffer */
+		command_line = modified_cmdline;
+		command_line_len = strlen(command_line) +1;
+	}
+
+	/* Load the trampoline. This must load at a higher address
+	 * than the argument/parameter segment or the kernel will stomp
+	 * its gdt.
+	 *
+	 * x86_64 purgatory code has got relocations type R_X86_64_32S
+	 * that means purgatory got to be loaded within first 2G otherwise
+	 * overflow takes place while applying relocations.
+	 */
+	if (!real_mode_entry && relocatable_kernel)
+		elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size,
+					0x3000, 0x7fffffff, -1, 0);
+	else
+		elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size,
+					0x3000, 640*1024, -1, 0);
+	dbgprintf("Loaded purgatory at addr 0x%lx\n", info->rhdr.rel_addr);
+
+	/* The argument/parameter segment */
+	if (real_mode_entry) {
+		/* need to include size for bss and heap etc */
+		if (setup_header.protocol_version >= 0x0201)
+			kern16_size_needed = setup_header.heap_end_ptr;
+		else
+			kern16_size_needed = kern16_size + 8192; /* bss */
+		if (kern16_size_needed < kern16_size)
+			kern16_size_needed = kern16_size;
+		if (kern16_size_needed > 0xfffc)
+			die("kern16_size_needed is more then 64k\n");
+		heap_size = 0xfffc - kern16_size_needed; /* less 64k */
+		heap_size = _ALIGN_DOWN(heap_size, 0x200);
+		kern16_size_needed += heap_size;
+	} else {
+		kern16_size_needed = kern16_size;
+		/* needs to be bigger than size of struct bootparams */
+		if (kern16_size_needed < 4096)
+			kern16_size_needed = 4096;
+	}
+	setup_size = kern16_size_needed + command_line_len +
+						PURGATORY_CMDLINE_SIZE;
+	real_mode = xmalloc(setup_size);
+	memset(real_mode, 0, setup_size);
+	if (!real_mode_entry) {
+		/* Byte 0x201 is read as unsigned so a value >= 0x80 is not
+		 * sign-extended; the result is clamped to 0x7f either way. */
+		unsigned long setup_header_size =
+			(unsigned char)kernel[0x201] + 0x202 - 0x1f1;
+
+		/* only copy setup_header */
+		if (setup_header_size > 0x7f)
+			setup_header_size = 0x7f;
+		memcpy((unsigned char *)real_mode + 0x1f1, kernel + 0x1f1,
+			setup_header_size);
+	} else {
+		/* copy setup code and setup_header */
+		memcpy(real_mode, kernel, kern16_size);
+	}
+
+	if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
+		/* If using bzImage for capture kernel, then we will not be
+		 * executing real mode code. setup segment can be loaded
+		 * anywhere as we will be just reading command line.
+		 */
+		setup_base = add_buffer(info, real_mode, setup_size, setup_size,
+			16, 0x3000, -1, -1);
+	}
+	else if (real_mode->protocol_version >= 0x0200) {
+		/* Careful setup_base must be greater than 8K */
+		setup_base = add_buffer(info, real_mode, setup_size, setup_size,
+			16, 0x3000, 640*1024, 1);
+	} else {
+		add_segment(info, real_mode, setup_size, SETUP_BASE, setup_size);
+		setup_base = SETUP_BASE;
+	}
+	dbgprintf("Loaded real-mode code and command line at 0x%lx\n",
+		setup_base);
+
+	/* Verify purgatory loads higher than the parameters */
+	if (info->rhdr.rel_addr < setup_base) {
+		die("Could not put setup code above the kernel parameters\n");
+	}
+
+	/* The main kernel segment */
+	size = kernel_len - kern16_size;
+
+	if (real_mode->protocol_version >=0x0205 && relocatable_kernel) {
+		/* Relocatable bzImage */
+		unsigned long kern_align = real_mode->kernel_alignment;
+		unsigned long kernel32_max_addr = DEFAULT_BZIMAGE_ADDR_MAX;
+
+		if (kernel32_max_addr > real_mode->initrd_addr_max)
+			kernel32_max_addr = real_mode->initrd_addr_max;
+
+		kernel32_load_addr = add_buffer(info, kernel + kern16_size,
+						size, size, kern_align,
+						0x100000, kernel32_max_addr,
+						1);
+	}
+	else {
+		kernel32_load_addr = KERN32_BASE;
+		add_segment(info, kernel + kern16_size, size,
+				kernel32_load_addr, size);
+	}
+
+	dbgprintf("Loaded 32bit kernel at 0x%lx\n", kernel32_load_addr);
+
+	/* Tell the kernel what is going on */
+	setup_linux_bootloader_parameters(info, real_mode, setup_base,
+		kern16_size_needed, command_line, command_line_len,
+		initrd, initrd_len);
+
+	if (real_mode_entry && real_mode->protocol_version >= 0x0201) {
+		real_mode->loader_flags |= 0x80; /* CAN_USE_HEAP */
+		real_mode->heap_end_ptr += heap_size - 0x200; /*stack*/
+	}
+
+	/* Get the initial register values */
+	if (real_mode_entry)
+		elf_rel_get_symbol(&info->rhdr, "entry16_regs",
+					&regs16, sizeof(regs16));
+
+	/*
+	 * Initialize the 32bit start information.
+	 */
+	regs32.eax = 0; /* unused */
+	regs32.ebx = 0; /* 0 == boot not AP processor start */
+	regs32.ecx = 0; /* unused */
+	regs32.edx = 0; /* unused */
+	regs32.esi = setup_base; /* kernel parameters */
+	regs32.edi = 0; /* unused */
+	regs32.esp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* stack, unused */
+	regs32.ebp = 0; /* unused */
+	regs32.eip = kernel32_load_addr; /* kernel entry point */
+
+	/*
+	 * Initialize the 16bit start information.
+	 */
+	if (real_mode_entry) {
+		regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base >> 4;
+		regs16.cs = regs16.ds + 0x20;
+		regs16.ip = 0;
+		/* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */
+		regs16.ss = (elf_rel_get_addr(&info->rhdr, "stack_end") - 64*1024) >> 4;
+		/* XXX: Documentation/i386/boot.txt says 'sp' must equal heap_end */
+		regs16.esp = 0xFFFC;
+
+		printf("Starting the kernel in real mode\n");
+		regs32.eip = elf_rel_get_addr(&info->rhdr, "entry16");
+		real_mode->kernel_start = kernel32_load_addr;
+	}
+	if (real_mode_entry && kexec_debug) {
+		unsigned long entry16_debug, pre32, first32;
+		uint32_t old_first32;
+		/* Find the location of the symbols */
+		entry16_debug = elf_rel_get_addr(&info->rhdr, "entry16_debug");
+		pre32 = elf_rel_get_addr(&info->rhdr, "entry16_debug_pre32");
+		first32 = elf_rel_get_addr(&info->rhdr, "entry16_debug_first32");
+
+		/* Hook all of the linux kernel hooks */
+		real_mode->rmode_switch_cs = entry16_debug >> 4;
+		real_mode->rmode_switch_ip = pre32 - entry16_debug;
+		old_first32 = real_mode->kernel_start;
+		real_mode->kernel_start = first32;
+		elf_rel_set_symbol(&info->rhdr, "entry16_debug_old_first32",
+			&old_first32, sizeof(old_first32));
+
+		regs32.eip = entry16_debug;
+	}
+	if (real_mode_entry) {
+		elf_rel_set_symbol(&info->rhdr, "entry16_regs",
+			&regs16, sizeof(regs16));
+		elf_rel_set_symbol(&info->rhdr, "entry16_debug_regs",
+			&regs16, sizeof(regs16));
+	}
+	elf_rel_set_symbol(&info->rhdr, "entry32_regs", &regs32, sizeof(regs32));
+	cmdline_end = setup_base + kern16_size_needed + command_line_len - 1;
+	elf_rel_set_symbol(&info->rhdr, "cmdline_end", &cmdline_end,
+			   sizeof(unsigned long));
+
+	/* Fill in the information BIOS calls would normally provide. */
+	if (!real_mode_entry) {
+		setup_linux_system_parameters(info, real_mode);
+	}
+
+	if (dtb) {
+		setup_linux_dtb(info, real_mode, dtb, dtb_len);
+	}
+
+	return 0;
+}
+
+/* Parses the bzImage loader options, reads the optional ramdisk and dtb
+ * files and calls do_bzImage_load().  Returns its result. */
+int bzImage_load(int argc, char **argv, const char *buf, off_t len,
+	struct kexec_info *info)
+{
+	char *command_line = NULL;
+	char *tmp_cmdline = NULL;
+	const char *ramdisk, *append = NULL, *dtb;
+	char *ramdisk_buf;
+	off_t ramdisk_length;
+	int command_line_len;
+	int real_mode_entry;
+	int opt;
+	int result;
+	char *dtb_buf;
+	off_t dtb_length;
+
+	/* See options.h -- add any more there, too. */
+	static const struct option options[] = {
+		KEXEC_ARCH_OPTIONS
+		{ "command-line", 1, 0, OPT_APPEND },
+		{ "append", 1, 0, OPT_APPEND },
+		{ "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE },
+		{ "initrd", 1, 0, OPT_RAMDISK },
+		{ "ramdisk", 1, 0, OPT_RAMDISK },
+		{ "real-mode", 0, 0, OPT_REAL_MODE },
+		{ "dtb", 1, 0, OPT_DTB },
+		{ 0, 0, 0, 0 },
+	};
+	static const char short_options[] = KEXEC_ARCH_OPT_STR "d";
+
+	real_mode_entry = 0;
+	ramdisk = 0;
+	ramdisk_length = 0;
+	dtb = 0;
+	dtb_length = 0;
+	while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) {
+		switch(opt) {
+		case OPT_APPEND:
+			append = optarg;
+			break;
+		case OPT_REUSE_CMDLINE:
+			tmp_cmdline = get_command_line();
+			break;
+		case OPT_RAMDISK:
+			ramdisk = optarg;
+			break;
+		case OPT_REAL_MODE:
+			real_mode_entry = 1;
+			break;
+		case OPT_DTB:
+			dtb = optarg;
+			break;
+		default:
+			/* Ignore everything else.  KEXEC_ARCH_OPTIONS contains
+			 * options numbered above OPT_ARCH_MAX (--noefi,
+			 * --pass-memmap-cmdline, --reuse-video-type); they used
+			 * to fall through into OPT_APPEND and overwrite a
+			 * previously given --append with a NULL optarg. */
+			break;
+		}
+	}
+	command_line = concat_cmdline(tmp_cmdline, append);
+	if (tmp_cmdline) {
+		free(tmp_cmdline);
+	}
+	command_line_len = 0;
+	if (command_line) {
+		command_line_len = strlen(command_line) +1;
+	} else {
+		command_line = strdup("\0");
+		command_line_len = 1;
+	}
+	ramdisk_buf = 0;
+	if (ramdisk) {
+		ramdisk_buf = slurp_file(ramdisk, &ramdisk_length);
+	}
+	dtb_buf = 0;
+	if (dtb) {
+		dtb_buf = slurp_file(dtb, &dtb_length);
+	}
+	result = do_bzImage_load(info,
+		buf, len,
+		command_line, command_line_len,
+		ramdisk_buf, ramdisk_length,
+		dtb_buf, dtb_length,
+		real_mode_entry);
+
+	free(command_line);
+	return result;
+}
diff --git a/kexec/arch/i386/kexec-elf-rel-x86.c b/kexec/arch/i386/kexec-elf-rel-x86.c
new file mode 100644
index 0000000..55a214e
--- /dev/null
+++ b/kexec/arch/i386/kexec-elf-rel-x86.c
@@ -0,0 +1,36 @@
+#include <stdio.h>
+#include <elf.h>
+#include "../../kexec.h"
+#include "../../kexec-elf.h"
+
+/* Returns 1 for a little-endian 32-bit EM_386/EM_486 object, 0 otherwise. */
+int machine_verify_elf_rel(struct mem_ehdr *ehdr)
+{
+	if (ehdr->ei_data != ELFDATA2LSB) {
+		return 0;
+	}
+	if (ehdr->ei_class != ELFCLASS32) {
+		return 0;
+	}
+	if ((ehdr->e_machine != EM_386) && (ehdr->e_machine != EM_486))
+	{
+		return 0;
+	}
+	return 1;
+}
+
+/* Applies one REL relocation in place at location; dies on any type other
+ * than R_386_32 and R_386_PC32. */
+void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr),
+	struct mem_sym *UNUSED(sym), unsigned long r_type, void *location,
+	unsigned long address, unsigned long value)
+{
+	switch(r_type) {
+	case R_386_32:
+		*((uint32_t *)location) += value;
+		break;
+	case R_386_PC32:
+		*((uint32_t *)location) += value - address;
+		break;
+	default:
+		die("Unknown rel relocation: %lu\n", r_type);
+		break;
+	}
+}
diff --git a/kexec/arch/i386/kexec-elf-x86.c b/kexec/arch/i386/kexec-elf-x86.c
new file mode 100644
index 0000000..8eba242
--- /dev/null
+++ b/kexec/arch/i386/kexec-elf-x86.c
@@ -0,0 +1,331 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include "x86-linux-setup.h" +#include "kexec-x86.h" +#include "crashdump-x86.h" +#include <arch/options.h> + +static const int probe_debug = 0; + +int elf_x86_any_probe(const char *buf, off_t len, enum coretype arch) +{ + + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + if (probe_debug) { + fprintf(stderr, "Not an ELF executable\n"); + } + goto out; + } + + /* Verify the architecuture specific bits */ + switch (arch) { + case CORE_TYPE_ELF32: + if ((ehdr.e_machine != EM_386) && (ehdr.e_machine != EM_486)) { + if (probe_debug) + fprintf(stderr, "Not i386 ELF executable\n"); + result = -1; + goto out; + } + break; + + case CORE_TYPE_ELF64: + if (ehdr.e_machine != EM_X86_64) { + if (probe_debug) + fprintf(stderr, "Not x86_64 ELF executable\n"); + result = -1; + goto out; + } + break; + + case CORE_TYPE_UNDEF: + default: + if ( + (ehdr.e_machine != EM_386) && + (ehdr.e_machine != EM_486) && + (ehdr.e_machine != EM_X86_64) + ) { + if (probe_debug) + fprintf(stderr, "Not i386 or x86_64 ELF executable\n"); + result = -1; + goto out; + } + 
break; + } + + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +int elf_x86_probe(const char *buf, off_t len) { + return elf_x86_any_probe(buf, len, CORE_TYPE_ELF32); +} + +void elf_x86_usage(void) +{ + printf( " --command-line=STRING Set the kernel command line to STRING\n" + " --append=STRING Set the kernel command line to STRING\n" + " --reuse-cmdline Use kernel command line from running system.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " --args-linux Pass linux kernel style options\n" + " --args-elf Pass elf boot notes\n" + ); + + +} + +int elf_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *command_line = NULL, *modified_cmdline = NULL; + const char *append = NULL; + char *tmp_cmdline = NULL; + const char *error_msg = NULL; + int result; + int command_line_len; + const char *ramdisk; + unsigned long entry, max_addr; + int arg_style; +#define ARG_STYLE_ELF 0 +#define ARG_STYLE_LINUX 1 +#define ARG_STYLE_NONE 2 + int opt; + + /* See options.h -- add any more there, too. 
*/ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "args-elf", 0, NULL, OPT_ARGS_ELF }, + { "args-linux", 0, NULL, OPT_ARGS_LINUX }, + { "args-none", 0, NULL, OPT_ARGS_NONE }, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* + * Parse the command line arguments + */ + arg_style = ARG_STYLE_ELF; + ramdisk = 0; + result = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_ARGS_ELF: + arg_style = ARG_STYLE_ELF; + break; + case OPT_ARGS_LINUX: + arg_style = ARG_STYLE_LINUX; + break; + case OPT_ARGS_NONE: +#ifdef __i386__ + arg_style = ARG_STYLE_NONE; +#else + die("--args-none only works on arch i386\n"); +#endif + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); + if (tmp_cmdline) { + free(tmp_cmdline); + } + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) +1; + } else { + command_line = strdup("\0"); + command_line_len = 1; + } + + /* Need to append some command line parameters internally in case of + * taking crash dumps. 
+ */ + if (info->kexec_flags & (KEXEC_ON_CRASH|KEXEC_PRESERVE_CONTEXT)) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (command_line) { + strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + } + + /* Load the ELF executable */ + elf_exec_build_load(info, &ehdr, buf, len, 0); + + entry = ehdr.e_entry; + max_addr = elf_max_addr(&ehdr); + + /* Do we want arguments? */ + if (arg_style != ARG_STYLE_NONE) { + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0, ULONG_MAX, 1, 0); + } + if (arg_style == ARG_STYLE_NONE) { + info->entry = (void *)entry; + + } + else if (arg_style == ARG_STYLE_ELF) { + unsigned long note_base; + struct entry32_regs regs; + uint32_t arg1, arg2; + + /* Setup the ELF boot notes */ + note_base = elf_boot_notes(info, max_addr, + command_line, command_line_len); + + /* Initialize the stack arguments */ + arg2 = 0; /* No return address */ + arg1 = note_base; + elf_rel_set_symbol(&info->rhdr, "stack_arg32_1", &arg1, sizeof(arg1)); + elf_rel_set_symbol(&info->rhdr, "stack_arg32_2", &arg2, sizeof(arg2)); + + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.eip = entry; /* The entry point */ + regs.esp = elf_rel_get_addr(&info->rhdr, "stack_arg32_2"); + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + + if (ramdisk) { + error_msg = "Ramdisks not supported with generic elf arguments"; + goto out; + } + } + else if (arg_style == ARG_STYLE_LINUX) { + struct x86_linux_faked_param_header *hdr; + unsigned long param_base; + const char *ramdisk_buf; + off_t ramdisk_length; + struct entry32_regs regs; + int rc = 0; + + /* Get the linux parameter header */ + hdr = xmalloc(sizeof(*hdr)); + + /* Hack: With some ld versions, vmlinux program headers show + * a gap of two pages between bss segment and data segment + 
* but effectively kernel considers it as bss segment and + * overwrites the any data placed there. Hence bloat the + * memsz of parameter segment to 16K to avoid being placed + * in such gaps. + * This is a makeshift solution until it is fixed in kernel + */ + param_base = add_buffer(info, hdr, sizeof(*hdr), 16*1024, + 16, 0, max_addr, 1); + + /* Initialize the parameter header */ + memset(hdr, 0, sizeof(*hdr)); + init_linux_parameters(&hdr->hdr); + + /* Add a ramdisk to the current image */ + ramdisk_buf = NULL; + ramdisk_length = 0; + if (ramdisk) { + ramdisk_buf = slurp_file(ramdisk, &ramdisk_length); + } + + /* If panic kernel is being loaded, additional segments need + * to be created. */ + if (info->kexec_flags & (KEXEC_ON_CRASH|KEXEC_PRESERVE_CONTEXT)) { + rc = load_crashdump_segments(info, modified_cmdline, + max_addr, 0); + if (rc < 0) { + result = -1; + goto out; + } + /* Use new command line. */ + free(command_line); + command_line = modified_cmdline; + command_line_len = strlen(modified_cmdline) + 1; + modified_cmdline = NULL; + } + + /* Tell the kernel what is going on */ + setup_linux_bootloader_parameters(info, &hdr->hdr, param_base, + offsetof(struct x86_linux_faked_param_header, command_line), + command_line, command_line_len, + ramdisk_buf, ramdisk_length); + + /* Fill in the information bios calls would usually provide */ + setup_linux_system_parameters(info, &hdr->hdr); + + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.ebx = 0; /* Bootstrap processor */ + regs.esi = param_base; /* Pointer to the parameters */ + regs.eip = entry; /* The entry point */ + regs.esp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */ + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + } + else { + error_msg = "Unknown argument style\n"; + } + +out: + free(command_line); + free(modified_cmdline); + if (error_msg) + die("%s", error_msg); + return result; +} diff --git 
a/kexec/arch/i386/kexec-mb2-x86.c b/kexec/arch/i386/kexec-mb2-x86.c new file mode 100644 index 0000000..0d2e93b --- /dev/null +++ b/kexec/arch/i386/kexec-mb2-x86.c @@ -0,0 +1,616 @@ +/* + * kexec-mb2-x86.c + * + * multiboot2 support for kexec to boot xen. + * + * Copyright (C) 2019 Varad Gautam (vrd at amazon.de), Amazon.com, Inc. or its affiliates. + * + * Parts based on GNU GRUB, Copyright (C) 2000 Free Software Foundation, Inc + * Parts taken from kexec-multiboot-x86.c, Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ * + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-x86.h" +#include "../../kexec-syscall.h" +#include "../../kexec-xen.h" +#include <arch/options.h> + +/* From GNU GRUB */ +#include <x86/multiboot2.h> +#include <x86/mb_info.h> + +/* Framebuffer */ +#include <sys/ioctl.h> +#include <linux/fb.h> + +extern struct arch_options_t arch_options; + +/* Static storage */ +static char headerbuf[MULTIBOOT_SEARCH]; +static struct multiboot_header *mbh = NULL; +struct multiboot2_header_info { + struct multiboot_header_tag_information_request *request_tag; + struct multiboot_header_tag_address *addr_tag; + struct multiboot_header_tag_entry_address *entry_addr_tag; + struct multiboot_header_tag_console_flags *console_tag; + struct multiboot_header_tag_framebuffer *fb_tag; + struct multiboot_header_tag_module_align *mod_align_tag; + struct multiboot_header_tag_relocatable *rel_tag; +} mhi; + +#define ALIGN_UP(addr, align) \ + ((addr + (typeof (addr)) align - 1) & ~((typeof (addr)) align - 1)) + +int multiboot2_x86_probe(const char *buf, off_t buf_len) +/* Is it a good idea to try booting this file? */ +{ + int i, len; + + /* First of all, check that this is an ELF file for either x86 or x86-64 */ + i = elf_x86_any_probe(buf, buf_len, CORE_TYPE_UNDEF); + if (i < 0) + return i; + + /* Now look for a multiboot header. 
*/ + len = MULTIBOOT_SEARCH; + if (len > buf_len) + len = buf_len; + + memcpy(headerbuf, buf, len); + if (len < sizeof(struct multiboot_header)) { + /* Short file */ + return -1; + } + for (mbh = (struct multiboot_header *) headerbuf; + ((char *) mbh <= (char *) headerbuf + len - sizeof(struct multiboot_header)); + mbh = (struct multiboot_header *) ((char *) mbh + MULTIBOOT_HEADER_ALIGN)) { + if (mbh->magic == MULTIBOOT2_HEADER_MAGIC + && !((mbh->magic+mbh->architecture+mbh->header_length+mbh->checksum) & 0xffffffff)) { + /* Found multiboot header. */ + return 0; + } + } + /* Not multiboot */ + return -1; +} + +void multiboot2_x86_usage(void) +/* Multiboot-specific options */ +{ + printf(" --command-line=STRING Set the kernel command line to STRING.\n"); + printf(" --reuse-cmdline Use kernel command line from running system.\n"); + printf(" --module=\"MOD arg1 arg2...\" Load module MOD with command-line \"arg1...\"\n"); + printf(" (can be used multiple times).\n"); +} + +static size_t +multiboot2_get_mbi_size(int ranges, int cmdline_size, int modcount, int modcmd_size) +{ + size_t mbi_size; + + mbi_size = (2 * sizeof (uint32_t) /* u32 total_size, u32 reserved */ + + ALIGN_UP (sizeof (struct multiboot_tag_basic_meminfo), MULTIBOOT_TAG_ALIGN) + + ALIGN_UP ((sizeof (struct multiboot_tag_mmap) + + ranges * sizeof (struct multiboot_mmap_entry)), MULTIBOOT_TAG_ALIGN) + + (sizeof (struct multiboot_tag_string) + + ALIGN_UP (cmdline_size, MULTIBOOT_TAG_ALIGN)) + + (sizeof (struct multiboot_tag_string) + + ALIGN_UP (strlen(BOOTLOADER " " BOOTLOADER_VERSION) + 1, MULTIBOOT_TAG_ALIGN)) + + (modcount * sizeof (struct multiboot_tag_module) + modcmd_size)) + + sizeof (struct multiboot_tag); /* end tag */ + + if (mhi.rel_tag) + mbi_size += ALIGN_UP (sizeof (struct multiboot_tag_load_base_addr), MULTIBOOT_TAG_ALIGN); + + if (mhi.fb_tag) + mbi_size += ALIGN_UP (sizeof (struct multiboot_tag_framebuffer), MULTIBOOT_TAG_ALIGN); + + return mbi_size; +} + +static void 
multiboot2_read_header_tags(void) +{ + struct multiboot_header_tag *tag; + + for (tag = (struct multiboot_header_tag *) (mbh + 1); + tag->type != MULTIBOOT_TAG_TYPE_END; + tag = (struct multiboot_header_tag *) ((char *) tag + ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN))) + { + switch (tag->type) + { + case MULTIBOOT_HEADER_TAG_INFORMATION_REQUEST: + { + mhi.request_tag = (struct multiboot_header_tag_information_request *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_RELOCATABLE: + { + mhi.rel_tag = (struct multiboot_header_tag_relocatable *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_ADDRESS: + { + mhi.addr_tag = (struct multiboot_header_tag_address *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS: + { + mhi.entry_addr_tag = (struct multiboot_header_tag_entry_address *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_CONSOLE_FLAGS: + { + mhi.console_tag = (struct multiboot_header_tag_console_flags *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_FRAMEBUFFER: + { + mhi.fb_tag = (struct multiboot_header_tag_framebuffer *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_MODULE_ALIGN: + { + mhi.mod_align_tag = (struct multiboot_header_tag_module_align *) tag; + break; + } + case MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS_EFI64: + case MULTIBOOT_HEADER_TAG_EFI_BS: + /* Ignoring EFI. 
*/ + break; + default: + { + if (!(tag->flags & MULTIBOOT_HEADER_TAG_OPTIONAL)) + fprintf(stderr, "unsupported tag: 0x%x", tag->type); + break; + } + } + } +} + +struct multiboot_mmap_entry *multiboot_construct_memory_map(struct memory_range *range, + int ranges, + unsigned long long *mem_lower, + unsigned long long *mem_upper) +{ + struct multiboot_mmap_entry *entries; + int i; + + *mem_lower = *mem_upper = 0; + entries = xmalloc(ranges * sizeof(*entries)); + for (i = 0; i < ranges; i++) { + entries[i].addr = range[i].start; + entries[i].len = range[i].end - range[i].start + 1; + + if (range[i].type == RANGE_RAM) { + entries[i].type = MULTIBOOT_MEMORY_AVAILABLE; + /* + * Is this the "low" memory? Can't just test + * against zero, because Linux protects (and + * hides) the first few pages of physical + * memory. + */ + + if ((range[i].start <= 64*1024) + && (range[i].end > *mem_lower)) { + range[i].start = 0; + *mem_lower = range[i].end; + } + /* Is this the "high" memory? */ + if ((range[i].start <= 0x100000) + && (range[i].end > *mem_upper + 0x100000)) + *mem_upper = range[i].end - 0x100000; + } else if (range[i].type == RANGE_ACPI) + entries[i].type = MULTIBOOT_MEMORY_ACPI_RECLAIMABLE; + else if (range[i].type == RANGE_ACPI_NVS) + entries[i].type = MULTIBOOT_MEMORY_NVS; + else if (range[i].type == RANGE_RESERVED) + entries[i].type = MULTIBOOT_MEMORY_RESERVED; + } + return entries; +} + +static uint64_t multiboot2_make_mbi(struct kexec_info *info, char *cmdline, int cmdline_len, + unsigned long load_base_addr, void *mbi_buf, size_t mbi_bytes) +{ + uint64_t *ptrorig = mbi_buf; + struct multiboot_mmap_entry *mmap_entries; + unsigned long long mem_lower = 0, mem_upper = 0; + + *ptrorig = mbi_bytes; /* u32 total_size, u32 reserved */ + ptrorig++; + + mmap_entries = multiboot_construct_memory_map(info->memory_range, info->memory_ranges, &mem_lower, &mem_upper); + { + struct multiboot_tag_basic_meminfo *tag = (struct multiboot_tag_basic_meminfo *) ptrorig; + + 
tag->type = MULTIBOOT_TAG_TYPE_BASIC_MEMINFO; + tag->size = sizeof (struct multiboot_tag_basic_meminfo); + tag->mem_lower = mem_lower >> 10; + tag->mem_upper = mem_upper >> 10; + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + { + struct multiboot_tag_mmap *tag = (struct multiboot_tag_mmap *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_MMAP; + tag->size = sizeof(struct multiboot_tag_mmap) + sizeof(struct multiboot_mmap_entry) * info->memory_ranges; + tag->entry_size = sizeof(struct multiboot_mmap_entry); + tag->entry_version = 0; + memcpy(tag->entries, mmap_entries, tag->entry_size * info->memory_ranges); + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + if (mhi.rel_tag) { + struct multiboot_tag_load_base_addr *tag = (struct multiboot_tag_load_base_addr *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_LOAD_BASE_ADDR; + tag->size = sizeof (struct multiboot_tag_load_base_addr); + tag->load_base_addr = load_base_addr; + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + { + struct multiboot_tag_string *tag = (struct multiboot_tag_string *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_CMDLINE; + tag->size = sizeof (struct multiboot_tag_string) + cmdline_len; + memcpy(tag->string, cmdline, cmdline_len); + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + { + struct multiboot_tag_string *tag = (struct multiboot_tag_string *) ptrorig; + + tag->type = MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME; + tag->size = sizeof(struct multiboot_tag_string) + strlen(BOOTLOADER " " BOOTLOADER_VERSION) + 1; + sprintf(tag->string, "%s", BOOTLOADER " " BOOTLOADER_VERSION); + ptrorig += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + + if (mhi.fb_tag) { + struct multiboot_tag_framebuffer *tag = (struct multiboot_tag_framebuffer *) ptrorig; + struct fb_fix_screeninfo info; + struct fb_var_screeninfo mode; + int fd; + + tag->common.type = 
MULTIBOOT_TAG_TYPE_FRAMEBUFFER; + tag->common.size = sizeof(struct multiboot_tag_framebuffer); + /* check if purgatory will reset to standard ega text mode */ + if (arch_options.reset_vga || arch_options.console_vga) { + tag->common.framebuffer_type = MB_FRAMEBUFFER_TYPE_EGA_TEXT; + tag->common.framebuffer_addr = 0xb8000; + tag->common.framebuffer_pitch = 80*2; + tag->common.framebuffer_width = 80; + tag->common.framebuffer_height = 25; + tag->common.framebuffer_bpp = 16; + + ptrorig += ALIGN_UP (tag->common.size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + goto out; + } + + /* use current graphics framebuffer settings */ + fd = open("/dev/fb0", O_RDONLY); + if (fd < 0) { + fprintf(stderr, "can't open /dev/fb0: %s\n", strerror(errno)); + goto out; + } + if (ioctl(fd, FBIOGET_FSCREENINFO, &info) < 0){ + fprintf(stderr, "can't get screeninfo: %s\n", strerror(errno)); + close(fd); + goto out; + } + if (ioctl(fd, FBIOGET_VSCREENINFO, &mode) < 0){ + fprintf(stderr, "can't get modeinfo: %s\n", strerror(errno)); + close(fd); + goto out; + } + close(fd); + + if (info.smem_start == 0 || info.smem_len == 0) { + fprintf(stderr, "can't get linerar framebuffer address\n"); + goto out; + } + + if (info.type != FB_TYPE_PACKED_PIXELS) { + fprintf(stderr, "unsupported framebuffer type\n"); + goto out; + } + + if (info.visual != FB_VISUAL_TRUECOLOR) { + fprintf(stderr, "unsupported framebuffer visual\n"); + goto out; + } + + tag->common.framebuffer_type = MB_FRAMEBUFFER_TYPE_RGB; + tag->common.framebuffer_addr = info.smem_start; + tag->common.framebuffer_pitch = info.line_length; + tag->common.framebuffer_width = mode.xres; + tag->common.framebuffer_height = mode.yres; + tag->common.framebuffer_bpp = mode.bits_per_pixel; + + tag->framebuffer_red_field_position = mode.red.offset; + tag->framebuffer_red_mask_size = mode.red.length; + tag->framebuffer_green_field_position = mode.green.offset; + tag->framebuffer_green_mask_size = mode.green.length; + tag->framebuffer_blue_field_position 
= mode.blue.offset; + tag->framebuffer_blue_mask_size = mode.blue.length; + + ptrorig += ALIGN_UP (tag->common.size, MULTIBOOT_TAG_ALIGN) / sizeof (*ptrorig); + } + +out: + return (uint64_t) (uintptr_t) ptrorig; +} + +static uint64_t multiboot2_mbi_add_module(void *mbi_buf, uint64_t mbi_ptr, uint32_t mod_start, + uint32_t mod_end, char *mod_clp) +{ + struct multiboot_tag_module *tag = (struct multiboot_tag_module *) (uintptr_t) mbi_ptr; + + tag->type = MULTIBOOT_TAG_TYPE_MODULE; + tag->size = sizeof(struct multiboot_tag_module) + strlen((char *)(long) mod_clp) + 1; + tag->mod_start = mod_start; + tag->mod_end = mod_end; + + memcpy(tag->cmdline, (char *)(long) mod_clp, strlen((char *)(long) mod_clp) + 1); + mbi_ptr += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN); + + return mbi_ptr; +} + +static uint64_t multiboot2_mbi_end(void *mbi_buf, uint64_t mbi_ptr) +{ + struct multiboot_tag *tag = (struct multiboot_tag *) (uintptr_t) mbi_ptr; + + tag->type = MULTIBOOT_TAG_TYPE_END; + tag->size = sizeof (struct multiboot_tag); + mbi_ptr += ALIGN_UP (tag->size, MULTIBOOT_TAG_ALIGN); + + return mbi_ptr; +} + +static inline int multiboot2_rel_valid(struct multiboot_header_tag_relocatable *rel_tag, + uint64_t rel_start, uint64_t rel_end) +{ + if (rel_start >= rel_tag->min_addr && rel_end <= rel_tag->max_addr) + return 1; + + return 0; +} + +int multiboot2_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + void *mbi_buf; + size_t mbi_bytes; + unsigned long addr; + struct entry32_regs regs; + char *command_line = NULL, *tmp_cmdline = NULL; + int command_line_len; + char *imagename, *cp, *append = NULL;; + int opt; + int modules, mod_command_line_space; + uint64_t mbi_ptr; + char *mod_clp_base; + /* See options.h -- add any more there, too. 
*/ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_CL }, + { "append", 1, 0, OPT_CL }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "module", 1, 0, OPT_MOD }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + uint64_t rel_min, rel_max; + + /* Probe for the MB header if it's not already found */ + if (mbh == NULL && multiboot_x86_probe(buf, len) != 1) + { + fprintf(stderr, "Cannot find a loadable multiboot2 header.\n"); + return -1; + } + + /* Parse the header tags. */ + multiboot2_read_header_tags(); + + /* Parse the command line */ + command_line_len = 0; + modules = 0; + mod_command_line_space = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) + { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_CL: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_MOD: + modules++; + mod_command_line_space += strlen(optarg) + 1; + break; + } + } + imagename = argv[optind]; + + /* Final command line = imagename + <OPT_REUSE_CMDLINE> + <OPT_CL> */ + tmp_cmdline = concat_cmdline(command_line, append); + if (command_line) { + free(command_line); + } + command_line = concat_cmdline(imagename, tmp_cmdline); + if (tmp_cmdline) { + free(tmp_cmdline); + } + + if (xen_present() && info->kexec_flags & KEXEC_LIVE_UPDATE ) { + if (!mhi.rel_tag) { + fprintf(stderr, "Multiboot2 image must be relocatable" + "for KEXEC_LIVE_UPDATE.\n"); + return -1; + } + cmdline_add_liveupdate(&command_line); + } + + command_line_len = strlen(command_line) + 1; + + /* Load the ELF executable */ + if (mhi.rel_tag) { + rel_min = mhi.rel_tag->min_addr; + rel_max = mhi.rel_tag->max_addr; + + if (info->kexec_flags & KEXEC_LIVE_UPDATE && xen_present()) { + /* TODO also check if elf is xen */ + /* On a live update, load target xen over the current xen image. 
*/ + uint64_t xen_start, xen_end; + + xen_get_kexec_range(KEXEC_RANGE_MA_XEN, &xen_start, &xen_end); + if (multiboot2_rel_valid(mhi.rel_tag, xen_start, xen_end)) { + rel_min = xen_start; + } else { + fprintf(stderr, "Cannot place Elf into " + "KEXEC_RANGE_MA_XEN for KEXEC_LIVE_UPDATE.\n"); + return -1; + } + } + + elf_exec_build_load_relocatable(info, &ehdr, buf, len, 0, + rel_min, rel_max, mhi.rel_tag->align); + } else + elf_exec_build_load(info, &ehdr, buf, len, 0); + + if (info->kexec_flags & KEXEC_LIVE_UPDATE && xen_present()) { + uint64_t lu_start, lu_end; + + xen_get_kexec_range(7 /* KEXEC_RANGE_MA_LIVEUPDATE */, &lu_start, &lu_end); + /* Fit everything else into lu_start-lu_end. First page after lu_start is + * reserved for LU breadcrumb. */ + rel_min = lu_start + 4096; + rel_max = lu_end; + } else { + rel_min = 0x500; + rel_max = ULONG_MAX; + } + + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + rel_min, rel_max, 1, 0); + + /* Construct information tags. */ + mbi_bytes = multiboot2_get_mbi_size(info->memory_ranges, command_line_len, modules, mod_command_line_space); + mbi_buf = xmalloc(mbi_bytes); + + mbi_ptr = multiboot2_make_mbi(info, command_line, command_line_len, info->rhdr.rel_addr, mbi_buf, mbi_bytes); + free(command_line); + + if (info->kexec_flags & KEXEC_LIVE_UPDATE && xen_present()) { + if (multiboot2_rel_valid(mhi.rel_tag, rel_min, rel_max)) { + /* Shrink the reloc range to fit into LU region for xen. 
*/ + mhi.rel_tag->min_addr = rel_min; + mhi.rel_tag->max_addr = rel_max; + } else { + fprintf(stderr, "Multiboot2 image cannot be relocated into " + "KEXEC_RANGE_MA_LIVEUPDATE for KEXEC_LIVE_UPDATE.\n"); + return -1; + } + } + + /* Load modules */ + if (modules) { + char *mod_filename, *mod_command_line, *mod_clp, *buf; + off_t mod_size; + int i = 0; + + mod_clp_base = xmalloc(mod_command_line_space); + + /* Go back and parse the module command lines */ + mod_clp = mod_clp_base; + optind = opterr = 1; + while((opt = getopt_long(argc, argv, + short_options, options, 0)) != -1) { + if (opt != OPT_MOD) continue; + + /* Split module filename from command line */ + mod_command_line = mod_filename = optarg; + if ((cp = strchr(mod_filename, ' ')) != NULL) { + /* See as I discard the 'const' modifier */ + *cp = '\0'; + } + + /* Load the module */ + buf = slurp_decompress_file(mod_filename, &mod_size); + + if (cp != NULL) *cp = ' '; + + /* Pick the next aligned spot to load it in. Always page align. 
*/ + addr = add_buffer(info, buf, mod_size, mod_size, getpagesize(), + rel_min, rel_max, 1); + + /* Add the module command line */ + sprintf(mod_clp, "%s", mod_command_line); + + mbi_ptr = multiboot2_mbi_add_module(mbi_buf, mbi_ptr, addr, addr + mod_size, mod_clp); + + mod_clp += strlen(mod_clp) + 1; + i++; + } + + free(mod_clp_base); + } + + mbi_ptr = multiboot2_mbi_end(mbi_buf, mbi_ptr); + + if (sort_segments(info) < 0) + return -1; + + addr = add_buffer(info, mbi_buf, mbi_bytes, mbi_bytes, 4, + rel_min, rel_max, 1); + + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.eax = MULTIBOOT2_BOOTLOADER_MAGIC; + regs.ebx = addr; + regs.eip = ehdr.e_entry; + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + + return 0; +} diff --git a/kexec/arch/i386/kexec-multiboot-x86.c b/kexec/arch/i386/kexec-multiboot-x86.c new file mode 100644 index 0000000..33c885a --- /dev/null +++ b/kexec/arch/i386/kexec-multiboot-x86.c @@ -0,0 +1,505 @@ +/* + * kexec-multiboot-x86.c + * + * (partial) multiboot support for kexec. Only supports ELF32 + * kernels, and a subset of the multiboot info page options + * (i.e. enough to boot the Xen hypervisor). + * + * TODO: + * - smarter allocation of new segments + * - proper support for the MULTIBOOT_VIDEO_MODE bit + * + * + * Copyright (C) 2003 Tim Deegan (tjd21 at cl.cam.ac.uk) + * + * Parts based on GNU GRUB, Copyright (C) 2000 Free Software Foundation, Inc + * Parts copied from kexec-elf32-x86.c, written by Eric Biederman + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + * + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-x86.h" +#include <arch/options.h> + +/* From GNU GRUB */ +#include <x86/mb_header.h> +#include <x86/mb_info.h> + +/* Framebuffer */ +#include <sys/ioctl.h> +#include <linux/fb.h> + +extern struct arch_options_t arch_options; + +/* Static storage */ +static char headerbuf[MULTIBOOT_SEARCH]; +static struct multiboot_header *mbh = NULL; +static off_t mbh_offset = 0; + +#define MIN(_x,_y) (((_x)<=(_y))?(_x):(_y)) + + +int multiboot_x86_probe(const char *buf, off_t buf_len) +/* Is it a good idea to try booting this file? 
*/ +{ + int i, len; + /* Now look for a multiboot header in the first 8KB */ + len = MULTIBOOT_SEARCH; + if (len > buf_len) { + len = buf_len; + } + memcpy(headerbuf, buf, len); + if (len < 12) { + /* Short file */ + return -1; + } + for (mbh_offset = 0; mbh_offset <= (len - 12); mbh_offset += 4) + { + /* Search for a multiboot header */ + mbh = (struct multiboot_header *)(headerbuf + mbh_offset); + if (mbh->magic != MULTIBOOT_MAGIC + || ((mbh->magic+mbh->flags+mbh->checksum) & 0xffffffff)) + { + /* Not a multiboot header */ + continue; + } + if (mbh->flags & MULTIBOOT_AOUT_KLUDGE) { + if (mbh->load_addr & 0xfff) { + fprintf(stderr, "multiboot load address not 4k aligned\n"); + return -1; + } + if (mbh->load_addr > mbh->header_addr) { + fprintf(stderr, "multiboot header address > load address\n"); + return -1; + } + if (mbh->load_end_addr < mbh->load_addr) { + fprintf(stderr, "multiboot load end address < load address\n"); + return -1; + } + if (mbh->bss_end_addr < mbh->load_end_addr) { + fprintf(stderr, "multiboot bss end address < load end address\n"); + return -1; + } + if (mbh->load_end_addr - mbh->header_addr > buf_len - mbh_offset) { + fprintf(stderr, "multiboot file truncated\n"); + return -1; + } + if (mbh->entry_addr < mbh->load_addr || mbh->entry_addr >= mbh->load_end_addr) { + fprintf(stderr, "multiboot entry out of range\n"); + return -1; + } + } else { + if ((i=elf_x86_probe(buf, buf_len)) < 0) + return i; + } + if (mbh->flags & MULTIBOOT_UNSUPPORTED) { + /* Requires options we don't support */ + fprintf(stderr, + "Found a multiboot header, but it " + "requires multiboot options that I\n" + "don't understand. 
Sorry.\n"); + return -1; + } + /* Bootable */ + return 0; + } + /* Not multiboot */ + return -1; +} + + +void multiboot_x86_usage(void) +/* Multiboot-specific options */ +{ + printf(" --command-line=STRING Set the kernel command line to STRING.\n"); + printf(" --reuse-cmdline Use kernel command line from running system.\n"); + printf(" --module=\"MOD arg1 arg2...\" Load module MOD with command-line \"arg1...\"\n"); + printf(" (can be used multiple times).\n"); +} + + +static int framebuffer_info(struct multiboot_info *mbi) +{ + struct fb_fix_screeninfo info; + struct fb_var_screeninfo mode; + int fd; + + /* check if purgatory will reset to standard ega text mode */ + if (arch_options.reset_vga || arch_options.console_vga) { + mbi->framebuffer_type = MB_FRAMEBUFFER_TYPE_EGA_TEXT; + mbi->framebuffer_addr = 0xb8000; + mbi->framebuffer_pitch = 80*2; + mbi->framebuffer_width = 80; + mbi->framebuffer_height = 25; + mbi->framebuffer_bpp = 16; + + mbi->flags |= MB_INFO_FRAMEBUFFER_INFO; + return 0; + } + + /* use current graphics framebuffer settings */ + fd = open("/dev/fb0", O_RDONLY); + if (fd < 0) { + fprintf(stderr, "can't open /dev/fb0: %s\n", strerror(errno)); + return -1; + } + if (ioctl(fd, FBIOGET_FSCREENINFO, &info) < 0){ + fprintf(stderr, "can't get screeninfo: %s\n", strerror(errno)); + close(fd); + return -1; + } + if (ioctl(fd, FBIOGET_VSCREENINFO, &mode) < 0){ + fprintf(stderr, "can't get modeinfo: %s\n", strerror(errno)); + close(fd); + return -1; + } + close(fd); + + if (info.smem_start == 0 || info.smem_len == 0) { + fprintf(stderr, "can't get linerar framebuffer address\n"); + return -1; + } + + if (info.type != FB_TYPE_PACKED_PIXELS) { + fprintf(stderr, "unsupported framebuffer type\n"); + return -1; + } + + if (info.visual != FB_VISUAL_TRUECOLOR) { + fprintf(stderr, "unsupported framebuffer visual\n"); + return -1; + } + + mbi->framebuffer_type = MB_FRAMEBUFFER_TYPE_RGB; + mbi->framebuffer_addr = info.smem_start; + mbi->framebuffer_pitch = 
info.line_length; + mbi->framebuffer_width = mode.xres; + mbi->framebuffer_height = mode.yres; + mbi->framebuffer_bpp = mode.bits_per_pixel; + mbi->framebuffer_red_field_position = mode.red.offset; + mbi->framebuffer_red_mask_size = mode.red.length; + mbi->framebuffer_green_field_position = mode.green.offset; + mbi->framebuffer_green_mask_size = mode.green.length; + mbi->framebuffer_blue_field_position = mode.blue.offset; + mbi->framebuffer_blue_mask_size = mode.blue.length; + + mbi->flags |= MB_INFO_FRAMEBUFFER_INFO; + return 0; +} + +int multiboot_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +/* Marshal up a multiboot-style kernel */ +{ + struct multiboot_info *mbi; + void *mbi_buf; + struct mod_list *modp; + unsigned long freespace; + unsigned long long mem_lower = 0, mem_upper = 0; + struct mem_ehdr ehdr; + unsigned long mbi_base; + struct entry32_regs regs; + size_t mbi_bytes, mbi_offset; + char *command_line = NULL, *tmp_cmdline = NULL; + char *imagename, *cp, *append = NULL;; + struct memory_range *range; + int ranges; + struct AddrRangeDesc *mmap; + int command_line_len; + int i, result; + uint32_t u, entry; + int opt; + int modules, mod_command_line_space; + /* See options.h -- add any more there, too. 
*/ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_CL }, + { "append", 1, 0, OPT_CL }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "module", 1, 0, OPT_MOD }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + /* Probe for the MB header if it's not already found */ + if (mbh == NULL && multiboot_x86_probe(buf, len) != 1) { + fprintf(stderr, "Cannot find a loadable multiboot header.\n"); + return -1; + } + + /* Parse the command line */ + command_line_len = 0; + modules = 0; + mod_command_line_space = 0; + result = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_CL: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + command_line = get_command_line(); + break; + case OPT_MOD: + modules++; + mod_command_line_space += strlen(optarg) + 1; + break; + } + } + imagename = argv[optind]; + + /* Final command line = imagename + <OPT_REUSE_CMDLINE> + <OPT_CL> */ + tmp_cmdline = concat_cmdline(command_line, append); + if (command_line) { + free(command_line); + } + command_line = concat_cmdline(imagename, tmp_cmdline); + if (tmp_cmdline) { + free(tmp_cmdline); + } + command_line_len = strlen(command_line) + 1; + + if (mbh->flags & MULTIBOOT_AOUT_KLUDGE) { + add_segment(info, + buf + (mbh_offset - (mbh->header_addr - mbh->load_addr)), + mbh->load_end_addr - mbh->load_addr, + mbh->load_addr, + mbh->bss_end_addr - mbh->load_addr); + entry = mbh->entry_addr; + } else { + /* Load the ELF executable */ + elf_exec_build_load(info, &ehdr, buf, len, 0); + entry = ehdr.e_entry; + } + + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, 0, + ULONG_MAX, 1, 0); + + /* The first segment will contain the multiboot headers: + * ============= + * multiboot information (mbi) + * ------------- + * kernel command line + * 
------------- + * bootloader name + * ------------- + * module information entries + * ------------- + * module command lines + * ============== + */ + mbi_bytes = _ALIGN(sizeof(*mbi) + command_line_len + + strlen (BOOTLOADER " " BOOTLOADER_VERSION) + 1, 4); + mbi_buf = xmalloc(mbi_bytes); + mbi = mbi_buf; + memset(mbi, 0, sizeof(*mbi)); + sprintf(((char *)mbi) + sizeof(*mbi), "%s", command_line); + sprintf(((char *)mbi) + sizeof(*mbi) + command_line_len, "%s", + BOOTLOADER " " BOOTLOADER_VERSION); + mbi->flags = MB_INFO_CMDLINE | MB_INFO_BOOT_LOADER_NAME; + /* We'll relocate these to absolute addresses later. For now, + * all addresses within the first segment are relative to the + * start of the MBI. */ + mbi->cmdline = sizeof(*mbi); + mbi->boot_loader_name = sizeof(*mbi) + command_line_len; + + /* Memory map */ + range = info->memory_range; + ranges = info->memory_ranges; + mmap = xmalloc(ranges * sizeof(*mmap)); + for (i=0; i<ranges; i++) { + unsigned long long length; + length = range[i].end - range[i].start + 1; + /* Translate bzImage mmap to multiboot-speak */ + mmap[i].size = sizeof(mmap[i]) - 4; + mmap[i].base_addr_low = range[i].start & 0xffffffff; + mmap[i].base_addr_high = range[i].start >> 32; + mmap[i].length_low = length & 0xffffffff; + mmap[i].length_high = length >> 32; + switch (range[i].type) { + case RANGE_RAM: + mmap[i].Type = 1; /* RAM */ + /* + * Is this the "low" memory? Can't just test + * against zero, because Linux protects (and + * hides) the first few pages of physical + * memory. + */ + + if ((range[i].start <= 64*1024) + && (range[i].end > mem_lower)) { + range[i].start = 0; + mem_lower = range[i].end; + } + /* Is this the "high" memory? 
*/ + if ((range[i].start <= 0x100000) + && (range[i].end > mem_upper + 0x100000)) + mem_upper = range[i].end - 0x100000; + break; + case RANGE_ACPI: + mmap[i].Type = 3; + break; + case RANGE_ACPI_NVS: + mmap[i].Type = 4; + break; + case RANGE_RESERVED: + default: + mmap[i].Type = 2; /* Not RAM (reserved) */ + } + } + + if (mbh->flags & MULTIBOOT_MEMORY_INFO) { + /* Provide a copy of the memory map to the kernel */ + + mbi->flags |= MB_INFO_MEMORY | MB_INFO_MEM_MAP; + + freespace = add_buffer(info, + mmap, ranges * sizeof(*mmap), ranges * sizeof(*mmap), + 4, 0, 0xFFFFFFFFUL, 1); + + mbi->mmap_addr = freespace; + mbi->mmap_length = ranges * sizeof(*mmap); + + /* For kernels that care naught for fancy memory maps + * and just want the size of low and high memory */ + mbi->mem_lower = MIN(mem_lower>>10, 0xffffffff); + mbi->mem_upper = MIN(mem_upper>>10, 0xffffffff); + + /* done */ + } + + /* Video */ + if (mbh->flags & MULTIBOOT_VIDEO_MODE) { + if (framebuffer_info(mbi) < 0) + fprintf(stderr, "not providing framebuffer information.\n"); + } + + /* Load modules */ + if (modules) { + char *mod_filename, *mod_command_line, *mod_clp, *buf; + off_t mod_size; + + /* We'll relocate this to an absolute address later */ + mbi->mods_addr = mbi_bytes; + mbi->mods_count = 0; + mbi->flags |= MB_INFO_MODS; + + /* Add room for the module descriptors to the MBI buffer */ + mbi_bytes += (sizeof(*modp) * modules) + + mod_command_line_space; + mbi_buf = xrealloc(mbi_buf, mbi_bytes); + + /* mbi might have moved */ + mbi = mbi_buf; + /* module descriptors go in the newly added space */ + modp = ((void *)mbi) + mbi->mods_addr; + /* module command lines go after the descriptors */ + mod_clp = ((void *)modp) + (sizeof(*modp) * modules); + + /* Go back and parse the module command lines */ + optind = opterr = 1; + while((opt = getopt_long(argc, argv, + short_options, options, 0)) != -1) { + if (opt != OPT_MOD) continue; + + /* Split module filename from command line */ + mod_command_line = 
mod_filename = optarg; + if ((cp = strchr(mod_filename, ' ')) != NULL) { + /* See as I discard the 'const' modifier */ + *cp = '\0'; + } + + /* Load the module */ + buf = slurp_decompress_file(mod_filename, &mod_size); + + if (cp != NULL) *cp = ' '; + + /* Pick the next aligned spot to load it in */ + freespace = add_buffer(info, + buf, mod_size, mod_size, + getpagesize(), 0, 0xffffffffUL, 1); + + /* Add the module command line */ + sprintf(mod_clp, "%s", mod_command_line); + + modp->mod_start = freespace; + modp->mod_end = freespace + mod_size; + modp->cmdline = (void *)mod_clp - (void *)mbi; + modp->pad = 0; + + /* Done */ + mbi->mods_count++; + mod_clp += strlen(mod_clp) + 1; + modp++; + } + } + + /* Find a place for the MBI to live */ + if (sort_segments(info) < 0) { + result = -1; + goto out; + } + mbi_base = add_buffer(info, + mbi_buf, mbi_bytes, mbi_bytes, 4, 0, 0xFFFFFFFFUL, 1); + + /* Relocate offsets in the MBI to absolute addresses */ + mbi_offset = mbi_base; + modp = ((void *)mbi) + mbi->mods_addr; + for (u = 0; u < mbi->mods_count; u++) { + modp[u].cmdline += mbi_offset; + } + mbi->mods_addr += mbi_offset; + mbi->cmdline += mbi_offset; + mbi->boot_loader_name += mbi_offset; + + /* Specify the initial CPU state and copy the setup code */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + regs.eax = 0x2BADB002; + regs.ebx = mbi_offset; + regs.eip = entry; + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s, sizeof(regs)); + +out: + free(command_line); + return result; +} + +/* + * EOF (kexec-multiboot-x86.c) + */ diff --git a/kexec/arch/i386/kexec-nbi.c b/kexec/arch/i386/kexec-nbi.c new file mode 100644 index 0000000..8eb2154 --- /dev/null +++ b/kexec/arch/i386/kexec-nbi.c @@ -0,0 +1,249 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published 
by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include "kexec-x86.h" +#include <arch/options.h> + +struct segheader +{ + uint8_t length; + uint8_t vendortag; + uint8_t reserved; + uint8_t flags; +#define NBI_SEG 0x3 +#define NBI_SEG_ABSOLUTE 0 +#define NBI_SEG_APPEND 1 +#define NBI_SEG_NEGATIVE 2 +#define NBI_SEG_PREPEND 3 +#define NBI_LAST_SEG (1 << 2) + uint32_t loadaddr; + uint32_t imglength; + uint32_t memlength; +}; + +struct imgheader +{ +#define NBI_MAGIC "\x36\x13\x03\x1b" + uint8_t magic[4]; +#define NBI_RETURNS (1 << 8) +#define NBI_ENTRY32 (1 << 31) + uint32_t length; /* and flags */ + struct { uint16_t bx, ds; } segoff; + union { + struct { uint16_t ip, cs; } segoff; + uint32_t linear; + } execaddr; +}; + + +static const int probe_debug = 0; + +int nbi_probe(const char *buf, off_t len) +{ + struct imgheader hdr; + struct segheader seg; + off_t seg_off; + /* If we don't have enough data give up */ + if (((uintmax_t)len < (uintmax_t)sizeof(hdr)) || (len < 512)) { + return -1; + } + memcpy(&hdr, buf, sizeof(hdr)); + if (memcmp(hdr.magic, NBI_MAGIC, sizeof(hdr.magic)) != 0) { + return -1; + } + /* Ensure we have a properly sized header */ + if 
(((hdr.length & 0xf)*4) != sizeof(hdr)) { + if (probe_debug) { + fprintf(stderr, "NBI: Bad vendor header size\n"); + } + return -1; + } + /* Ensure the vendor header is not too large. + * This can't actually happen but.... + */ + if ((((hdr.length & 0xf0) >> 4)*4) > (512 - sizeof(hdr))) { + if (probe_debug) { + fprintf(stderr, "NBI: vendor headr too large\n"); + } + return -1; + } + /* Reserved bits are set in the image... */ + if ((hdr.length & 0x7ffffe00)) { + if (probe_debug) { + fprintf(stderr, "NBI: Reserved header bits set\n"); + } + return -1; + } + /* If the image can return refuse to load it */ + if (hdr.length & (1 << 8)) { + if (probe_debug) { + printf("NBI: image wants to return\n"); + } + return -1; + } + /* Now verify the segments all fit within 512 bytes */ + seg_off = (((hdr.length & 0xf0) >> 4) + (hdr.length & 0x0f)) << 2; + do { + memcpy(&seg, buf + seg_off, sizeof(seg)); + if ((seg.length & 0xf) != 4) { + if (probe_debug) { + fprintf(stderr, "NBI: Invalid segment length\n"); + } + return -1; + } + seg_off += ((seg.length & 0xf) + ((seg.length >> 4) & 0xf)) << 2; + if (seg.flags & 0xf8) { + if (probe_debug) { + fprintf(stderr, "NBI: segment reserved flags set\n"); + } + return -1; + } + if ((seg.flags & NBI_SEG) == NBI_SEG_NEGATIVE) { + if (probe_debug) { + fprintf(stderr, "NBI: negative segment addresses not supported\n"); + } + return -1; + } + if (seg_off > 512) { + if (probe_debug) { + fprintf(stderr, "NBI: segment outside 512 header\n"); + } + return -1; + } + } while(!(seg.flags & NBI_LAST_SEG)); + return 0; +} + +void nbi_usage(void) +{ + printf( "\n" + ); +} + +int nbi_load(int argc, char **argv, const char *buf, off_t UNUSED(len), + struct kexec_info *info) +{ + struct imgheader hdr; + struct segheader seg; + off_t seg_off; + off_t file_off; + uint32_t last0, last1; + int opt; + + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* + * 
Parse the command line arguments + */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + } + } + /* Get a copy of the header */ + memcpy(&hdr, buf, sizeof(hdr)); + + /* Load the first 512 bytes */ + add_segment(info, buf + 0, 512, + (hdr.segoff.ds << 4) + hdr.segoff.bx, 512); + + /* Initialize variables */ + file_off = 512; + last0 = (hdr.segoff.ds << 4) + hdr.segoff.bx; + last1 = last0 + 512; + + /* Load the segments */ + seg_off = (((hdr.length & 0xf0) >> 4) + (hdr.length & 0x0f)) << 2; + do { + uint32_t loadaddr; + memcpy(&seg, buf + seg_off, sizeof(seg)); + seg_off += ((seg.length & 0xf) + ((seg.length >> 4) & 0xf)) << 2; + if ((seg.flags & NBI_SEG) == NBI_SEG_ABSOLUTE) { + loadaddr = seg.loadaddr; + } + else if ((seg.flags & NBI_SEG) == NBI_SEG_APPEND) { + loadaddr = last1 + seg.loadaddr; + } +#if 0 + else if ((seg.flags & NBI_SEG) == NBI_SEG_NEGATIVE) { + loadaddr = memsize - seg.loadaddr; + } +#endif + else if ((seg.flags & NBI_SEG) == NBI_SEG_PREPEND) { + loadaddr = last0 - seg.loadaddr; + } + else { + printf("warning: unhandled segment of type %0x\n", + seg.flags & NBI_SEG); + continue; + } + add_segment(info, buf + file_off, seg.imglength, + loadaddr, seg.memlength); + last0 = loadaddr; + last1 = last0 + seg.memlength; + file_off += seg.imglength; + } while(!(seg.flags & NBI_LAST_SEG)); + + if (hdr.length & NBI_ENTRY32) { + struct entry32_regs regs32; + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs32", ®s32, sizeof(regs32)); + regs32.eip = hdr.execaddr.linear; + elf_rel_set_symbol(&info->rhdr, "entry32_regs32", ®s32, sizeof(regs32)); + } + else { + struct entry32_regs regs32; + struct entry16_regs regs16; + + /* Initialize the 16 bit registers */ + elf_rel_get_symbol(&info->rhdr, "entry16_regs", ®s16, sizeof(regs16)); + regs16.cs = hdr.execaddr.segoff.cs; + regs16.ip = hdr.execaddr.segoff.ip; + 
elf_rel_set_symbol(&info->rhdr, "entry16_regs", ®s16, sizeof(regs16)); + + /* Initialize the 32 bit registers */ + elf_rel_get_symbol(&info->rhdr, "entry32_regs", ®s32, sizeof(regs32)); + regs32.eip = elf_rel_get_addr(&info->rhdr, "entry16"); + elf_rel_set_symbol(&info->rhdr, "entry32_regs", ®s32, sizeof(regs32)); + } + return 0; +} diff --git a/kexec/arch/i386/kexec-x86-common.c b/kexec/arch/i386/kexec-x86-common.c new file mode 100644 index 0000000..ffc95a9 --- /dev/null +++ b/kexec/arch/i386/kexec-x86-common.c @@ -0,0 +1,444 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _XOPEN_SOURCE 600 +#define _BSD_SOURCE +#define _DEFAULT_SOURCE + +#include <fcntl.h> +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../firmware_memmap.h" +#include "../../crashdump.h" +#include "../../kexec-xen.h" +#include "kexec-x86.h" +#include "x86-linux-setup.h" + +/* Used below but not present in (older?) 
xenctrl.h */ +#ifndef E820_PMEM +#define E820_PMEM 7 +#define E820_PRAM 12 +#endif + +/* + * The real mode IVT ends at 0x400. + * See https://wiki.osdev.org/Interrupt_Vector_Table. + */ +#define REALMODE_IVT_END 0x400 + +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +/** + * The old /proc/iomem parsing code. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. + */ +static int get_memory_ranges_proc_iomem(struct memory_range **range, int *ranges) +{ + const char *iomem= proc_iomem(); + int memory_ranges = 0; + char line[MAX_LINE]; + FILE *fp; + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + while(fgets(line, sizeof(line), fp) != 0) { + unsigned long long start, end; + char *str; + int type; + int consumed; + int count; + if (memory_ranges >= MAX_MEMORY_RANGES) + break; + count = sscanf(line, "%llx-%llx : %n", + &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + + dbgprintf("%016Lx-%016Lx : %s", start, end, str); + + if (memcmp(str, "System RAM\n", 11) == 0) { + type = RANGE_RAM; + } + else if (strncasecmp(str, "reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } + else if (memcmp(str, "ACPI Tables\n", 12) == 0) { + type = RANGE_ACPI; + } + else if (memcmp(str, "ACPI Non-volatile Storage\n", 26) == 0) { + type = RANGE_ACPI_NVS; + } + else if (memcmp(str, "Persistent Memory (legacy)\n", 27) == 0) { + type = RANGE_PRAM; + } + else if (memcmp(str, "Persistent Memory\n", 18) == 0) { + type = RANGE_PMEM; + } + else { + continue; + } + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + + dbgprintf("%016Lx-%016Lx : %x\n", start, end, type); + + memory_ranges++; + } + fclose(fp); + *range = memory_range; + *ranges = 
memory_ranges; + return 0; +} + +/** + * Calls the architecture independent get_firmware_memmap_ranges() to parse + * /sys/firmware/memmap and then do some x86 only modifications. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. + */ +static int get_memory_ranges_sysfs(struct memory_range **range, int *ranges) +{ + int ret; + size_t range_number = MAX_MEMORY_RANGES; + + ret = get_firmware_memmap_ranges(memory_range, &range_number); + if (ret != 0) { + fprintf(stderr, "Parsing the /sys/firmware memory map failed. " + "Falling back to /proc/iomem.\n"); + return get_memory_ranges_proc_iomem(range, ranges); + } + + *range = memory_range; + *ranges = range_number; + + return 0; +} + +#ifdef HAVE_LIBXENCTRL +unsigned xen_e820_to_kexec_type(uint32_t type) +{ + switch (type) { + case E820_RAM: + return RANGE_RAM; + case E820_ACPI: + return RANGE_ACPI; + case E820_NVS: + return RANGE_ACPI_NVS; + case E820_PMEM: + return RANGE_PMEM; + case E820_PRAM: + return RANGE_PRAM; + case E820_RESERVED: + default: + return RANGE_RESERVED; + } +} + +/** + * Memory map detection for Xen. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. 
+ */ +static int get_memory_ranges_xen(struct memory_range **range, int *ranges) +{ + int rc, ret = -1; + struct e820entry e820entries[MAX_MEMORY_RANGES]; + unsigned int i; + xc_interface *xc; + + xc = xc_interface_open(NULL, NULL, 0); + + if (!xc) { + fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); + return -1; + } + + rc = xc_get_machine_memory_map(xc, e820entries, MAX_MEMORY_RANGES); + + if (rc < 0) { + fprintf(stderr, "%s: xc_get_machine_memory_map: %s\n", __func__, strerror(rc)); + goto err; + } + + for (i = 0; i < rc; ++i) { + memory_range[i].start = e820entries[i].addr; + memory_range[i].end = e820entries[i].addr + e820entries[i].size - 1; + memory_range[i].type = xen_e820_to_kexec_type(e820entries[i].type); + } + + qsort(memory_range, rc, sizeof(struct memory_range), compare_ranges); + + *range = memory_range; + *ranges = rc; + + ret = 0; + +err: + xc_interface_close(xc); + + return ret; +} +#else +static int get_memory_ranges_xen(struct memory_range **range, int *ranges) +{ + return 0; +} +#endif /* HAVE_LIBXENCTRL */ + +static void remove_range(struct memory_range *range, int nr_ranges, int index) +{ + int i, j; + + for (i = index; i < (nr_ranges-1); i++) { + j = i+1; + range[i] = range[j]; + } +} + +/** + * Verifies and corrects any overlapping ranges. + * The ranges array is assumed to be sorted already. + * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * + * @return 0 on success, any other value on failure. 
+ */ +static int fixup_memory_ranges(struct memory_range **range, int *ranges) +{ + int i; + int j; + int change_made; + int nr_ranges = *ranges; + struct memory_range *rp = *range; + +again: + change_made = 0; + for (i = 0; i < (nr_ranges-1); i++) { + j = i+1; + if (rp[i].start > rp[j].start) { + fprintf(stderr, "memory out of order!!\n"); + return 1; + } + + if (rp[i].type != rp[j].type) + continue; + + if (rp[i].start == rp[j].start) { + if (rp[i].end >= rp[j].end) { + remove_range(rp, nr_ranges, j); + nr_ranges--; + change_made++; + } else { + remove_range(rp, nr_ranges, i); + nr_ranges--; + change_made++; + } + } else { + if (rp[i].end > rp[j].start) { + if (rp[i].end < rp[j].end) { + rp[j].start = rp[i].end; + change_made++; + } else if (rp[i].end >= rp[j].end) { + remove_range(rp, nr_ranges, j); + nr_ranges--; + change_made++; + } + } + } + } + + /* fixing/removing an entry may make it wrong relative to the next */ + if (change_made) + goto again; + + *ranges = nr_ranges; + return 0; +} + +/** + * Detect the add_efi_memmap kernel parameter. + * + * On some EFI-based systems, the e820 map is empty, or does not contain a + * complete memory map. The add_efi_memmap parameter adds these entries to + * the kernel's memory map, but does not add them under sysfs, which causes + * kexec to fail in a way similar to how it does not work on Xen. + * + * @return 1 if parameter is present, 0 if not or if an error occurs. + */ +int efi_map_added( void ) { + char buf[512]; + FILE *fp = fopen( "/proc/cmdline", "r" ); + if( fp ) { + fgets( buf, 512, fp ); + fclose( fp ); + return strstr( buf, "add_efi_memmap" ) != NULL; + } else { + return 0; + } +} + +/** + * Return a sorted list of memory ranges. + * + * If we have the /sys/firmware/memmap interface, then use that. If not, + * or if parsing of that fails, use /proc/iomem as fallback. 
+ * + * @param[out] range pointer that will be set to an array that holds the + * memory ranges + * @param[out] ranges number of ranges valid in @p range + * @param[in] kexec_flags the kexec_flags to determine if we load a normal + * or a crashdump kernel + * + * @return 0 on success, any other value on failure. + */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + int ret, i; + + if (!efi_map_added() && !xen_present() && have_sys_firmware_memmap()) { + ret = get_memory_ranges_sysfs(range, ranges); + if (!ret) + ret = fixup_memory_ranges(range, ranges); + } else if (xen_present()) { + ret = get_memory_ranges_xen(range, ranges); + if (!ret) + ret = fixup_memory_ranges(range, ranges); + } else + ret = get_memory_ranges_proc_iomem(range, ranges); + + /* + * get_memory_ranges_sysfs(), get_memory_ranges_proc_iomem() and + * get_memory_ranges_xen() have already printed an error message, + * so fail silently here. + */ + if (ret != 0) + return ret; + + /* Don't report the interrupt table as ram */ + for (i = 0; i < *ranges; i++) { + if ((*range)[i].type == RANGE_RAM && + ((*range)[i].start < REALMODE_IVT_END)) { + (*range)[i].start = REALMODE_IVT_END; + break; + } + } + + /* + * Redefine the memory region boundaries if kernel + * exports the limits and if it is panic kernel. + * Override user values only if kernel exported values are + * subset of user defined values. 
+ */ + if ((kexec_flags & KEXEC_ON_CRASH) && + !(kexec_flags & KEXEC_PRESERVE_CONTEXT)) { + uint64_t start, end; + + ret = get_crash_kernel_load_range(&start, &end); + if (ret != 0) { + fprintf(stderr, "get_crash_kernel_load_range failed.\n"); + return -1; + } + + if (start > mem_min) + mem_min = start; + if (end < mem_max) + mem_max = end; + } + + dbgprint_mem_range("MEMORY RANGES", *range, *ranges); + + return ret; +} + +static uint64_t bootparam_get_acpi_rsdp(void) { + uint64_t acpi_rsdp = 0; + off_t offset = offsetof(struct x86_linux_param_header, acpi_rsdp_addr); + + if (get_bootparam(&acpi_rsdp, offset, sizeof(acpi_rsdp))) + return 0; + + return acpi_rsdp; +} + +static uint64_t efi_get_acpi_rsdp(void) { + FILE *fp; + char line[MAX_LINE], *s; + uint64_t acpi_rsdp = 0; + + fp = fopen("/sys/firmware/efi/systab", "r"); + if (!fp) + return acpi_rsdp; + + while(fgets(line, sizeof(line), fp) != 0) { + /* ACPI20= always goes before ACPI= */ + if ((strstr(line, "ACPI20=")) || (strstr(line, "ACPI="))) { + s = strchr(line, '=') + 1; + sscanf(s, "0x%lx", &acpi_rsdp); + break; + } + } + fclose(fp); + + return acpi_rsdp; +} + +uint64_t get_acpi_rsdp(void) +{ + uint64_t acpi_rsdp = 0; + + acpi_rsdp = bootparam_get_acpi_rsdp(); + + if (!acpi_rsdp) + acpi_rsdp = efi_get_acpi_rsdp(); + + return acpi_rsdp; +} diff --git a/kexec/arch/i386/kexec-x86.c b/kexec/arch/i386/kexec-x86.c new file mode 100644 index 0000000..444cb69 --- /dev/null +++ b/kexec/arch/i386/kexec-x86.c @@ -0,0 +1,210 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <stdlib.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../firmware_memmap.h" +#include "kexec-x86.h" +#include "crashdump-x86.h" +#include <arch/options.h> + +struct file_type file_type[] = { + { "multiboot-x86", multiboot_x86_probe, multiboot_x86_load, + multiboot_x86_usage }, + { "multiboot2-x86", multiboot2_x86_probe, multiboot2_x86_load, + multiboot2_x86_usage }, + { "elf-x86", elf_x86_probe, elf_x86_load, elf_x86_usage }, + { "bzImage", bzImage_probe, bzImage_load, bzImage_usage }, + { "beoboot-x86", beoboot_probe, beoboot_load, beoboot_usage }, + { "nbi-x86", nbi_probe, nbi_load, nbi_usage }, +}; +int file_types = sizeof(file_type)/sizeof(file_type[0]); + + +void arch_usage(void) +{ + printf( + " --reset-vga Attempt to reset a standard vga device\n" + " --serial=<port> Specify the serial port for debug output\n" + " --serial-baud=<baud_rate> Specify the serial port baud rate\n" + " --console-vga Enable the vga console\n" + " --console-serial Enable the serial console\n" + " --elf32-core-headers Prepare core headers in ELF32 format\n" + " --elf64-core-headers Prepare core headers in ELF64 format\n" + " --pass-memmap-cmdline Pass memory map via command line in kexec on panic case\n" + " --noefi Disable efi support\n" + ); +} + +struct arch_options_t arch_options = { + .reset_vga = 0, + .serial_base 
= 0x3f8, + .serial_baud = 0, + .console_vga = 0, + .console_serial = 0, + .core_header_type = CORE_TYPE_UNDEF, + .pass_memmap_cmdline = 0, + .noefi = 0, +}; + +int arch_process_options(int argc, char **argv) +{ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + unsigned long value; + char *end; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + break; + case OPT_RESET_VGA: + arch_options.reset_vga = 1; + break; + case OPT_CONSOLE_VGA: + arch_options.console_vga = 1; + break; + case OPT_CONSOLE_SERIAL: + arch_options.console_serial = 1; + break; + case OPT_SERIAL: + value = ULONG_MAX; + if (strcmp(optarg, "ttyS0") == 0) { + value = 0x3f8; + } + else if (strcmp(optarg, "ttyS1") == 0) { + value = 0x2f8; + } + else if (strncmp(optarg, "0x", 2) == 0) { + value = strtoul(optarg +2, &end, 16); + if (*end != '\0') { + value = ULONG_MAX; + } + } + if (value >= 65536) { + fprintf(stderr, "Bad serial port base '%s'\n", + optarg); + usage(); + return -1; + + } + arch_options.serial_base = value; + break; + case OPT_SERIAL_BAUD: + value = strtoul(optarg, &end, 0); + if ((value > 115200) || ((115200 %value) != 0) || + (value < 9600) || (*end)) + { + fprintf(stderr, "Bad serial port baud rate '%s'\n", + optarg); + usage(); + return -1; + + } + arch_options.serial_baud = value; + break; + case OPT_ELF32_CORE: + arch_options.core_header_type = CORE_TYPE_ELF32; + break; + case OPT_ELF64_CORE: + arch_options.core_header_type = CORE_TYPE_ELF64; + break; + case OPT_PASS_MEMMAP_CMDLINE: + arch_options.pass_memmap_cmdline = 1; + break; + case OPT_NOEFI: + arch_options.noefi = 1; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + /* For 
compatibility with older patches + * use KEXEC_ARCH_DEFAULT instead of KEXEC_ARCH_386 here. + */ + { "i386", KEXEC_ARCH_DEFAULT }, + { "i486", KEXEC_ARCH_DEFAULT }, + { "i586", KEXEC_ARCH_DEFAULT }, + { "i686", KEXEC_ARCH_DEFAULT }, + { "x86_64", KEXEC_ARCH_X86_64 }, + { 0, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *info) +{ + if ((info->kexec_flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_X86_64) + { + if (!info->rhdr.e_shdr) { + fprintf(stderr, + "A trampoline is required for cross architecture support\n"); + return -1; + } + elf_rel_set_symbol(&info->rhdr, "compat_x86_64_entry32", + &info->entry, sizeof(info->entry)); + + info->entry = (void *)elf_rel_get_addr(&info->rhdr, "compat_x86_64"); + } + return 0; +} + +void arch_update_purgatory(struct kexec_info *info) +{ + uint8_t panic_kernel = 0; + + elf_rel_set_symbol(&info->rhdr, "reset_vga", + &arch_options.reset_vga, sizeof(arch_options.reset_vga)); + elf_rel_set_symbol(&info->rhdr, "serial_base", + &arch_options.serial_base, sizeof(arch_options.serial_base)); + elf_rel_set_symbol(&info->rhdr, "serial_baud", + &arch_options.serial_baud, sizeof(arch_options.serial_baud)); + elf_rel_set_symbol(&info->rhdr, "console_vga", + &arch_options.console_vga, sizeof(arch_options.console_vga)); + elf_rel_set_symbol(&info->rhdr, "console_serial", + &arch_options.console_serial, sizeof(arch_options.console_serial)); + elf_rel_set_symbol(&info->rhdr, "backup_src_start", + &info->backup_src_start, sizeof(info->backup_src_start)); + elf_rel_set_symbol(&info->rhdr, "backup_src_size", + &info->backup_src_size, sizeof(info->backup_src_size)); + if (info->kexec_flags & KEXEC_ON_CRASH) { + panic_kernel = 1; + elf_rel_set_symbol(&info->rhdr, "backup_start", + &info->backup_start, sizeof(info->backup_start)); + } + elf_rel_set_symbol(&info->rhdr, "panic_kernel", + &panic_kernel, sizeof(panic_kernel)); +} diff --git a/kexec/arch/i386/kexec-x86.h b/kexec/arch/i386/kexec-x86.h new file mode 100644 index 0000000..46e2898 --- 
/dev/null +++ b/kexec/arch/i386/kexec-x86.h @@ -0,0 +1,97 @@ +#ifndef KEXEC_X86_H +#define KEXEC_X86_H + +#define MAX_MEMORY_RANGES 2048 + +enum coretype { + CORE_TYPE_UNDEF = 0, + CORE_TYPE_ELF32 = 1, + CORE_TYPE_ELF64 = 2 +}; + /* arch_compat_trampoline() looks up "compat_x86_64" and "compat_x86_64_entry32" by name in info->rhdr. */ +extern unsigned char compat_x86_64[]; +extern uint32_t compat_x86_64_size, compat_x86_64_entry32; + +struct entry32_regs { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t esp; + uint32_t ebp; + uint32_t eip; +}; + +struct entry16_regs { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t esp; + uint32_t ebp; + uint16_t ds; + uint16_t es; + uint16_t ss; + uint16_t fs; + uint16_t gs; + uint16_t ip; + uint16_t cs; + uint16_t pad; +}; + /* Type of the global arch_options declared in x86-linux-setup.h. arch_update_purgatory() copies reset_vga, serial_base, serial_baud, console_vga and console_serial into same-named purgatory symbols; setup_linux_vesafb() reads reuse_video_type, setup_e820() reads pass_memmap_cmdline and setup_linux_system_parameters() reads noefi. */ +struct arch_options_t { + uint8_t reset_vga; + uint16_t serial_base; + uint32_t serial_baud; + uint8_t console_vga; + uint8_t console_serial; + enum coretype core_header_type; + uint8_t pass_memmap_cmdline; + uint8_t noefi; + uint8_t reuse_video_type; +}; + +int multiboot_x86_probe(const char *buf, off_t len); +int multiboot_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void multiboot_x86_usage(void); + +int multiboot2_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void multiboot2_x86_usage(void); +int multiboot2_x86_probe(const char *buf, off_t buf_len); + +int elf_x86_probe(const char *buf, off_t len); +int elf_x86_any_probe(const char *buf, off_t len, enum coretype arch); +int elf_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_x86_usage(void); + +int bzImage_probe(const char *buf, off_t len); +int bzImage_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void bzImage_usage(void); +int do_bzImage_load(struct kexec_info *info, + const char *kernel, off_t kernel_len, + const char *command_line, off_t command_line_len, + const 
char *initrd, off_t initrd_len, + const char *dtb, off_t dtb_len, + int real_mode_entry); + +int beoboot_probe(const char *buf, off_t len); +int beoboot_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void beoboot_usage(void); + +int nbi_probe(const char *buf, off_t len); +int nbi_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void nbi_usage(void); + +extern unsigned xen_e820_to_kexec_type(uint32_t type); +extern uint64_t get_acpi_rsdp(void); /* result is stored in real_mode->acpi_rsdp_addr by setup_linux_system_parameters() */ +#endif /* KEXEC_X86_H */ diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c new file mode 100644 index 0000000..9a281dc --- /dev/null +++ b/kexec/arch/i386/x86-linux-setup.c @@ -0,0 +1,971 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#define _GNU_SOURCE +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/random.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <linux/fb.h> +#include <linux/screen_info.h> +#include <unistd.h> +#include <dirent.h> +#include <mntent.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "kexec-x86.h" +#include "x86-linux-setup.h" +#include "../../kexec/kexec-syscall.h" + +#ifndef VIDEO_CAPABILITY_64BIT_BASE +#define VIDEO_CAPABILITY_64BIT_BASE (1 << 1) /* Frame buffer base is 64-bit */ +#endif + +void init_linux_parameters(struct x86_linux_param_header *real_mode) +{ + /* Fill in the values that are usually provided by the kernel. */ + + /* Boot block magic */ + memcpy(real_mode->header_magic, "HdrS", 4); + real_mode->protocol_version = 0x0206; + real_mode->initrd_addr_max = DEFAULT_INITRD_ADDR_MAX; + real_mode->cmdline_size = COMMAND_LINE_SIZE; +} + +void setup_linux_bootloader_parameters_high( + struct kexec_info *info, struct x86_linux_param_header *real_mode, + unsigned long real_mode_base, unsigned long cmdline_offset, + const char *cmdline, off_t cmdline_len, + const char *initrd_buf, off_t initrd_size, int initrd_high) +{ + char *cmdline_ptr; + unsigned long initrd_base, initrd_addr_max; + + /* Say I'm a boot loader */ + real_mode->loader_type = LOADER_TYPE_KEXEC << 4; + + /* No loader flags */ + real_mode->loader_flags = 0; + + /* Find the maximum initial ramdisk address */ + if (initrd_high) + initrd_addr_max = ULONG_MAX; + else { + initrd_addr_max = DEFAULT_INITRD_ADDR_MAX; + if (real_mode->protocol_version >= 0x0203) { + initrd_addr_max = real_mode->initrd_addr_max; + dbgprintf("initrd_addr_max is 0x%lx\n", + initrd_addr_max); + } + } + + /* Load the initrd if we have one */ + if (initrd_buf) { + initrd_base = add_buffer(info, + initrd_buf, 
initrd_size, initrd_size, + 4096, INITRD_BASE, initrd_addr_max, -1); + dbgprintf("Loaded initrd at 0x%lx size 0x%lx\n", initrd_base, + initrd_size); + } else { + initrd_base = 0; + initrd_size = 0; + } + + /* Ramdisk address and size */ + real_mode->initrd_start = initrd_base & 0xffffffffUL; + real_mode->initrd_size = initrd_size & 0xffffffffUL; + + if (real_mode->protocol_version >= 0x020c && + (initrd_base & 0xffffffffUL) != initrd_base) + real_mode->ext_ramdisk_image = initrd_base >> 32; + + if (real_mode->protocol_version >= 0x020c && + (initrd_size & 0xffffffffUL) != initrd_size) + real_mode->ext_ramdisk_size = initrd_size >> 32; + + /* The location of the command line */ + /* if (real_mode_base == 0x90000) { */ + real_mode->cl_magic = CL_MAGIC_VALUE; + real_mode->cl_offset = cmdline_offset; + /* setup_move_size */ + /* } */ + if (real_mode->protocol_version >= 0x0202) { + unsigned long cmd_line_ptr = real_mode_base + cmdline_offset; + + real_mode->cmd_line_ptr = cmd_line_ptr & 0xffffffffUL; + if ((real_mode->protocol_version >= 0x020c) && + ((cmd_line_ptr & 0xffffffffUL) != cmd_line_ptr)) + real_mode->ext_cmd_line_ptr = cmd_line_ptr >> 32; + } + + /* Fill in the command line */ + if (cmdline_len > COMMAND_LINE_SIZE) { + cmdline_len = COMMAND_LINE_SIZE; + } + cmdline_ptr = ((char *)real_mode) + cmdline_offset; + memcpy(cmdline_ptr, cmdline, cmdline_len); + cmdline_ptr[cmdline_len - 1] = '\0'; +} + +static int setup_linux_vesafb(struct x86_linux_param_header *real_mode) +{ + struct fb_fix_screeninfo fix; + struct fb_var_screeninfo var; + int fd; + + fd = open("/dev/fb0", O_RDONLY); + if (-1 == fd) + return -1; + + if (-1 == ioctl(fd, FBIOGET_FSCREENINFO, &fix)) + goto out; + if (-1 == ioctl(fd, FBIOGET_VSCREENINFO, &var)) + goto out; + if (0 == strcmp(fix.id, "VESA VGA")) { + /* VIDEO_TYPE_VLFB */ + real_mode->orig_video_isVGA = 0x23; + } else if (0 == strcmp(fix.id, "EFI VGA")) { + /* VIDEO_TYPE_EFI */ + real_mode->orig_video_isVGA = 0x70; + } else if 
(arch_options.reuse_video_type) { + int err; + off_t offset = offsetof(typeof(*real_mode), orig_video_isVGA); + + /* blindly try old boot time video type */ + err = get_bootparam(&real_mode->orig_video_isVGA, offset, 1); + if (err) + goto out; + } else { + real_mode->orig_video_isVGA = 0; + close(fd); + return 0; + } + close(fd); + + real_mode->lfb_width = var.xres; + real_mode->lfb_height = var.yres; + real_mode->lfb_depth = var.bits_per_pixel; + real_mode->lfb_base = fix.smem_start & 0xffffffffUL; + real_mode->lfb_linelength = fix.line_length; + real_mode->vesapm_seg = 0; + + if (fix.smem_start > 0xffffffffUL) { + real_mode->ext_lfb_base = fix.smem_start >> 32; + real_mode->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + } + + /* FIXME: better get size from the file returned by proc_iomem() */ + real_mode->lfb_size = (fix.smem_len + 65535) / 65536; + real_mode->pages = (fix.smem_len + 4095) / 4096; + + if (var.bits_per_pixel > 8) { + real_mode->red_pos = var.red.offset; + real_mode->red_size = var.red.length; + real_mode->green_pos = var.green.offset; + real_mode->green_size = var.green.length; + real_mode->blue_pos = var.blue.offset; + real_mode->blue_size = var.blue.length; + real_mode->rsvd_pos = var.transp.offset; + real_mode->rsvd_size = var.transp.length; + } + return 0; + + out: + close(fd); + return -1; +} + +#define EDD_SYFS_DIR "/sys/firmware/edd" + +#define EDD_EXT_FIXED_DISK_ACCESS (1 << 0) +#define EDD_EXT_DEVICE_LOCKING_AND_EJECTING (1 << 1) +#define EDD_EXT_ENHANCED_DISK_DRIVE_SUPPORT (1 << 2) +#define EDD_EXT_64BIT_EXTENSIONS (1 << 3) + +/* + * Scans one line from a given filename. Returns on success the number of + * items written (same like scanf()). + */ +static int file_scanf(const char *dir, const char *file, const char *scanf_line, ...) 
+{ + va_list argptr; + FILE *fp; + int retno; + char filename[PATH_MAX]; + + snprintf(filename, PATH_MAX, "%s/%s", dir, file); + filename[PATH_MAX-1] = 0; + + fp = fopen(filename, "r"); + if (!fp) { + return -errno; + } + + va_start(argptr, scanf_line); + retno = vfscanf(fp, scanf_line, argptr); + va_end(argptr); + + fclose(fp); + + return retno; +} + +static int parse_edd_extensions(const char *dir, struct edd_info *edd_info) +{ + char filename[PATH_MAX]; + char line[1024]; + uint16_t flags = 0; + FILE *fp; + int ret; + + ret = snprintf(filename, PATH_MAX, "%s/%s", dir, "extensions"); + if (ret < 0 || ret >= PATH_MAX) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + return -1; + } + + filename[PATH_MAX-1] = 0; + + fp = fopen(filename, "r"); + if (!fp) { + return -errno; + } + + while (fgets(line, 1024, fp)) { + /* + * strings are in kernel source, function edd_show_extensions() + * drivers/firmware/edd.c + */ + if (strstr(line, "Fixed disk access") == line) + flags |= EDD_EXT_FIXED_DISK_ACCESS; + else if (strstr(line, "Device locking and ejecting") == line) + flags |= EDD_EXT_DEVICE_LOCKING_AND_EJECTING; + else if (strstr(line, "Enhanced Disk Drive support") == line) + flags |= EDD_EXT_ENHANCED_DISK_DRIVE_SUPPORT; + else if (strstr(line, "64-bit extensions") == line) + flags |= EDD_EXT_64BIT_EXTENSIONS; + } + + fclose(fp); + + edd_info->interface_support = flags; + + return 0; +} + +static int read_edd_raw_data(const char *dir, struct edd_info *edd_info) +{ + char filename[PATH_MAX]; + FILE *fp; + size_t read_chars; + uint16_t len; + int ret; + + ret = snprintf(filename, PATH_MAX, "%s/%s", dir, "raw_data"); + if (ret < 0 || ret >= PATH_MAX) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + return -1; + } + + filename[PATH_MAX-1] = 0; + + fp = fopen(filename, "r"); + if (!fp) { + return -errno; + } + + memset(edd_info->edd_device_params, 0, EDD_DEVICE_PARAM_SIZE); + read_chars = fread(edd_info->edd_device_params, sizeof(uint8_t), + 
EDD_DEVICE_PARAM_SIZE, fp); + fclose(fp); + + len = ((uint16_t *)edd_info->edd_device_params)[0]; + dbgprintf("EDD raw data has length %d\n", len); + + if (read_chars < len) { + fprintf(stderr, "BIOS reported EDD length of %hd but only " + "%d chars read.\n", len, (int)read_chars); + return -1; + } + + return 0; +} + +static int add_edd_entry(struct x86_linux_param_header *real_mode, + const char *sysfs_name, int *current_edd, int *current_mbr) +{ + uint8_t devnum, version; + uint32_t mbr_sig; + struct edd_info *edd_info; + + if (!current_mbr || !current_edd) { + fprintf(stderr, "%s: current_edd and current_edd " + "must not be NULL", __FUNCTION__); + return -1; + } + + edd_info = &real_mode->eddbuf[*current_edd]; + memset(edd_info, 0, sizeof(struct edd_info)); + + /* extract the device number */ + if (sscanf(basename(sysfs_name), "int13_dev%hhx", &devnum) != 1) { + fprintf(stderr, "Invalid format of int13_dev dir " + "entry: %s\n", basename(sysfs_name)); + return -1; + } + + /* if there's a MBR signature, then add it */ + if (file_scanf(sysfs_name, "mbr_signature", "0x%x", &mbr_sig) == 1) { + real_mode->edd_mbr_sig_buffer[*current_mbr] = mbr_sig; + (*current_mbr)++; + dbgprintf("EDD Device 0x%x: mbr_sig=0x%x\n", devnum, mbr_sig); + } + + /* set the device number */ + edd_info->device = devnum; + + /* set the version */ + if (file_scanf(sysfs_name, "version", "0x%hhx", &version) != 1) + return -1; + + edd_info->version = version; + + /* if version == 0, that's some kind of dummy entry */ + if (version != 0) { + /* legacy_max_cylinder */ + if (file_scanf(sysfs_name, "legacy_max_cylinder", "%hu", + &edd_info->legacy_max_cylinder) != 1) { + fprintf(stderr, "Reading legacy_max_cylinder failed.\n"); + return -1; + } + + /* legacy_max_head */ + if (file_scanf(sysfs_name, "legacy_max_head", "%hhu", + &edd_info->legacy_max_head) != 1) { + fprintf(stderr, "Reading legacy_max_head failed.\n"); + return -1; + } + + /* legacy_sectors_per_track */ + if (file_scanf(sysfs_name, 
"legacy_sectors_per_track", "%hhu", + &edd_info->legacy_sectors_per_track) != 1) { + fprintf(stderr, "Reading legacy_sectors_per_track failed.\n"); + return -1; + } + + /* Parse the EDD extensions */ + if (parse_edd_extensions(sysfs_name, edd_info) != 0) { + fprintf(stderr, "Parsing EDD extensions failed.\n"); + return -1; + } + + /* Parse the raw info */ + if (read_edd_raw_data(sysfs_name, edd_info) != 0) { + fprintf(stderr, "Reading EDD raw data failed.\n"); + return -1; + } + } + + (*current_edd)++; + + return 0; +} + +static void zero_edd(struct x86_linux_param_header *real_mode) +{ + real_mode->eddbuf_entries = 0; + real_mode->edd_mbr_sig_buf_entries = 0; + memset(real_mode->eddbuf, 0, + EDDMAXNR * sizeof(struct edd_info)); + memset(real_mode->edd_mbr_sig_buffer, 0, + EDD_MBR_SIG_MAX * sizeof(uint32_t)); +} + +void setup_edd_info(struct x86_linux_param_header *real_mode) +{ + DIR *edd_dir; + struct dirent *cursor; + int current_edd = 0; + int current_mbr = 0; + + edd_dir = opendir(EDD_SYFS_DIR); + if (!edd_dir) { + dbgprintf(EDD_SYFS_DIR " does not exist.\n"); + return; + } + + zero_edd(real_mode); + while ((cursor = readdir(edd_dir))) { + char full_dir_name[PATH_MAX]; + + /* only read the entries that start with "int13_dev" */ + if (strstr(cursor->d_name, "int13_dev") != cursor->d_name) + continue; + + snprintf(full_dir_name, PATH_MAX, "%s/%s", + EDD_SYFS_DIR, cursor->d_name); + full_dir_name[PATH_MAX-1] = 0; + + if (add_edd_entry(real_mode, full_dir_name, ¤t_edd, + ¤t_mbr) != 0) { + zero_edd(real_mode); + goto out; + } + } + + real_mode->eddbuf_entries = current_edd; + real_mode->edd_mbr_sig_buf_entries = current_mbr; + +out: + closedir(edd_dir); + + dbgprintf("Added %d EDD MBR entries and %d EDD entries.\n", + real_mode->edd_mbr_sig_buf_entries, + real_mode->eddbuf_entries); +} + +/* + * This really only makes sense for virtual filesystems that are only expected + * to be mounted once (sysfs, debugsfs, proc), as it will return the first + * instance listed 
in /proc/mounts, falling back to mtab if absent. + * We search by type and not by name because the name can be anything; + * while setting the name equal to the mount point is common, it cannot be + * relied upon, as even kernel documentation examples recommends using + * "none" as the name e.g. for debugfs. + */ +char *find_mnt_by_type(char *type) +{ + FILE *mtab; + struct mntent *mnt; + char *mntdir; + + mtab = setmntent("/proc/mounts", "r"); + if (!mtab) { + // Fall back to mtab + mtab = setmntent("/etc/mtab", "r"); + } + if (!mtab) + return NULL; + for(mnt = getmntent(mtab); mnt; mnt = getmntent(mtab)) { + if (strcmp(mnt->mnt_type, type) == 0) + break; + } + mntdir = mnt ? strdup(mnt->mnt_dir) : NULL; + endmntent(mtab); + return mntdir; +} + +int get_bootparam(void *buf, off_t offset, size_t size) +{ + int data_file; + char *debugfs_mnt, *sysfs_mnt; + char filename[PATH_MAX]; + int err, has_sysfs_params = 0; + + sysfs_mnt = find_mnt_by_type("sysfs"); + if (sysfs_mnt) { + snprintf(filename, PATH_MAX, "%s/%s", sysfs_mnt, + "kernel/boot_params/data"); + free(sysfs_mnt); + err = access(filename, F_OK); + if (!err) + has_sysfs_params = 1; + } + + if (!has_sysfs_params) { + debugfs_mnt = find_mnt_by_type("debugfs"); + if (!debugfs_mnt) + return 1; + snprintf(filename, PATH_MAX, "%s/%s", debugfs_mnt, + "boot_params/data"); + free(debugfs_mnt); + } + + data_file = open(filename, O_RDONLY); + if (data_file < 0) + return 1; + if (lseek(data_file, offset, SEEK_SET) < 0) + goto close; + read(data_file, buf, size); +close: + close(data_file); + return 0; +} + +void setup_subarch(struct x86_linux_param_header *real_mode) +{ + off_t offset = offsetof(typeof(*real_mode), hardware_subarch); + + get_bootparam(&real_mode->hardware_subarch, offset, sizeof(uint32_t)); +} + +struct efi_mem_descriptor { + uint32_t type; + uint32_t pad; + uint64_t phys_addr; + uint64_t virt_addr; + uint64_t num_pages; + uint64_t attribute; +}; + +struct efi_setup_data { + uint64_t fw_vendor; + 
uint64_t runtime; + uint64_t tables; + uint64_t smbios; + uint64_t reserved[8]; +}; + +struct setup_data { + uint64_t next; + uint32_t type; +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 +#define SETUP_RNG_SEED 9 + uint32_t len; + uint8_t data[0]; +} __attribute__((packed)); + +static int get_efi_value(const char *filename, + const char *pattern, uint64_t *val) +{ + FILE *fp; + char line[1024], *s, *end; + + fp = fopen(filename, "r"); + if (!fp) + return 1; + + while (fgets(line, sizeof(line), fp) != 0) { + s = strstr(line, pattern); + if (!s) + continue; + *val = strtoull(s + strlen(pattern), &end, 16); + if (*val == ULLONG_MAX) { + fclose(fp); + return 2; + } + break; + } + + fclose(fp); + return 0; +} + +static int get_efi_values(struct efi_setup_data *esd) +{ + int ret = 0; + + ret = get_efi_value("/sys/firmware/efi/systab", "SMBIOS=0x", + &esd->smbios); + ret |= get_efi_value("/sys/firmware/efi/fw_vendor", "0x", + &esd->fw_vendor); + ret |= get_efi_value("/sys/firmware/efi/runtime", "0x", + &esd->runtime); + ret |= get_efi_value("/sys/firmware/efi/config_table", "0x", + &esd->tables); + return ret; +} + +static int get_efi_runtime_map(struct efi_mem_descriptor **map) +{ + DIR *dirp; + struct dirent *entry; + char filename[1024]; + struct efi_mem_descriptor md, *p = NULL; + int nr_maps = 0; + + dirp = opendir("/sys/firmware/efi/runtime-map"); + if (!dirp) + return 0; + while ((entry = readdir(dirp)) != NULL) { + sprintf(filename, + "/sys/firmware/efi/runtime-map/%s", + (char *)entry->d_name); + if (*entry->d_name == '.') + continue; + file_scanf(filename, "type", "0x%x", (unsigned int *)&md.type); + file_scanf(filename, "phys_addr", "0x%llx", + (unsigned long long *)&md.phys_addr); + file_scanf(filename, "virt_addr", "0x%llx", + (unsigned long long *)&md.virt_addr); + file_scanf(filename, "num_pages", "0x%llx", + (unsigned long long *)&md.num_pages); + file_scanf(filename, "attribute", "0x%llx", + 
(unsigned long long *)&md.attribute); + p = realloc(p, (nr_maps + 1) * sizeof(md)); + if (!p) + goto err_out; + + *(p + nr_maps) = md; + *map = p; + nr_maps++; + } + + closedir(dirp); + return nr_maps; +err_out: + if (*map) + free(*map); + closedir(dirp); + return 0; +} + +struct efi_info { + uint32_t efi_loader_signature; + uint32_t efi_systab; + uint32_t efi_memdesc_size; + uint32_t efi_memdesc_version; + uint32_t efi_memmap; + uint32_t efi_memmap_size; + uint32_t efi_systab_hi; + uint32_t efi_memmap_hi; +}; + +/* + * Add another instance to single linked list of struct setup_data. + * Please refer to kernel Documentation/x86/boot.txt for more details + * about setup_data structure. + */ +static void add_setup_data(struct kexec_info *info, + struct x86_linux_param_header *real_mode, + struct setup_data *sd) +{ + int sdsize = sizeof(struct setup_data) + sd->len; + + sd->next = real_mode->setup_data; + real_mode->setup_data = add_buffer(info, sd, sdsize, sdsize, getpagesize(), + 0x100000, ULONG_MAX, INT_MAX); +} + +/* + * setup_efi_data will collect below data and pass them to 2nd kernel. + * 1) SMBIOS, fw_vendor, runtime, config_table, they are passed via x86 + * setup_data. + * 2) runtime memory regions, set the memmap related fields in efi_info. 
+ */ +static int setup_efi_data(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + int64_t memmap_paddr; + struct setup_data *sd; + struct efi_setup_data *esd; + struct efi_mem_descriptor *maps; + int nr_maps, size, ret = 0; + struct efi_info *ei = (struct efi_info *)real_mode->efi_info; + + ret = access("/sys/firmware/efi/systab", F_OK); + if (ret < 0) + goto out; + + esd = malloc(sizeof(struct efi_setup_data)); + if (!esd) { + ret = 1; + goto out; + } + memset(esd, 0, sizeof(struct efi_setup_data)); + ret = get_efi_values(esd); + if (ret) + goto free_esd; + nr_maps = get_efi_runtime_map(&maps); + if (!nr_maps) { + ret = 2; + goto free_esd; + } + sd = malloc(sizeof(struct setup_data) + sizeof(*esd)); + if (!sd) { + ret = 3; + goto free_maps; + } + + memset(sd, 0, sizeof(struct setup_data) + sizeof(*esd)); + sd->next = 0; + sd->type = SETUP_EFI; + sd->len = sizeof(*esd); + memcpy(sd->data, esd, sizeof(*esd)); + free(esd); + + add_setup_data(info, real_mode, sd); + + size = nr_maps * sizeof(struct efi_mem_descriptor); + memmap_paddr = add_buffer(info, maps, size, size, getpagesize(), + 0x100000, ULONG_MAX, INT_MAX); + ei->efi_memmap = memmap_paddr & 0xffffffff; + ei->efi_memmap_hi = memmap_paddr >> 32; + ei->efi_memmap_size = size; + ei->efi_memdesc_size = sizeof(struct efi_mem_descriptor); + + return 0; +free_maps: + free(maps); +free_esd: + free(esd); +out: + return ret; +} + +static void add_e820_map_from_mr(struct x86_linux_param_header *real_mode, + struct e820entry *e820, struct memory_range *range, int nr_range) +{ + int i; + + for (i = 0; i < nr_range; i++) { + e820[i].addr = range[i].start; + e820[i].size = range[i].end - range[i].start + 1; + switch (range[i].type) { + case RANGE_RAM: + e820[i].type = E820_RAM; + break; + case RANGE_ACPI: + e820[i].type = E820_ACPI; + break; + case RANGE_ACPI_NVS: + e820[i].type = E820_NVS; + break; + case RANGE_PMEM: + e820[i].type = E820_PMEM; + break; + case RANGE_PRAM: + e820[i].type = E820_PRAM; 
+ break; + default: + case RANGE_RESERVED: + e820[i].type = E820_RESERVED; + break; + } + dbgprintf("%016lx-%016lx (%d)\n", + e820[i].addr, + e820[i].addr + e820[i].size - 1, + e820[i].type); + + if (range[i].type != RANGE_RAM) + continue; + if ((range[i].start <= 0x100000) && range[i].end > 0x100000) { + unsigned long long mem_k = (range[i].end >> 10) - (0x100000 >> 10); + real_mode->ext_mem_k = mem_k; + real_mode->alt_mem_k = mem_k; + if (mem_k > 0xfc00) { + real_mode->ext_mem_k = 0xfc00; /* 64M */ + } + if (mem_k > 0xffffffff) { + real_mode->alt_mem_k = 0xffffffff; + } + } + } +} + +static void setup_e820_ext(struct kexec_info *info, struct x86_linux_param_header *real_mode, + struct memory_range *range, int nr_range) +{ + struct setup_data *sd; + struct e820entry *e820; + int nr_range_ext; + + nr_range_ext = nr_range - E820MAX; + sd = xmalloc(sizeof(struct setup_data) + nr_range_ext * sizeof(struct e820entry)); + sd->next = 0; + sd->len = nr_range_ext * sizeof(struct e820entry); + sd->type = SETUP_E820_EXT; + + e820 = (struct e820entry *) sd->data; + dbgprintf("Extended E820 via setup_data:\n"); + add_e820_map_from_mr(real_mode, e820, range + E820MAX, nr_range_ext); + add_setup_data(info, real_mode, sd); +} + +static void setup_e820(struct kexec_info *info, struct x86_linux_param_header *real_mode) +{ + struct memory_range *range; + int nr_range, nr_range_saved; + + + if (info->kexec_flags & KEXEC_ON_CRASH && !arch_options.pass_memmap_cmdline) { + range = info->crash_range; + nr_range = info->nr_crash_ranges; + } else { + range = info->memory_range; + nr_range = info->memory_ranges; + } + + nr_range_saved = nr_range; + if (nr_range > E820MAX) { + nr_range = E820MAX; + } + + real_mode->e820_map_nr = nr_range; + dbgprintf("E820 memmap:\n"); + add_e820_map_from_mr(real_mode, real_mode->e820_map, range, nr_range); + + if (nr_range_saved > E820MAX) { + dbgprintf("extra E820 memmap are passed via setup_data\n"); + setup_e820_ext(info, real_mode, range, 
nr_range_saved); + } +} + +static void setup_rng_seed(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + struct { + struct setup_data header; + uint8_t rng_seed[32]; + } *sd; + + sd = xmalloc(sizeof(*sd)); + sd->header.next = 0; + sd->header.len = sizeof(sd->rng_seed); + sd->header.type = SETUP_RNG_SEED; + + if (getrandom(sd->rng_seed, sizeof(sd->rng_seed), GRND_NONBLOCK) != + sizeof(sd->rng_seed)) + return; /* Not initialized, so don't pass a seed. */ + + add_setup_data(info, real_mode, &sd->header); +} + +static int +get_efi_mem_desc_version(struct x86_linux_param_header *real_mode) +{ + struct efi_info *ei = (struct efi_info *)real_mode->efi_info; + + return ei->efi_memdesc_version; +} + +static void setup_efi_info(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + int ret, desc_version; + off_t offset = offsetof(typeof(*real_mode), efi_info); + + ret = get_bootparam(&real_mode->efi_info, offset, 32); + if (ret) + return; + if (((struct efi_info *)real_mode->efi_info)->efi_memmap_size == 0) + /* zero filled efi_info */ + goto out; + desc_version = get_efi_mem_desc_version(real_mode); + if (desc_version != 1) { + fprintf(stderr, + "efi memory descriptor version %d is not supported!\n", + desc_version); + goto out; + } + ret = setup_efi_data(info, real_mode); + if (ret) + goto out; + + return; + +out: + memset(&real_mode->efi_info, 0, 32); + return; +} + +void setup_linux_system_parameters(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + int err; + + /* get subarch from running kernel */ + setup_subarch(real_mode); + if (bzImage_support_efi_boot && !arch_options.noefi) + setup_efi_info(info, real_mode); + + /* Default screen size */ + real_mode->orig_x = 0; + real_mode->orig_y = 0; + real_mode->orig_video_page = 0; + real_mode->orig_video_mode = 0; + real_mode->orig_video_cols = 80; + real_mode->orig_video_lines = 25; + real_mode->orig_video_ega_bx = 0; + real_mode->orig_video_isVGA = 1; + 
real_mode->orig_video_points = 16; + + /* setup vesa fb if possible, or just use original screen_info */ + err = setup_linux_vesafb(real_mode); + if (err) { + uint16_t cl_magic, cl_offset; + + /* save and restore the old cmdline param if needed */ + cl_magic = real_mode->cl_magic; + cl_offset = real_mode->cl_offset; + + err = get_bootparam(real_mode, 0, sizeof(struct screen_info)); + if (!err) { + real_mode->cl_magic = cl_magic; + real_mode->cl_offset = cl_offset; + } + } + /* Fill in the memsize later */ + real_mode->ext_mem_k = 0; + real_mode->alt_mem_k = 0; + real_mode->e820_map_nr = 0; + + /* Default APM info */ + memset(&real_mode->apm_bios_info, 0, sizeof(real_mode->apm_bios_info)); + /* Default drive info */ + memset(&real_mode->drive_info, 0, sizeof(real_mode->drive_info)); + /* Default sysdesc table */ + real_mode->sys_desc_table.length = 0; + + /* default yes: this can be overridden on the command line */ + real_mode->mount_root_rdonly = 0xFFFF; + + /* default /dev/hda + * this can be overrident on the command line if necessary. 
+ */ + real_mode->root_dev = (0x3 <<8)| 0; + + /* another safe default */ + real_mode->aux_device_info = 0; + + setup_e820(info, real_mode); + + /* pass RNG seed */ + setup_rng_seed(info, real_mode); + + /* fill the EDD information */ + setup_edd_info(real_mode); + + /* Always try to fill acpi_rsdp_addr */ + real_mode->acpi_rsdp_addr = get_acpi_rsdp(); +} + +void setup_linux_dtb(struct kexec_info *info, struct x86_linux_param_header *real_mode, + const char *dtb_buf, int dtb_len) +{ + struct setup_data *sd; + + sd = xmalloc(sizeof(struct setup_data) + dtb_len); + sd->next = 0; + sd->len = dtb_len; + sd->type = SETUP_DTB; + memcpy(sd->data, dtb_buf, dtb_len); + + + add_setup_data(info, real_mode, sd); +} diff --git a/kexec/arch/i386/x86-linux-setup.h b/kexec/arch/i386/x86-linux-setup.h new file mode 100644 index 0000000..b5e1ad5 --- /dev/null +++ b/kexec/arch/i386/x86-linux-setup.h @@ -0,0 +1,38 @@ +#ifndef X86_LINUX_SETUP_H +#define X86_LINUX_SETUP_H +#include <x86/x86-linux.h> + +void init_linux_parameters(struct x86_linux_param_header *real_mode); +void setup_linux_bootloader_parameters_high( + struct kexec_info *info, struct x86_linux_param_header *real_mode, + unsigned long real_mode_base, unsigned long cmdline_offset, + const char *cmdline, off_t cmdline_len, + const char *initrd_buf, off_t initrd_size, int initrd_high); +static inline void setup_linux_bootloader_parameters( + struct kexec_info *info, struct x86_linux_param_header *real_mode, + unsigned long real_mode_base, unsigned long cmdline_offset, + const char *cmdline, off_t cmdline_len, + const char *initrd_buf, off_t initrd_size) +{ + setup_linux_bootloader_parameters_high(info, + real_mode, real_mode_base, + cmdline_offset, cmdline, cmdline_len, + initrd_buf, initrd_size, 0); +} +void setup_linux_system_parameters(struct kexec_info *info, + struct x86_linux_param_header *real_mode); +void setup_linux_dtb(struct kexec_info *info, struct x86_linux_param_header *real_mode, + const char *dtb_buf, int 
dtb_len); +int get_bootparam(void *buf, off_t offset, size_t size); + + +#define SETUP_BASE 0x90000 +#define KERN32_BASE 0x100000 /* 1MB */ +#define INITRD_BASE 0x1000000 /* 16MB */ + +/* command line parameter may be appended by purgatory */ +#define PURGATORY_CMDLINE_SIZE 64 +extern int bzImage_support_efi_boot; +extern struct arch_options_t arch_options; + +#endif /* X86_LINUX_SETUP_H */ diff --git a/kexec/arch/ia64/Makefile b/kexec/arch/ia64/Makefile new file mode 100644 index 0000000..f5b212b --- /dev/null +++ b/kexec/arch/ia64/Makefile @@ -0,0 +1,16 @@ +# +# kexec ia64 (linux booting linux) +# +ia64_KEXEC_SRCS = kexec/arch/ia64/kexec-iomem.c +ia64_KEXEC_SRCS += kexec/arch/ia64/kexec-ia64.c +ia64_KEXEC_SRCS += kexec/arch/ia64/kexec-elf-ia64.c +ia64_KEXEC_SRCS += kexec/arch/ia64/kexec-elf-rel-ia64.c +ia64_KEXEC_SRCS += kexec/arch/ia64/crashdump-ia64.c + +ia64_PROC_IOMEM = + +dist += kexec/arch/ia64/Makefile $(ia64_KEXEC_SRCS) \ + kexec/arch/ia64/kexec-ia64.h kexec/arch/ia64/crashdump-ia64.h \ + kexec/arch/ia64/include/arch/options.h + + diff --git a/kexec/arch/ia64/crashdump-ia64.c b/kexec/arch/ia64/crashdump-ia64.c new file mode 100644 index 0000000..755ee5e --- /dev/null +++ b/kexec/arch/ia64/crashdump-ia64.c @@ -0,0 +1,293 @@ +/* + * kexec: crashdump support + * Copyright (C) 2005-2006 Zou Nan hai <nanhai.zou@intel.com> Intel Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-ia64.h" +#include "crashdump-ia64.h" +#include "../kexec/crashdump.h" + +int memory_ranges = 0; +#define LOAD_OFFSET (0xa000000000000000UL + 0x100000000UL - \ + kernel_code_start) + +static struct crash_elf_info elf_info = +{ + class: ELFCLASS64, + data: ELFDATA2LSB, + machine: EM_IA_64, + page_offset: PAGE_OFFSET, +}; + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region. + * The number of entries in memory_range array is always smaller than + * the number of entries in the file returned by proc_iomem(), + * stored in max_memory_ranges. */ +static struct memory_range *crash_memory_range; +/* Memory region reserved for storing panic kernel and other data. 
*/ +static struct memory_range crash_reserved_mem; +unsigned long elfcorehdr; +static unsigned long kernel_code_start; +static unsigned long kernel_code_end; +struct loaded_segment { + unsigned long start; + unsigned long end; +}; + +#define MAX_LOAD_SEGMENTS 128 +struct loaded_segment loaded_segments[MAX_LOAD_SEGMENTS]; + +unsigned long loaded_segments_num, loaded_segments_base; +static int seg_comp(const void *a, const void *b) +{ + const struct loaded_segment *x = a, *y = b; + /* avoid overflow */ + if (x->start > y->start) return 1; + if (x->start < y->start) return -1; + return 0; +} + +/* purgatory code need this info to patch the EFI memmap + */ +static void add_loaded_segments_info(struct mem_ehdr *ehdr) +{ + unsigned i = 0; + while(i < ehdr->e_phnum) { + struct mem_phdr *phdr; + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type != PT_LOAD) { + i++; + continue; + } + + loaded_segments[loaded_segments_num].start = + _ALIGN_DOWN(phdr->p_paddr, ELF_PAGE_SIZE); + loaded_segments[loaded_segments_num].end = + loaded_segments[loaded_segments_num].start; + + /* Consolidate consecutive PL_LOAD segments into one. + * The end addr of the last PL_LOAD segment, calculated by + * adding p_memsz to p_paddr & rounded up to ELF_PAGE_SIZE, + * will be the end address of this loaded_segments entry. + */ + while (i < ehdr->e_phnum) { + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type != PT_LOAD) + break; + loaded_segments[loaded_segments_num].end = + _ALIGN(phdr->p_paddr + phdr->p_memsz, + ELF_PAGE_SIZE); + i++; + } + loaded_segments_num++; + } +} + +/* Removes crash reserve region from list of memory chunks for whom elf program + * headers have to be created. Assuming crash reserve region to be a single + * continuous area fully contained inside one of the memory chunks */ +static int exclude_crash_reserve_region(int *nr_ranges) +{ + int i, j, tidx = -1; + unsigned long cstart, cend; + struct memory_range temp_region; + + /* Crash reserved region. 
*/ + cstart = crash_reserved_mem.start; + cend = crash_reserved_mem.end; + + for (i = 0; i < (*nr_ranges); i++) { + unsigned long mstart, mend; + mstart = crash_memory_range[i].start; + mend = crash_memory_range[i].end; + if (cstart < mend && cend > mstart) { + if (cstart != mstart && cend != mend) { + /* Split memory region */ + crash_memory_range[i].end = cstart - 1; + temp_region.start = cend + 1; + temp_region.end = mend; + temp_region.type = RANGE_RAM; + tidx = i+1; + } else if (cstart != mstart) + crash_memory_range[i].end = cstart - 1; + else + crash_memory_range[i].start = cend + 1; + } + } + /* Insert split memory region, if any. */ + if (tidx >= 0) { + if (*nr_ranges == max_memory_ranges) { + /* No space to insert another element. */ + fprintf(stderr, "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + return -1; + } + for (j = (*nr_ranges - 1); j >= tidx; j--) + crash_memory_range[j+1] = crash_memory_range[j]; + crash_memory_range[tidx].start = temp_region.start; + crash_memory_range[tidx].end = temp_region.end; + crash_memory_range[tidx].type = temp_region.type; + (*nr_ranges)++; + } + return 0; +} + +static int get_crash_memory_ranges(int *ranges) +{ + const char *iomem = proc_iomem(); + char line[MAX_LINE]; + FILE *fp; + unsigned long start, end; + + crash_memory_range = xmalloc(sizeof(struct memory_range) * + max_memory_ranges); + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + while(fgets(line, sizeof(line), fp) != 0) { + char *str; + int type, consumed, count; + if (memory_ranges >= max_memory_ranges) + break; + count = sscanf(line, "%lx-%lx : %n", + &start, &end, &consumed); + str = line + consumed; + if (count != 2) + continue; + + if (memcmp(str, "System RAM\n", 11) == 0) { + type = RANGE_RAM; + } else if (memcmp(str, "Crash kernel\n", 13) == 0) { + /* Reserved memory region. New kernel can + * use this region to boot into. 
/*
 * Append " elfcorehdr=<addr / 1024>K" to the command line in *cmdline.
 *
 * cmdline: in/out; *cmdline may be NULL (no command line given), which is
 *          treated as an empty string instead of being passed to strlen().
 * addr:    address of the ELF core header, in bytes.
 *
 * Note that this assigns a malloced pointer to *cmdline,
 * which is likely never freed by the caller
 */
static void
cmdline_add_elfcorehdr(const char **cmdline, unsigned long addr)
{
	const char *old = *cmdline ? *cmdline : "";
	char buf[64];
	char *str;
	size_t len;

	/* addr is unsigned long, so print it with %lu; bound the write. */
	snprintf(buf, sizeof(buf), " elfcorehdr=%luK", addr/1024);
	len = strlen(old) + strlen(buf) + 1;
	str = xmalloc(len);
	snprintf(str, len, "%s%s", old, buf);
	*cmdline = str;
}
/* Returns 1 when parse_iomem_single() returns 0 for the "Crash kernel\n"
 * entry and the start and end it reported differ; returns 0 otherwise. */
int is_crashkernel_mem_reserved(void)
{
	uint64_t start, end;

	if (parse_iomem_single("Crash kernel\n", &start, &end) != 0)
		return 0;

	return start != end;
}
OPT_NOIO (OPT_ARCH_MAX+2) +#define OPT_VMM (OPT_ARCH_MAX+3) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + {"command-line", 1, 0, OPT_APPEND}, \ + {"append", 1, 0, OPT_APPEND}, \ + {"initrd", 1, 0, OPT_RAMDISK}, \ + {"noio", 0, 0, OPT_NOIO}, \ + {"vmm", 1, 0, OPT_VMM}, \ + +#define KEXEC_ALL_OPT_STR KEXEC_OPT_STR + +#endif /* KEXEC_ARCH_IA64_OPTIONS_H */ diff --git a/kexec/arch/ia64/kexec-elf-ia64.c b/kexec/arch/ia64/kexec-elf-ia64.c new file mode 100644 index 0000000..142dee3 --- /dev/null +++ b/kexec/arch/ia64/kexec-elf-ia64.c @@ -0,0 +1,303 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) + * Copyright (C) 2004 Albert Herranz + * Copyright (C) 2004 Silicon Graphics, Inc. 
+ * Jesse Barnes <jbarnes@sgi.com> + * Copyright (C) 2004 Khalid Aziz <khalid.aziz@hp.com> Hewlett Packard Co + * Copyright (C) 2005 Zou Nan hai <nanhai.zou@intel.com> Intel Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <limits.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec-elf.h" +#include "kexec-ia64.h" +#include "crashdump-ia64.h" +#include <arch/options.h> + +static const int probe_debug = 0; +extern unsigned long saved_efi_memmap_size; + +/* + * elf_ia64_probe - sanity check the elf image + * + * Make sure that the file image has a reasonable chance of working. 
+ */ +int elf_ia64_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + if (probe_debug) { + fprintf(stderr, "Not an ELF executable\n"); + } + return -1; + } + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_IA_64) { + /* for a different architecture */ + if (probe_debug) { + fprintf(stderr, "Not for this architecture.\n"); + } + return -1; + } + return 0; +} + +void elf_ia64_usage(void) +{ + printf(" --command-line=STRING Set the kernel command line to " + "STRING.\n" + " --append=STRING Set the kernel command line to " + "STRING.\n" + " --initrd=FILE Use FILE as the kernel's initial " + "ramdisk.\n" + " --noio Disable I/O in purgatory code.\n" + " --vmm=FILE Use FILE as the kernel image for a\n" + " virtual machine monitor " + "(aka hypervisor)\n"); +} + +/* Move the crash kerenl physical offset to reserved region + */ +void move_loaded_segments(struct mem_ehdr *ehdr, unsigned long addr) +{ + unsigned i; + long offset = 0; + int found = 0; + struct mem_phdr *phdr; + for(i = 0; i < ehdr->e_phnum; i++) { + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type == PT_LOAD) { + offset = addr - phdr->p_paddr; + found++; + break; + } + } + if (!found) + die("move_loaded_segments: no PT_LOAD region 0x%016x\n", addr); + ehdr->e_entry += offset; + for(i = 0; i < ehdr->e_phnum; i++) { + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type == PT_LOAD) + phdr->p_paddr += offset; + } +} + +int elf_ia64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + const char *command_line, *ramdisk=0, *vmm=0, *kernel_buf; + char *ramdisk_buf = NULL; + off_t ramdisk_size = 0, kernel_size; + unsigned long command_line_len; + unsigned long entry, max_addr, gp_value; + unsigned long command_line_base, ramdisk_base, image_base; + unsigned long efi_memmap_base, efi_memmap_size; + unsigned long boot_param_base; + unsigned long noio=0; + 
int result; + int opt; + char *efi_memmap_buf, *boot_param; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"command-line", 1, 0, OPT_APPEND}, + {"append", 1, 0, OPT_APPEND}, + {"initrd", 1, 0, OPT_RAMDISK}, + {"noio", 0, 0, OPT_NOIO}, + {"vmm", 1, 0, OPT_VMM}, + {0, 0, 0, 0}, + }; + + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + command_line = 0; + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + command_line = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_NOIO: /* disable PIO and MMIO in purgatory code*/ + noio = 1; + break; + case OPT_VMM: + vmm = optarg; + break; + } + } + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) + 16; + } + + if (vmm) + kernel_buf = slurp_decompress_file(vmm, &kernel_size); + else { + kernel_buf = buf; + kernel_size = len; + } + + /* Parse the Elf file */ + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + if (result < 0) { + fprintf(stderr, "ELF parse failed\n"); + free_elf_info(&ehdr); + return result; + } + + if (info->kexec_flags & KEXEC_ON_CRASH ) { + if ((mem_min == 0x00) && (mem_max == ULONG_MAX)) { + fprintf(stderr, "Failed to find crash kernel region " + "in %s\n", proc_iomem()); + free_elf_info(&ehdr); + return -1; + } + move_loaded_segments(&ehdr, mem_min); + } else if (update_loaded_segments(&ehdr) < 0) { + fprintf(stderr, "Failed to place kernel\n"); + return -1; + } + + entry = ehdr.e_entry; + max_addr = elf_max_addr(&ehdr); + + /* Load the Elf data */ + result = elf_exec_load(&ehdr, info); + if (result < 0) { + fprintf(stderr, "ELF load failed\n"); + free_elf_info(&ehdr); + return result; + } + + + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0x0, ULONG_MAX, 
-1, 0); + + + if (load_crashdump_segments(info, &ehdr, max_addr, 0, + &command_line) < 0) + return -1; + + // reverve 4k for ia64_boot_param + boot_param = xmalloc(4096); + boot_param_base = add_buffer(info, boot_param, 4096, 4096, 4096, 0, + max_addr, -1); + + elf_rel_set_symbol(&info->rhdr, "__noio", + &noio, sizeof(long)); + + elf_rel_set_symbol(&info->rhdr, "__boot_param_base", + &boot_param_base, sizeof(long)); + + // reserve efi_memmap of actual size allocated in production kernel + efi_memmap_size = saved_efi_memmap_size; + efi_memmap_buf = xmalloc(efi_memmap_size); + efi_memmap_base = add_buffer(info, efi_memmap_buf, + efi_memmap_size, efi_memmap_size, 4096, 0, + max_addr, -1); + + elf_rel_set_symbol(&info->rhdr, "__efi_memmap_base", + &efi_memmap_base, sizeof(long)); + + elf_rel_set_symbol(&info->rhdr, "__efi_memmap_size", + &efi_memmap_size, sizeof(long)); + if (command_line) { + command_line_len = strlen(command_line) + 1; + } + if (command_line_len || (info->kexec_flags & KEXEC_ON_CRASH )) { + char *cmdline = xmalloc(command_line_len); + strcpy(cmdline, command_line); + + if (info->kexec_flags & KEXEC_ON_CRASH) { + char buf[128]; + sprintf(buf," max_addr=%lluM min_addr=%lluM", + mem_max>>20, mem_min>>20); + command_line_len = strlen(cmdline) + strlen(buf) + 1; + cmdline = xrealloc(cmdline, command_line_len); + strcat(cmdline, buf); + } + + command_line_len = _ALIGN(command_line_len, 16); + command_line_base = add_buffer(info, cmdline, + command_line_len, command_line_len, + getpagesize(), 0UL, + max_addr, -1); + elf_rel_set_symbol(&info->rhdr, "__command_line_len", + &command_line_len, sizeof(long)); + elf_rel_set_symbol(&info->rhdr, "__command_line", + &command_line_base, sizeof(long)); + } + + if (ramdisk) { + ramdisk_buf = slurp_file(ramdisk, &ramdisk_size); + ramdisk_base = add_buffer(info, ramdisk_buf, ramdisk_size, + ramdisk_size, + getpagesize(), 0, max_addr, -1); + elf_rel_set_symbol(&info->rhdr, "__ramdisk_base", + &ramdisk_base, sizeof(long)); 
+ elf_rel_set_symbol(&info->rhdr, "__ramdisk_size", + &ramdisk_size, sizeof(long)); + } + + if (vmm) { + image_base = add_buffer(info, buf, len, len, + getpagesize(), 0, max_addr, -1); + elf_rel_set_symbol(&info->rhdr, "__vmcode_base", + &image_base, sizeof(long)); + elf_rel_set_symbol(&info->rhdr, "__vmcode_size", + &len, sizeof(long)); + } + + gp_value = info->rhdr.rel_addr + 0x200000; + elf_rel_set_symbol(&info->rhdr, "__gp_value", &gp_value, + sizeof(gp_value)); + + elf_rel_set_symbol(&info->rhdr, "__kernel_entry", &entry, + sizeof(entry)); + free_elf_info(&ehdr); + return 0; +} diff --git a/kexec/arch/ia64/kexec-elf-rel-ia64.c b/kexec/arch/ia64/kexec-elf-rel-ia64.c new file mode 100644 index 0000000..500f247 --- /dev/null +++ b/kexec/arch/ia64/kexec-elf-rel-ia64.c @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2005-2006 Zou Nan hai (nanhai.zou@intel.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
/* Overwrite the bits selected by mask in one instruction slot of a 16-byte
 * bundle with the corresponding bits of val.
 *
 * insn_addr: bundle address (16-byte aligned) plus the slot number in its
 *            low four bits.
 * mask, val: both are shifted to the slot's bit position; only bits set in
 *            the shifted mask are replaced.
 *
 * The slot begins at bit 5 + 41 * slot of the 128-bit bundle (5 bits of
 * template, then 3 x 41-bit instructions), so it may lie in the low word,
 * straddle both words, or lie in the high word.
 */
static void
ia64_patch (uint64_t insn_addr, uint64_t mask, uint64_t val)
{
	/* Mask of one 41-bit slot; not referenced below. */
#	define insn_mask ((1UL << 41) - 1)
	uint64_t *words = (uint64_t *) (insn_addr & -16);
	unsigned long bit_pos = 5 + 41 * (insn_addr % 16);
	uint64_t lo_word = words[0];
	uint64_t hi_word = words[1];
	uint64_t hi_mask, hi_val;

	if (bit_pos < 64) {
		/* Slot starts in the low word and may spill into the high one. */
		uint64_t lo_mask = mask << bit_pos;
		uint64_t lo_val = val << bit_pos;

		hi_mask = mask >> (64 - bit_pos);
		hi_val = val >> (64 - bit_pos);
		words[0] = (lo_word & ~lo_mask) | (lo_val & lo_mask);
	} else {
		/* Slot lies entirely in the high word. */
		hi_mask = mask << (bit_pos - 64);
		hi_val = val << (bit_pos - 64);
	}
	words[1] = (hi_word & ~hi_mask) | (hi_val & hi_mask);
}
& 0x00000000001f0000UL) << 6) + /* bit 7 -> 27 */ + | ((value & 0x000000000000ff80UL) << 20) + /* bit 0 -> 13 */ + | ((value & 0x000000000000007fUL) << 13))); + ia64_patch((uint64_t)location - 1, 0x1ffffffffffUL, value>>22); + break; + case R_IA64_IMM22: + if (value + (1 << 21) >= (1 << 22)) + die("value out of IMM22 range\n"); + ia64_patch((uint64_t)location, 0x01fffcfe000UL, + /* bit 21 -> 36 */ + (((value & 0x200000UL) << 15) + /* bit 16 -> 22 */ + | ((value & 0x1f0000UL) << 6) + /* bit 7 -> 27 */ + | ((value & 0x00ff80UL) << 20) + /* bit 0 -> 13 */ + | ((value & 0x00007fUL) << 13) )); + break; + case R_IA64_PCREL21B: { + uint64_t delta = ((int64_t)value - (int64_t)address)/16; + if (delta + (1 << 20) >= (1 << 21)) + die("value out of IMM21B range\n"); + value = ((int64_t)(value - bundle(address)))/16; + ia64_patch((uint64_t)location, 0x11ffffe000UL, + (((value & 0x100000UL) << 16) /* bit 20 -> 36 */ + | ((value & 0x0fffffUL) << 13) /* bit 0 -> 13 */)); + } + break; + case R_IA64_PCREL64LSB: { + value = value - address; + put_unaligned(value, (uint64_t *)location); + } break; + case R_IA64_GPREL22: + case R_IA64_LTOFF22X: + if (value - gp_value + MAX_LTOFF/2 >= MAX_LTOFF) + die("value out of gp relative range"); + value -= gp_value; + ia64_patch((uint64_t)location, 0x01fffcfe000UL, + (((value & 0x200000UL) << 15) /* bit 21 -> 36 */ + |((value & 0x1f0000UL) << 6) /* bit 16 -> 22 */ + |((value & 0x00ff80UL) << 20) /* bit 7 -> 27 */ + |((value & 0x00007fUL) << 13) /* bit 0 -> 13 */)); + break; + case R_IA64_LDXMOV: + if (value - gp_value + MAX_LTOFF/2 >= MAX_LTOFF) + die("value out of gp relative range"); + ia64_patch((uint64_t)location, 0x1fff80fe000UL, 0x10000000000UL); + break; + case R_IA64_LTOFF22: + + default: + die("Unknown rela relocation: 0x%lx 0x%lx\n", + r_type, address); + break; + } + return; +overflow: + die("overflow in relocation type %lu val %llx\n", + r_type, value); +} diff --git a/kexec/arch/ia64/kexec-ia64.c b/kexec/arch/ia64/kexec-ia64.c new 
file mode 100644 index 0000000..418d997 --- /dev/null +++ b/kexec/arch/ia64/kexec-ia64.c @@ -0,0 +1,247 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * Copyright (C) 2004 Albert Herranz + * Copyright (C) 2004 Silicon Graphics, Inc. + * Jesse Barnes <jbarnes@sgi.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <sched.h> +#include <limits.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "elf.h" +#include "kexec-ia64.h" +#include <arch/options.h> + +/* The number of entries in memory_range array is always smaller than + * the number of entries in the file returned by proc_iomem(), + * stored in max_memory_ranges. 
*/ +static struct memory_range *memory_range; +int max_memory_ranges; +static int memory_ranges; +unsigned long saved_efi_memmap_size; + +/* Reserve range for EFI memmap and Boot parameter */ +static int split_range(int range, unsigned long start, unsigned long end) +{ + unsigned long ram_end = memory_range[range - 1].end; + unsigned int type = memory_range[range - 1].type; + int i; + //align end and start to page size of EFI + start = _ALIGN_DOWN(start, 1UL<<12); + end = _ALIGN(end, 1UL<<12); + for (i = 0; i < range; i++) + if(memory_range[i].start <= start && memory_range[i].end >=end) + break; + if (i >= range) + return range; + range = i; + if (memory_range[range].start < start) { + memory_range[range].end = start; + range++; + } + memory_range[range].start = start; + memory_range[range].end = end; + memory_range[range].type = RANGE_RESERVED; + range++; + if (end < ram_end) { + memory_range[range].start = end; + memory_range[range].end = ram_end; + memory_range[range].type = type; + range++; + } + return range; +} + +/* Return a sorted list of available memory ranges. 
*/ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + const char *iomem = proc_iomem(); + char line[MAX_LINE]; + FILE *fp; + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", + iomem, strerror(errno)); + return -1; + } + + /* allocate memory_range dynamically */ + max_memory_ranges = 0; + while(fgets(line, sizeof(line), fp) != 0) { + max_memory_ranges++; + } + memory_range = xmalloc(sizeof(struct memory_range) * + max_memory_ranges); + rewind(fp); + + while(fgets(line, sizeof(line), fp) != 0) { + unsigned long start, end; + char *str; + unsigned type; + int consumed; + int count; + if (memory_ranges >= max_memory_ranges) + break; + count = sscanf(line, "%lx-%lx : %n", + &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + end = end + 1; + if (memcmp(str, "System RAM\n", 11) == 0) { + type = RANGE_RAM; + } + else if (memcmp(str, "reserved\n", 9) == 0) { + type = RANGE_RESERVED; + } + else if (memcmp(str, "Crash kernel\n", 13) == 0) { + /* Redefine the memory region boundaries if kernel + * exports the limits and if it is panic kernel. + * Override user values only if kernel exported + * values are subset of user defined values. 
+ */ + + if (kexec_flags & KEXEC_ON_CRASH) { + if (start > mem_min) + mem_min = start; + if (end < mem_max) + mem_max = end; + } + continue; + } else if (memcmp(str, "Boot parameter\n", 14) == 0) { + memory_ranges = split_range(memory_ranges, start, end); + continue; + } else if (memcmp(str, "EFI Memory Map\n", 14) == 0) { + memory_ranges = split_range(memory_ranges, start, end); + saved_efi_memmap_size = end - start; + continue; + } else if (memcmp(str, "Uncached RAM\n", 13) == 0) { + type = RANGE_UNCACHED; + } else { + continue; + } + /* + * Check if this memory range can be coalesced with + * the previous range + */ + if ((memory_ranges > 0) && + (start == memory_range[memory_ranges-1].end) && + (type == memory_range[memory_ranges-1].type)) { + memory_range[memory_ranges-1].end = end; + } + else { + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + memory_ranges++; + } + } + fclose(fp); + *range = memory_range; + *ranges = memory_ranges; + + return 0; +} + +/* Supported file types and callbacks */ +struct file_type file_type[] = { + {"elf-ia64", elf_ia64_probe, elf_ia64_load, elf_ia64_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + /* This doesn't belong here! Some sort of arch_init() ? 
*/ + + /* execute from monarch processor */ + cpu_set_t affinity; + CPU_ZERO(&affinity); + CPU_SET(0, &affinity); + sched_setaffinity(0, sizeof(affinity), &affinity); + + return 0; +} + +const struct arch_map_entry arches[] = { + { "ia64", KEXEC_ARCH_IA_64 }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +int update_loaded_segments(struct mem_ehdr *ehdr) +{ + int i; + unsigned u; + struct mem_phdr *phdr; + unsigned long start_addr = ULONG_MAX, end_addr = 0; + unsigned long align = 1UL<<26; /* 64M */ + unsigned long start, end; + + for (u = 0; u < ehdr->e_phnum; u++) { + phdr = &ehdr->e_phdr[u]; + if (phdr->p_type != PT_LOAD) + continue; + if (phdr->p_paddr < start_addr) + start_addr = phdr->p_paddr; + if ((phdr->p_paddr + phdr->p_memsz) > end_addr) + end_addr = phdr->p_paddr + phdr->p_memsz; + } + + for (i = 0; i < memory_ranges && memory_range[i].start <= start_addr; + i++) { + if (memory_range[i].type == RANGE_RAM && + memory_range[i].end > end_addr) + return 0; + } + + for (i = 0; i < memory_ranges; i++) { + if (memory_range[i].type != RANGE_RAM) + continue; + start = _ALIGN(memory_range[i].start, align); + end = memory_range[i].end; + if (end > start && (end - start) > (end_addr - start_addr)) { + move_loaded_segments(ehdr, start); + return 0; + } + } + + return -1; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + diff --git a/kexec/arch/ia64/kexec-ia64.h b/kexec/arch/ia64/kexec-ia64.h new file mode 100644 index 0000000..31e4041 --- /dev/null +++ b/kexec/arch/ia64/kexec-ia64.h @@ -0,0 +1,14 @@ +#ifndef KEXEC_IA64_H +#define KEXEC_IA64_H + +extern int max_memory_ranges; +int elf_ia64_probe(const char *buf, off_t len); +int elf_ia64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_ia64_usage(void); +int update_loaded_segments(struct mem_ehdr *ehdr); +void move_loaded_segments(struct mem_ehdr *ehdr, unsigned long addr); + +#define 
EFI_PAGE_SIZE (1UL<<12) +#define ELF_PAGE_SIZE (1UL<<16) +#endif /* KEXEC_IA64_H */ diff --git a/kexec/arch/ia64/kexec-iomem.c b/kexec/arch/ia64/kexec-iomem.c new file mode 100644 index 0000000..7fd50cd --- /dev/null +++ b/kexec/arch/ia64/kexec-iomem.c @@ -0,0 +1,23 @@ +#include <stdint.h> +#include <stdio.h> +#include "../../kexec.h" +#include "../../crashdump.h" + +static const char proc_iomem_str[]= "/proc/iomem"; +static const char proc_iomem_machine_str[]= "/proc/iomem_machine"; + +/* + * On IA64 XEN the EFI tables are virtualised. + * For this reason on such systems /proc/iomem_machine is provided, + * which is based on the hypervisor's (machine's) EFI tables. + * If Xen is in use, then /proc/iomem is used for memory regions relating + * to the currently running dom0 kernel, and /proc/iomem_machine is used + * for regions relating to the machine itself or the hypervisor. + * If Xen is not in used, then /proc/iomem used. + */ +const char *proc_iomem(void) +{ + if (xen_present()) + return proc_iomem_machine_str; + return proc_iomem_str; +} diff --git a/kexec/arch/loongarch/Makefile b/kexec/arch/loongarch/Makefile new file mode 100644 index 0000000..3b33b96 --- /dev/null +++ b/kexec/arch/loongarch/Makefile @@ -0,0 +1,22 @@ +# +# kexec loongarch (linux booting linux) +# +loongarch_KEXEC_SRCS = kexec/arch/loongarch/kexec-loongarch.c +loongarch_KEXEC_SRCS += kexec/arch/loongarch/kexec-elf-loongarch.c +loongarch_KEXEC_SRCS += kexec/arch/loongarch/kexec-pei-loongarch.c +loongarch_KEXEC_SRCS += kexec/arch/loongarch/kexec-elf-rel-loongarch.c +loongarch_KEXEC_SRCS += kexec/arch/loongarch/crashdump-loongarch.c + +loongarch_MEM_REGIONS = kexec/mem_regions.c + +loongarch_CPPFLAGS += -I $(srcdir)/kexec/ + +loongarch_ADD_BUFFER = +loongarch_ADD_SEGMENT = +loongarch_VIRT_TO_PHYS = + +dist += kexec/arch/loongarch/Makefile $(loongarch_KEXEC_SRCS) \ + kexec/arch/loongarch/kexec-loongarch.h \ + kexec/arch/loongarch/image-header.h \ + kexec/arch/loongarch/crashdump-loongarch.h \ + 
kexec/arch/loongarch/include/arch/options.h diff --git a/kexec/arch/loongarch/crashdump-loongarch.c b/kexec/arch/loongarch/crashdump-loongarch.c new file mode 100644 index 0000000..81250e4 --- /dev/null +++ b/kexec/arch/loongarch/crashdump-loongarch.c @@ -0,0 +1,220 @@ +/* + * LoongArch crashdump. + * + * Copyright (C) 2022 Loongson Technology Corporation Limited. + * Youling Tang <tangyouling@loongson.cn> + * + * derived from crashdump-arm64.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <linux/elf.h> + +#include "kexec.h" +#include "crashdump.h" +#include "crashdump-loongarch.h" +#include "iomem.h" +#include "kexec-loongarch.h" +#include "kexec-elf.h" +#include "mem_regions.h" + +/* memory ranges of crashed kernel */ +static struct memory_ranges system_memory_rgns; + +/* memory range reserved for crashkernel */ +struct memory_range crash_reserved_mem[CRASH_MAX_RESERVED_RANGES]; +struct memory_ranges usablemem_rgns = { + .size = 0, + .max_size = CRASH_MAX_RESERVED_RANGES, + .ranges = crash_reserved_mem, +}; + +struct memory_range elfcorehdr_mem; + +static struct crash_elf_info elf_info64 = { + .class = ELFCLASS64, + .data = ELFDATA2LSB, + .machine = EM_LOONGARCH, + .page_offset = PAGE_OFFSET, +}; + +/* + * iomem_range_callback() - callback called for each iomem region + * @data: not used + * @nr: not used + * @str: name of the memory region + * @base: start address of the memory region + * @length: size of the memory region + * + * This function is called once for each memory region found in /proc/iomem. + * It locates system RAM and crashkernel reserved memory and places these to + * variables, respectively, system_memory_rgns and usablemem_rgns. 
+ */ + +static int iomem_range_callback(void *UNUSED(data), int UNUSED(nr), + char *str, unsigned long long base, + unsigned long long length) +{ + if (strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)) == 0) + return mem_regions_alloc_and_add(&usablemem_rgns, + base, length, RANGE_RAM); + else if (strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0) + return mem_regions_alloc_and_add(&system_memory_rgns, + base, length, RANGE_RAM); + else if (strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) == 0) + elf_info64.kern_paddr_start = base; + else if (strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) == 0) + elf_info64.kern_size = base + length - elf_info64.kern_paddr_start; + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + return usablemem_rgns.size; +} + +/* + * crash_get_memory_ranges() - read system physical memory + * + * Function reads through system physical memory and stores found memory + * regions in system_memory_ranges. + * Regions are sorted in ascending order. + * + * Returns 0 in case of success and a negative value otherwise. + */ +static int crash_get_memory_ranges(void) +{ + int i; + + /* + * First read all memory regions that can be considered as + * system memory including the crash area. + */ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + /* allow one or two regions for crash dump kernel */ + if (!usablemem_rgns.size) + return -EINVAL; + + dbgprint_mem_range("Reserved memory range", + usablemem_rgns.ranges, usablemem_rgns.size); + + for (i = 0; i < usablemem_rgns.size; i++) { + if (mem_regions_alloc_and_exclude(&system_memory_rgns, + &crash_reserved_mem[i])) { + fprintf(stderr, "Cannot allocate memory for ranges\n"); + return -ENOMEM; + } + } + + /* + * Make sure that the memory regions are sorted. 
+ */ + mem_regions_sort(&system_memory_rgns); + + dbgprint_mem_range("Coredump memory ranges", + system_memory_rgns.ranges, system_memory_rgns.size); + + /* + * For additional kernel code/data segment. + * kern_paddr_start/kern_size are determined in iomem_range_callback + */ + elf_info64.kern_vaddr_start = get_kernel_sym("_text"); + if (!elf_info64.kern_vaddr_start) + elf_info64.kern_vaddr_start = UINT64_MAX; + + return 0; +} + +/* + * load_crashdump_segments() - load the elf core header + * @info: kexec info structure + * + * This function creates and loads an additional segment of elf core header + * which is used to construct /proc/vmcore on crash dump kernel. + * + * Return 0 in case of success and EFAILED in case of error. + */ + +int load_crashdump_segments(struct kexec_info *info) +{ + unsigned long elfcorehdr; + unsigned long bufsz; + void *buf; + int err; + + /* + * First fetch all the memory (RAM) ranges that we are going to + * pass to the crash dump kernel during panic. + */ + + err = crash_get_memory_ranges(); + + if (err) + return EFAILED; + + err = crash_create_elf64_headers(info, &elf_info64, + system_memory_rgns.ranges, system_memory_rgns.size, + &buf, &bufsz, ELF_CORE_HEADER_ALIGN); + + if (err) + return EFAILED; + + elfcorehdr = add_buffer(info, buf, bufsz, bufsz, 1024, + crash_reserved_mem[usablemem_rgns.size - 1].start, + crash_reserved_mem[usablemem_rgns.size - 1].end, -1); /* bounds: the last reserved crashkernel range */ + + elfcorehdr_mem.start = elfcorehdr; + elfcorehdr_mem.end = elfcorehdr + bufsz - 1; /* inclusive end address */ + + dbgprintf("%s: elfcorehdr 0x%llx-0x%llx\n", __func__, + elfcorehdr_mem.start, elfcorehdr_mem.end); + + return 0; +} + +/* + * e_entry and p_paddr are actually in virtual address space. + * Those values will be translated to physical addresses by using + * virt_to_phys() in add_segment(). + * So let's fix up those values for later use so the memory base will be + * correctly replaced with crash_reserved_mem[usablemem_rgns.size - 1].start. 
+ */ +void fixup_elf_addrs(struct mem_ehdr *ehdr) +{ + struct mem_phdr *phdr; + int i; + + ehdr->e_entry += crash_reserved_mem[usablemem_rgns.size - 1].start; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type != PT_LOAD) + continue; + phdr->p_paddr += crash_reserved_mem[usablemem_rgns.size - 1].start; + } +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + if (!usablemem_rgns.size) + kexec_iomem_for_each_line(NULL, iomem_range_callback, NULL); + + if (!usablemem_rgns.size) + return -1; + + *start = crash_reserved_mem[usablemem_rgns.size - 1].start; + *end = crash_reserved_mem[usablemem_rgns.size - 1].end; + + return 0; +} diff --git a/kexec/arch/loongarch/crashdump-loongarch.h b/kexec/arch/loongarch/crashdump-loongarch.h new file mode 100644 index 0000000..25ff24b --- /dev/null +++ b/kexec/arch/loongarch/crashdump-loongarch.h @@ -0,0 +1,26 @@ +#ifndef CRASHDUMP_LOONGARCH_H +#define CRASHDUMP_LOONGARCH_H + +struct kexec_info; +extern struct memory_ranges usablemem_rgns; +extern struct memory_range crash_reserved_mem[]; +extern struct memory_range elfcorehdr_mem; + +int load_crashdump_segments(struct kexec_info *info); +int is_crashkernel_mem_reserved(void); +void fixup_elf_addrs(struct mem_ehdr *ehdr); +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end); + +#define PAGE_OFFSET 0x9000000000000000ULL +#define MAXMEM 0 + +#define CRASH_MAX_MEMMAP_NR (KEXEC_MAX_SEGMENTS + 1) +#define CRASH_MAX_MEMORY_RANGES (MAX_MEMORY_RANGES + 2) + +/* crash dump kernel support at most two regions, low_region and high region. 
*/ +#define CRASH_MAX_RESERVED_RANGES 2 + +#define COMMAND_LINE_SIZE 512 + +extern struct arch_options_t arch_options; +#endif /* CRASHDUMP_LOONGARCH_H */ diff --git a/kexec/arch/loongarch/image-header.h b/kexec/arch/loongarch/image-header.h new file mode 100644 index 0000000..3b75765 --- /dev/null +++ b/kexec/arch/loongarch/image-header.h @@ -0,0 +1,79 @@ +/* + * LoongArch binary image header. + */ + +#if !defined(__LOONGARCH_IMAGE_HEADER_H) +#define __LOONGARCH_IMAGE_HEADER_H + +#include <endian.h> +#include <stdint.h> + +/** + * struct loongarch_image_header + * + * @pe_sig: Optional PE format 'MZ' signature. + * @reserved_1: Reserved. + * @kernel_entry: Kernel image entry pointer. + * @image_size: An estimated size of the memory image size in LSB byte order. + * @text_offset: The image load offset in LSB byte order. + * @reserved_2: Reserved. + * @reserved_3: Reserved. + * @pe_header: Optional offset to a PE format header. + **/ + +struct loongarch_image_header { + uint8_t pe_sig[2]; + uint16_t reserved_1[3]; + uint64_t kernel_entry; + uint64_t image_size; + uint64_t text_offset; + uint64_t reserved_2[3]; + uint32_t reserved_3; + uint32_t pe_header; +}; + +static const uint8_t loongarch_image_pe_sig[2] = {'M', 'Z'}; + +/** + * loongarch_header_check_pe_sig - Helper to check the loongarch image header. + * + * Returns non-zero if 'MZ' signature is found. 
+ */ + +static inline int loongarch_header_check_pe_sig(const struct loongarch_image_header *h) +{ + if (!h) + return 0; + + return (h->pe_sig[0] == loongarch_image_pe_sig[0] + && h->pe_sig[1] == loongarch_image_pe_sig[1]); +} + +static inline uint64_t loongarch_header_text_offset( + const struct loongarch_image_header *h) +{ + if (!h) + return 0; + + return le64toh(h->text_offset); +} + +static inline uint64_t loongarch_header_image_size( + const struct loongarch_image_header *h) +{ + if (!h) + return 0; + + return le64toh(h->image_size); +} + +static inline uint64_t loongarch_header_kernel_entry( + const struct loongarch_image_header *h) +{ + if (!h) + return 0; + + return le64toh(h->kernel_entry); +} + +#endif diff --git a/kexec/arch/loongarch/include/arch/options.h b/kexec/arch/loongarch/include/arch/options.h new file mode 100644 index 0000000..25a7dc1 --- /dev/null +++ b/kexec/arch/loongarch/include/arch/options.h @@ -0,0 +1,28 @@ +#ifndef KEXEC_ARCH_LOONGARCH_OPTIONS_H +#define KEXEC_ARCH_LOONGARCH_OPTIONS_H + +#define OPT_APPEND ((OPT_MAX)+0) +#define OPT_INITRD ((OPT_MAX)+1) +#define OPT_REUSE_CMDLINE ((OPT_MAX)+2) +#define OPT_ARCH_MAX ((OPT_MAX)+3) + +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "append", 1, NULL, OPT_APPEND }, \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "initrd", 1, NULL, OPT_INITRD }, \ + { "ramdisk", 1, NULL, OPT_INITRD }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR /* Only accept long arch options. 
*/ +#define KEXEC_ALL_OPTIONS KEXEC_ARCH_OPTIONS +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +static const char loongarch_opts_usage[] __attribute__ ((unused)) = +" --append=STRING Set the kernel command line to STRING.\n" +" --command-line=STRING Set the kernel command line to STRING.\n" +" --initrd=FILE Use FILE as the kernel initial ramdisk.\n" +" --ramdisk=FILE Use FILE as the kernel initial ramdisk.\n" +" --reuse-cmdline Use kernel command line from running system.\n"; + +#endif /* KEXEC_ARCH_LOONGARCH_OPTIONS_H */ diff --git a/kexec/arch/loongarch/kexec-elf-loongarch.c b/kexec/arch/loongarch/kexec-elf-loongarch.c new file mode 100644 index 0000000..45387ca --- /dev/null +++ b/kexec/arch/loongarch/kexec-elf-loongarch.c @@ -0,0 +1,125 @@ +/* + * kexec-elf-loongarch.c - kexec Elf loader for loongarch + * + * Copyright (C) 2022 Loongson Technology Corporation Limited. + * Youling Tang <tangyouling@loongson.cn> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#define _GNU_SOURCE + +#include <limits.h> +#include <errno.h> +#include <elf.h> + +#include "kexec.h" +#include "kexec-elf.h" +#include "kexec-syscall.h" +#include "crashdump-loongarch.h" +#include "kexec-loongarch.h" +#include "arch/options.h" + +off_t initrd_base, initrd_size; + +int elf_loongarch_probe(const char *kernel_buf, off_t kernel_size) +{ + struct mem_ehdr ehdr; + int result; + + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + if (result < 0) { + dbgprintf("%s: Not an ELF executable.\n", __func__); + goto out; + } + + /* Verify the architecuture specific bits. 
*/ + if (ehdr.e_machine != EM_LOONGARCH) { + dbgprintf("%s: Not an LoongArch ELF executable.\n", __func__); + result = -1; + goto out; + } + + result = 0; +out: + free_elf_info(&ehdr); + return result; +} + +int elf_loongarch_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info) +{ + const struct loongarch_image_header *header = NULL; + unsigned long kernel_segment; + struct mem_ehdr ehdr; + int result; + + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + + if (result < 0) { + dbgprintf("%s: build_elf_exec_info failed\n", __func__); + goto exit; + } + + kernel_segment = loongarch_locate_kernel_segment(info); + + if (kernel_segment == ULONG_MAX) { + dbgprintf("%s: Kernel segment is not allocated\n", __func__); + result = EFAILED; + goto exit; + } + + dbgprintf("%s: kernel_segment: %016lx\n", __func__, kernel_segment); + dbgprintf("%s: image_size: %016lx\n", __func__, + kernel_size); + dbgprintf("%s: text_offset: %016lx\n", __func__, + loongarch_mem.text_offset); + dbgprintf("%s: phys_offset: %016lx\n", __func__, + loongarch_mem.phys_offset); + dbgprintf("%s: PE format: %s\n", __func__, + (loongarch_header_check_pe_sig(header) ? "yes" : "no")); + + /* create and initialize elf core header segment */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info); + if (result) { + dbgprintf("%s: Creating eflcorehdr failed.\n", + __func__); + goto exit; + } + } + + /* load the kernel */ + if (info->kexec_flags & KEXEC_ON_CRASH) + /* + * offset addresses in elf header in order to load + * vmlinux (elf_exec) into crash kernel's memory. 
+ */ + fixup_elf_addrs(&ehdr); + + info->entry = (void *)virt_to_phys(ehdr.e_entry); + + result = elf_exec_load(&ehdr, info); + + if (result) { + dbgprintf("%s: elf_exec_load failed\n", __func__); + goto exit; + } + + /* load additional data */ + result = loongarch_load_other_segments(info, kernel_segment + kernel_size); + +exit: + free_elf_info(&ehdr); + if (result) + fprintf(stderr, "kexec: Bad elf image file, load failed.\n"); + return result; +} + +void elf_loongarch_usage(void) +{ + printf( +" An LoongArch ELF image, little endian.\n" +" Typically vmlinux or a stripped version of vmlinux.\n\n"); +} diff --git a/kexec/arch/loongarch/kexec-elf-rel-loongarch.c b/kexec/arch/loongarch/kexec-elf-rel-loongarch.c new file mode 100644 index 0000000..59f7f5d --- /dev/null +++ b/kexec/arch/loongarch/kexec-elf-rel-loongarch.c @@ -0,0 +1,42 @@ +/* + * kexec-elf-rel-loongarch.c - kexec Elf relocation routines + * + * Copyright (C) 2022 Loongson Technology Corporation Limited. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+*/ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +/* Check that a relocatable object targets this port: little-endian, 64-bit, EM_LOONGARCH. Returns 1 if it does, 0 otherwise. */ +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2LSB) /* LoongArch images are little endian */ + return 0; + + if (ehdr->ei_class != ELFCLASS64) /* only the 64-bit "loongarch64" arch is supported */ + return 0; + + if (ehdr->e_machine != EM_LOONGARCH) + return 0; + + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), + unsigned long r_type, + void *UNUSED(location), + unsigned long UNUSED(address), + unsigned long UNUSED(value)) +{ + switch (r_type) { + + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } +} diff --git a/kexec/arch/loongarch/kexec-loongarch.c b/kexec/arch/loongarch/kexec-loongarch.c new file mode 100644 index 0000000..f47c998 --- /dev/null +++ b/kexec/arch/loongarch/kexec-loongarch.c @@ -0,0 +1,375 @@ +/* + * kexec-loongarch.c - kexec for loongarch + * + * Copyright (C) 2022 Loongson Technology Corporation Limited. + * Youling Tang <tangyouling@loongson.cn> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <assert.h> +#include <errno.h> +#include <getopt.h> +#include <inttypes.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <linux/elf-em.h> +#include <elf.h> +#include <elf_info.h> + +#include "kexec.h" +#include "kexec-loongarch.h" +#include "crashdump-loongarch.h" +#include "iomem.h" +#include "kexec-syscall.h" +#include "mem_regions.h" +#include "arch/options.h" + +#define CMDLINE_PREFIX "kexec " +static char cmdline[COMMAND_LINE_SIZE] = CMDLINE_PREFIX; + +/* Adds "initrd=start,size" parameters to command line. 
*/ +static int cmdline_add_initrd(char *cmdline, unsigned long addr, + unsigned long size) +{ + int cmdlen, len; + char str[50], *ptr; + + ptr = str; + strcpy(str, " initrd="); + ptr += strlen(str); + ultoa(addr, ptr); + strcat(str, ","); + ptr = str + strlen(str); + ultoa(size, ptr); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + return 0; +} + +/* Adds the appropriate "mem=size@start" options to command line, indicating the + * memory region the new kernel can use to boot into. */ +static int cmdline_add_mem(char *cmdline, unsigned long addr, + unsigned long size) +{ + int cmdlen, len; + char str[50], *ptr; + + addr = addr/1024; + size = size/1024; + ptr = str; + strcpy(str, " mem="); + ptr += strlen(str); + ultoa(size, ptr); + strcat(str, "K@"); + ptr = str + strlen(str); + ultoa(addr, ptr); + strcat(str, "K"); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + return 0; +} + +/* Adds the "elfcorehdr=size@start" command line parameter to command line. */ +static int cmdline_add_elfcorehdr(char *cmdline, unsigned long addr, + unsigned long size) +{ + int cmdlen, len; + char str[50], *ptr; + + addr = addr/1024; + size = size/1024; + ptr = str; + strcpy(str, " elfcorehdr="); + ptr += strlen(str); + ultoa(size, ptr); + strcat(str, "K@"); + ptr = str + strlen(str); + ultoa(addr, ptr); + strcat(str, "K"); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + return 0; +} + +/* Return a sorted list of memory ranges. 
*/ +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + int memory_ranges = 0; + + const char *iomem = proc_iomem(); + char line[MAX_LINE]; + FILE *fp; + unsigned long long start, end; + char *str; + int type, consumed, count; + + fp = fopen(iomem, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", iomem, strerror(errno)); + return -1; + } + + while (fgets(line, sizeof(line), fp) != 0) { + if (memory_ranges >= MAX_MEMORY_RANGES) /* static table is full: ignore the remaining lines */ + break; + count = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; /* region name follows the "start-end : " prefix */ + end = end + 1; /* store end as one past the last address parsed */ + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) + type = RANGE_RAM; + else if (!strncmp(str, IOMEM_RESERVED, strlen(IOMEM_RESERVED))) + type = RANGE_RESERVED; + else + continue; + + if (memory_ranges > 0 && + memory_range[memory_ranges - 1].end == start && + memory_range[memory_ranges - 1].type == type) { + memory_range[memory_ranges - 1].end = end; /* contiguous and same type: grow the previous entry */ + } else { + memory_range[memory_ranges].start = start; + memory_range[memory_ranges].end = end; + memory_range[memory_ranges].type = type; + memory_ranges++; + } + } + fclose(fp); + *range = memory_range; + *ranges = memory_ranges; + + dbgprint_mem_range("MEMORY RANGES:", *range, *ranges); + return 0; +} + +struct file_type file_type[] = { + {"elf-loongarch", elf_loongarch_probe, elf_loongarch_load, elf_loongarch_usage}, + {"pei-loongarch", pei_loongarch_probe, pei_loongarch_load, pei_loongarch_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +/* loongarch global variables. */ + +struct loongarch_mem loongarch_mem; + +/** + * loongarch_process_image_header - Process the loongarch image header. 
+ */ + +int loongarch_process_image_header(const struct loongarch_image_header *h) +{ + + if (!loongarch_header_check_pe_sig(h)) + return EFAILED; + + if (h->image_size) { + loongarch_mem.text_offset = loongarch_header_text_offset(h); + loongarch_mem.image_size = loongarch_header_image_size(h); + } + + return 0; +} +/* Print the arch-specific option help; the text is passed as data, never as the format string. */ +void arch_usage(void) +{ + printf("%s", loongarch_opts_usage); +} + +struct arch_options_t arch_options = { + .core_header_type = CORE_TYPE_ELF64, +}; + +int arch_process_options(int argc, char **argv) +{ + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { 0 }, + }; + int opt; + char *cmdline = NULL; + const char *append = NULL; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + cmdline = get_command_line(); + remove_parameter(cmdline, "kexec"); + remove_parameter(cmdline, "initrd"); + break; + case OPT_INITRD: + arch_options.initrd_file = optarg; + break; + default: + break; + } + } + + arch_options.command_line = concat_cmdline(cmdline, append); + + dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__, + arch_options.command_line); + dbgprintf("%s:%d: initrd: %s\n", __func__, __LINE__, + arch_options.initrd_file); + + return 0; +} + +const struct arch_map_entry arches[] = { + { "loongarch64", KEXEC_ARCH_LOONGARCH }, + { NULL, 0 }, +}; + +unsigned long loongarch_locate_kernel_segment(struct kexec_info *info) +{ + unsigned long hole; + + if (info->kexec_flags & KEXEC_ON_CRASH) { + unsigned long hole_end; + + hole = (crash_reserved_mem[usablemem_rgns.size - 1].start < mem_min ? 
+ mem_min : crash_reserved_mem[usablemem_rgns.size - 1].start) + + loongarch_mem.text_offset; + hole = _ALIGN_UP(hole, MiB(1)); + hole_end = hole + loongarch_mem.text_offset + loongarch_mem.image_size; + + if ((hole_end > mem_max) || + (hole_end > crash_reserved_mem[usablemem_rgns.size - 1].end)) { + dbgprintf("%s: Crash kernel out of range\n", __func__); + hole = ULONG_MAX; + } + } else { + hole = locate_hole(info, + loongarch_mem.text_offset + loongarch_mem.image_size, + MiB(1), 0, ULONG_MAX, 1); + + if (hole == ULONG_MAX) + dbgprintf("%s: locate_hole failed\n", __func__); + } + + return hole; +} + +/* + * loongarch_load_other_segments - Prepare the initrd and cmdline segments. + * Returns EFAILED if the user command line does not fit behind the text already in cmdline[], 0 otherwise. + */ +int loongarch_load_other_segments(struct kexec_info *info, unsigned long hole_min) +{ + unsigned long initrd_min, hole_max; + char *initrd_buf = NULL; + unsigned long pagesize = getpagesize(); + + if (arch_options.command_line) { + if (strlen(cmdline) + strlen(arch_options.command_line) > + sizeof(cmdline) - 1) { + fprintf(stderr, + "Kernel command line too long for kernel!\n"); + return EFAILED; + } + /* strncat()'s bound is the number of bytes to append, not the size of cmdline[] */ + strncat(cmdline, arch_options.command_line, sizeof(cmdline) - strlen(cmdline) - 1); + } + + /* Put the other segments after the image. 
+ */ + + initrd_min = hole_min; + if (info->kexec_flags & KEXEC_ON_CRASH) + hole_max = crash_reserved_mem[usablemem_rgns.size - 1].end; + else + hole_max = ULONG_MAX; + + if (arch_options.initrd_file) { + + initrd_buf = slurp_decompress_file(arch_options.initrd_file, &initrd_size); + + initrd_base = add_buffer(info, initrd_buf, initrd_size, + initrd_size, sizeof(void *), + _ALIGN_UP(initrd_min, + pagesize), hole_max, 1); + dbgprintf("initrd_base: %lx, initrd_size: %lx\n", initrd_base, initrd_size); + + cmdline_add_initrd(cmdline, initrd_base, initrd_size); + } + + if (info->kexec_flags & KEXEC_ON_CRASH) { + cmdline_add_elfcorehdr(cmdline, elfcorehdr_mem.start, + elfcorehdr_mem.end - elfcorehdr_mem.start + 1); + + cmdline_add_mem(cmdline, crash_reserved_mem[usablemem_rgns.size - 1].start, + crash_reserved_mem[usablemem_rgns.size - 1].end - + crash_reserved_mem[usablemem_rgns.size - 1].start + 1); + } + + cmdline[sizeof(cmdline) - 1] = 0; + add_buffer(info, cmdline, sizeof(cmdline), sizeof(cmdline), + sizeof(void *), _ALIGN_UP(hole_min, getpagesize()), + 0xffffffff, 1); + + dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__, cmdline); + + return 0; + +} + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +unsigned long virt_to_phys(unsigned long addr) +{ + return addr & ((1ULL << 48) - 1); +} + +/* + * add_segment() should convert base to a physical address on loongarch, + * while the default is just to work with base as is + */ +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, virt_to_phys(base), memsz, 1); +} + +/* + * add_buffer() should convert base to a physical address on loongarch, + * while the default is just to work with base as is + */ +unsigned long add_buffer(struct kexec_info *info, const void *buf, + unsigned long bufsz, unsigned long memsz, + 
unsigned long buf_align, unsigned long buf_min, + unsigned long buf_max, int buf_end) +{ + return add_buffer_phys_virt(info, buf, bufsz, memsz, buf_align, + buf_min, buf_max, buf_end, 1); +} diff --git a/kexec/arch/loongarch/kexec-loongarch.h b/kexec/arch/loongarch/kexec-loongarch.h new file mode 100644 index 0000000..5120a26 --- /dev/null +++ b/kexec/arch/loongarch/kexec-loongarch.h @@ -0,0 +1,60 @@ +#ifndef KEXEC_LOONGARCH_H +#define KEXEC_LOONGARCH_H + +#include <sys/types.h> + +#include "image-header.h" + +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 16 + +#define MAX_MEMORY_RANGES 64 +#define MAX_LINE 160 + +#define CORE_TYPE_ELF64 1 + +#define COMMAND_LINE_SIZE 512 + +#define KiB(x) ((x) * 1024UL) +#define MiB(x) (KiB(x) * 1024UL) + +int elf_loongarch_probe(const char *kernel_buf, off_t kernel_size); +int elf_loongarch_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_loongarch_usage(void); + +int pei_loongarch_probe(const char *buf, off_t len); +int pei_loongarch_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void pei_loongarch_usage(void); + +int loongarch_process_image_header(const struct loongarch_image_header *h); + +unsigned long loongarch_locate_kernel_segment(struct kexec_info *info); +int loongarch_load_other_segments(struct kexec_info *info, + unsigned long hole_min); + +struct arch_options_t { + char *command_line; + char *initrd_file; + char *dtb; + int core_header_type; +}; + +/** + * struct loongarch_mem - Memory layout info. 
+ */ + +struct loongarch_mem { + uint64_t phys_offset; + uint64_t text_offset; + uint64_t image_size; +}; + +extern struct loongarch_mem loongarch_mem; + +extern struct memory_ranges usablemem_rgns; +extern struct arch_options_t arch_options; +extern off_t initrd_base, initrd_size; + +#endif /* KEXEC_LOONGARCH_H */ diff --git a/kexec/arch/loongarch/kexec-pei-loongarch.c b/kexec/arch/loongarch/kexec-pei-loongarch.c new file mode 100644 index 0000000..1a11103 --- /dev/null +++ b/kexec/arch/loongarch/kexec-pei-loongarch.c @@ -0,0 +1,124 @@ +/* + * LoongArch kexec PE format binary image support. + * + * Copyright (C) 2022 Loongson Technology Corporation Limited. + * Youling Tang <tangyouling@loongson.cn> + * + * derived from kexec-image-arm64.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE + +#include <limits.h> +#include <errno.h> +#include <elf.h> + +#include "kexec.h" +#include "kexec-elf.h" +#include "image-header.h" +#include "kexec-syscall.h" +#include "crashdump-loongarch.h" +#include "kexec-loongarch.h" +#include "arch/options.h" + +int pei_loongarch_probe(const char *kernel_buf, off_t kernel_size) +{ + const struct loongarch_image_header *h; + + if (kernel_size < sizeof(struct loongarch_image_header)) { + dbgprintf("%s: No loongarch image header.\n", __func__); + return -1; + } + + h = (const struct loongarch_image_header *)(kernel_buf); + + if (!loongarch_header_check_pe_sig(h)) { + dbgprintf("%s: Bad loongarch PE image header.\n", __func__); + return -1; + } + + return 0; +} + +int pei_loongarch_load(int argc, char **argv, const char *buf, + off_t len, struct kexec_info *info) +{ + int result; + unsigned long hole_min = 0; + unsigned long kernel_segment, kernel_entry; + const struct loongarch_image_header *header; + + header = (const struct loongarch_image_header *)(buf); + + if (loongarch_process_image_header(header)) + return EFAILED; + + 
kernel_segment = loongarch_locate_kernel_segment(info); + + if (kernel_segment == ULONG_MAX) { + dbgprintf("%s: Kernel segment is not allocated\n", __func__); + result = EFAILED; + goto exit; + } + + kernel_entry = virt_to_phys(loongarch_header_kernel_entry(header)); + + if (info->kexec_flags & KEXEC_ON_CRASH) + /* + * offset addresses in order to load vmlinux.efi into + * crash kernel's memory. + */ + kernel_entry += crash_reserved_mem[usablemem_rgns.size - 1].start; + + dbgprintf("%s: kernel_segment: %016lx\n", __func__, kernel_segment); + dbgprintf("%s: kernel_entry: %016lx\n", __func__, kernel_entry); + dbgprintf("%s: image_size: %016lx\n", __func__, + loongarch_mem.image_size); + dbgprintf("%s: text_offset: %016lx\n", __func__, + loongarch_mem.text_offset); + dbgprintf("%s: phys_offset: %016lx\n", __func__, + loongarch_mem.phys_offset); + dbgprintf("%s: PE format: %s\n", __func__, + (loongarch_header_check_pe_sig(header) ? "yes" : "no")); + + /* Get kernel entry point */ + info->entry = (void *)kernel_entry; + + hole_min = kernel_segment + loongarch_mem.image_size; + + /* Create and initialize elf core header segment */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info); + if (result) { + dbgprintf("%s: Creating eflcorehdr failed.\n", + __func__); + goto exit; + } + } + + /* Load the kernel */ + add_segment(info, buf, len, kernel_segment, loongarch_mem.image_size); + + /* Prepare and load dtb and initrd data */ + result = loongarch_load_other_segments(info, hole_min); + if (result) { + fprintf(stderr, "kexec: Load dtb and initrd segments failed.\n"); + goto exit; + } + +exit: + if (result) + fprintf(stderr, "kexec: load failed.\n"); + + return result; +} + +void pei_loongarch_usage(void) +{ + printf( +" An LoongArch PE format binary image, uncompressed, little endian.\n" +" Typically a vmlinux.efi file.\n\n"); +} diff --git a/kexec/arch/m68k/Makefile b/kexec/arch/m68k/Makefile new file mode 100644 index 0000000..eeaacbd --- 
/dev/null +++ b/kexec/arch/m68k/Makefile @@ -0,0 +1,15 @@ +# +# kexec m68k (linux booting linux) +# +m68k_KEXEC_SRCS = kexec/arch/m68k/kexec-m68k.c +m68k_KEXEC_SRCS += kexec/arch/m68k/kexec-elf-m68k.c +m68k_KEXEC_SRCS += kexec/arch/m68k/kexec-elf-rel-m68k.c +m68k_KEXEC_SRCS += kexec/arch/m68k/bootinfo.c + +m68k_ADD_SEGMENT = +m68k_VIRT_TO_PHYS = + +dist += kexec/arch/m68k/Makefile $(m68k_KEXEC_SRCS) \ + kexec/arch/m68k/bootinfo.h \ + kexec/arch/m68k/kexec-m68k.h \ + kexec/arch/m68k/include/arch/options.h diff --git a/kexec/arch/m68k/bootinfo.c b/kexec/arch/m68k/bootinfo.c new file mode 100644 index 0000000..086a34b --- /dev/null +++ b/kexec/arch/m68k/bootinfo.c @@ -0,0 +1,285 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/random.h> + +#include "../../kexec.h" + +#include "bootinfo.h" + +const char *bootinfo_file = DEFAULT_BOOTINFO_FILE; +static struct bi_rec *bootinfo; +static off_t bootinfo_size; + +static unsigned int num_memchunks; + +static struct bi_rec *bi_next(struct bi_rec *bi, uint16_t size) +{ + return (void *)((unsigned long)bi + size); +} + +static struct bi_rec *bi_find(struct bi_rec *prev, uint16_t tag) +{ + struct bi_rec *bi = prev ? bi_next(prev, prev->size) : bootinfo; + + for (bi = prev ? 
bi_next(prev, prev->size) : bootinfo; + bi->tag != BI_LAST; bi = bi_next(bi, bi->size)) + if (bi->tag == tag) + return bi; + return NULL; +} + +static void bi_remove(uint16_t tag) +{ + struct bi_rec *bi; + off_t rem; + uint16_t size; + + bi = bootinfo; + rem = bootinfo_size; + while (1) { + if (bi->tag == BI_LAST) + break; + + size = bi->size; + if (bi->tag == tag) { + memmove(bi, bi_next(bi, size), rem - size); + bootinfo_size -= size; + rem -= size; + continue; + } + + bi = bi_next(bi, size); + rem -= size; + } +} + +static struct bi_rec *bi_add(uint16_t tag, uint16_t size) +{ + struct bi_rec *bi; + + /* Add 4-byte header and round up to multiple of 4 bytes */ + size = _ALIGN_UP(4 + size, 4); + + bootinfo = xrealloc(bootinfo, bootinfo_size + size); + + /* Replace old sentinel by new record */ + bi = bi_next(bootinfo, bootinfo_size - 2); + bootinfo_size += size; + memset(bi, 0, size); + bi->tag = tag; + bi->size = size; + + /* Re-add sentinel */ + bi_next(bi, size)->tag = BI_LAST; + + return bi; +} + +void bootinfo_load(void) +{ + struct bi_rec *bi; + off_t rem; + uint16_t tag, size; + + dbgprintf("Loading bootinfo from %s\n", bootinfo_file); + bootinfo = (void *)slurp_file_len(bootinfo_file, MAX_BOOTINFO_SIZE, + &bootinfo_size); + if (!bootinfo) + die("No bootinfo\n"); + + bi = bootinfo; + rem = bootinfo_size; + while (1) { + if (rem < 2) + die("Unexpected end of bootinfo\n"); + + tag = bi->tag; + if (tag == BI_LAST) { + rem -= 2; + break; + } + + if (rem < 4) + die("Unexpected end of bootinfo\n"); + + size = bi->size; + if (size < 4 || size % 4) + die("Invalid tag size\n"); + if (rem < size) + die("Unexpected end of bootinfo\n"); + + if (tag == BI_MEMCHUNK) + num_memchunks++; + + bi = bi_next(bi, size); + rem -= size; + } + + if (rem) + die("Trailing data at end of bootinfo\n"); +} + +void bootinfo_print(void) +{ + struct bi_rec *bi = bootinfo; + uint16_t tag, size; + + while (1) { + tag = bi->tag; + if (tag == BI_LAST) { + puts("BI_LAST"); + break; + } + + size 
= bi->size; + switch (tag) { + case BI_MACHTYPE: + printf("BI_MACHTYPE: 0x%08x\n", bi->machtype); + break; + + case BI_MEMCHUNK: + printf("BI_MEMCHUNK: 0x%08x bytes at 0x%08x\n", + bi->mem_info.size, bi->mem_info.addr); + break; + + case BI_RAMDISK: + printf("BI_RAMDISK: 0x%08x bytes at 0x%08x\n", + bi->mem_info.size, bi->mem_info.addr); + break; + + case BI_COMMAND_LINE: + printf("BI_COMMAND_LINE: %s\n", bi->string); + break; + + case BI_RNG_SEED: + /* These are secret, so never print them to the console */ + printf("BI_RNG_SEED: 0x%08x bytes\n", be16_to_cpu(bi->rng_seed.len)); + break; + + default: + printf("BI tag 0x%04x size %u\n", tag, size); + break; + } + bi = bi_next(bi, size); + } +} + +int bootinfo_get_memory_ranges(struct memory_range **range) +{ + struct memory_range *ranges; + unsigned int i; + struct bi_rec *bi; + + ranges = xmalloc(num_memchunks * sizeof(struct memory_range)); + for (i = 0, bi = NULL; + i < num_memchunks && (bi = bi_find(bi, BI_MEMCHUNK)); i++) { + ranges[i].start = bi->mem_info.addr; + ranges[i].end = bi->mem_info.addr + bi->mem_info.size - 1; + ranges[i].type = RANGE_RAM; + } + + *range = ranges; + return i; +} + +void bootinfo_set_cmdline(const char *cmdline) +{ + struct bi_rec *bi; + uint16_t size; + + /* Remove existing command line records */ + bi_remove(BI_COMMAND_LINE); + + if (!cmdline) + return; + + /* Add new command line record */ + size = strlen(cmdline) + 1; + bi = bi_add(BI_COMMAND_LINE, size); + memcpy(bi->string, cmdline, size); +} + +void bootinfo_set_ramdisk(unsigned long ramdisk_addr, + unsigned long ramdisk_size) +{ + struct bi_rec *bi; + + /* Remove existing ramdisk records */ + bi_remove(BI_RAMDISK); + + if (!ramdisk_size) + return; + + /* Add new ramdisk record */ + bi = bi_add(BI_RAMDISK, sizeof(bi->mem_info)); + bi->mem_info.addr = ramdisk_addr; + bi->mem_info.size = ramdisk_size; +} + +void bootinfo_add_rng_seed(void) +{ + enum { RNG_SEED_LEN = 32 }; + struct bi_rec *bi; + + /* Remove existing rng seed 
records */ + bi_remove(BI_RNG_SEED); + + /* Add new rng seed record */ + bi = bi_add(BI_RNG_SEED, sizeof(bi->rng_seed) + RNG_SEED_LEN); + if (getrandom(bi->rng_seed.data, RNG_SEED_LEN, GRND_NONBLOCK) != RNG_SEED_LEN) { + bi_remove(BI_RNG_SEED); + return; + } + bi->rng_seed.len = cpu_to_be16(RNG_SEED_LEN); +} + + + /* + * Check the bootinfo version in the kernel image + * All failures are non-fatal, as kexec may be used to load + * non-Linux images + */ + +void bootinfo_check_bootversion(const struct kexec_info *info) +{ + struct bi_rec *bi; + const struct bootversion *bv; + uint16_t major, minor; + unsigned int i; + + bv = info->segment[0].buf; + if (bv->magic != BOOTINFOV_MAGIC) { + printf("WARNING: No bootversion in kernel image\n"); + return; + } + + bi = bi_find(NULL, BI_MACHTYPE); + if (!bi) { + printf("WARNING: No machtype in bootinfo\n"); + return; + } + + for (i = 0; bv->machversions[i].machtype != bi->machtype; i++) + if (!bv->machversions[i].machtype) { + printf("WARNING: Machtype 0x%08x not in kernel bootversion\n", + bi->machtype); + return; + } + + major = BI_VERSION_MAJOR(bv->machversions[i].version); + minor = BI_VERSION_MINOR(bv->machversions[i].version); + dbgprintf("Kernel uses bootversion %u.%u\n", major, minor); + if (major != SUPPORTED_BOOTINFO_VERSION) + printf("WARNING: Kernel bootversion %u.%u is too %s for this kexec (expected %u.x)\n", + major, minor, + major < SUPPORTED_BOOTINFO_VERSION ? "old" : "new", + SUPPORTED_BOOTINFO_VERSION); +} + +void add_bootinfo(struct kexec_info *info, unsigned long addr) +{ + add_buffer(info, bootinfo, bootinfo_size, bootinfo_size, + sizeof(void *), addr, 0x0fffffff, 1); +} diff --git a/kexec/arch/m68k/bootinfo.h b/kexec/arch/m68k/bootinfo.h new file mode 100644 index 0000000..bb8a03a --- /dev/null +++ b/kexec/arch/m68k/bootinfo.h @@ -0,0 +1,53 @@ +#include <asm/bootinfo.h> + +#define DEFAULT_BOOTINFO_FILE "/proc/bootinfo" +#define MAX_BOOTINFO_SIZE 1536 + +/* New in 6.2's <asm/bootinfo.h>. 
Remove once 6.2 is baseline version. */ +#ifndef BI_RNG_SEED +#define BI_RNG_SEED 0x0008 +#endif + + + /* + * Convenience overlay of several struct bi_record variants + */ + +struct bi_rec { + __be16 tag; + __be16 size; + union { + __be32 data[0]; + /* shorthands for the types we use */ + __be32 machtype; + struct { + __be32 addr; + __be32 size; + } mem_info; + char string[0]; + struct { + __be16 len; + __u8 data[0]; + } rng_seed; + }; +}; + + + /* + * We only support the "new" tagged bootinfo (v2) + */ + +#define SUPPORTED_BOOTINFO_VERSION 2 + + +extern const char *bootinfo_file; + +extern void bootinfo_load(void); +extern void bootinfo_print(void); +extern int bootinfo_get_memory_ranges(struct memory_range **range); +extern void bootinfo_set_cmdline(const char *cmdline); +extern void bootinfo_set_ramdisk(unsigned long ramdisk_addr, + unsigned long ramdisk_size); +extern void bootinfo_add_rng_seed(void); +extern void bootinfo_check_bootversion(const struct kexec_info *info); +extern void add_bootinfo(struct kexec_info *info, unsigned long addr); diff --git a/kexec/arch/m68k/include/arch/options.h b/kexec/arch/m68k/include/arch/options.h new file mode 100644 index 0000000..f279d54 --- /dev/null +++ b/kexec/arch/m68k/include/arch/options.h @@ -0,0 +1,45 @@ +#ifndef KEXEC_ARCH_M68K_OPTIONS_H +#define KEXEC_ARCH_M68K_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) + +/* All 'local' loader options: */ +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+1) +#define OPT_RAMDISK (OPT_ARCH_MAX+2) +#define OPT_BOOTINFO (OPT_ARCH_MAX+3) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. 
+ * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "append", 1, NULL, OPT_APPEND }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \ + { "ramdisk", 1, NULL, OPT_RAMDISK }, \ + { "initrd", 1, NULL, OPT_RAMDISK }, \ + { "bootinfo", 1, NULL, OPT_BOOTINFO }, + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_M68K_OPTIONS_H */ diff --git a/kexec/arch/m68k/kexec-elf-m68k.c b/kexec/arch/m68k/kexec-elf-m68k.c new file mode 100644 index 0000000..a2bf7ee --- /dev/null +++ b/kexec/arch/m68k/kexec-elf-m68k.c @@ -0,0 +1,183 @@ +/* + * kexec-elf-m68k.c - kexec Elf loader for m68k + * + * Copyright (C) 2013 Geert Uytterhoeven + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+*/
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <elf.h>
+#include <boot/elf_boot.h>
+#include <ip_checksum.h>
+#include "../../kexec.h"
+#include "../../kexec-elf.h"
+#include "../../kexec-syscall.h"
+#include "kexec-m68k.h"
+#include "bootinfo.h"
+#include <arch/options.h>
+
+/* Postfix size multipliers: "4 KiB" expands to "4 * 1024" */
+#define KiB * 1024
+#define MiB * 1024 KiB
+
+#define PAGE_SIZE 4 KiB
+
+
+/*
+ * Check whether buf holds an ELF executable for m68k.
+ * Returns 0 on a match, the negative build_elf_exec_info() result if the
+ * image cannot be parsed, or -1 if e_machine is not EM_68K.
+ */
+int elf_m68k_probe(const char *buf, off_t len)
+{
+	struct mem_ehdr ehdr;
+	int result;
+	result = build_elf_exec_info(buf, len, &ehdr, 0);
+	if (result < 0)
+		goto out;
+
+	/* Verify the architecture specific bits */
+	if (ehdr.e_machine != EM_68K) {
+		/* for a different architecture */
+		fprintf(stderr, "Not for this architecture.\n");
+		result = -1;
+		goto out;
+	}
+	result = 0;
+ out:
+	free_elf_info(&ehdr);
+	return result;
+}
+
+/* Print the loader-specific command line options to stdout */
+void elf_m68k_usage(void)
+{
+	printf(" --command-line=STRING Set the kernel command line to STRING\n"
+	       " --append=STRING Set the kernel command line to STRING\n"
+	       " --reuse-cmdline Use kernel command line from running system.\n"
+	       " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n"
+	       " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n"
+	       " --bootinfo=FILE Use FILE as the kernel's bootinfo\n"
+	       );
+}
+
+/* Return the address of the last byte of segment i */
+static unsigned long segment_end(const struct kexec_info *info, int i)
+{
+	return (unsigned long)info->segment[i].mem + info->segment[i].memsz - 1;
+}
+
+/*
+ * Load an m68k ELF kernel image from buf into info.
+ *
+ * Parses the loader options, adds the PT_LOAD segments, an optional ramdisk
+ * and the bootinfo (placed right after the last kernel segment).
+ * Returns 0; parse and load failures terminate through die().
+ */
+int elf_m68k_load(int argc, char **argv, const char *buf, off_t len,
+		  struct kexec_info *info)
+{
+	struct mem_ehdr ehdr;
+	const char *cmdline = NULL, *ramdisk_file = NULL;
+	int opt, result, i;
+	unsigned long bootinfo_addr, ramdisk_addr = 0;
+	off_t ramdisk_size = 0;
+
+	/* See options.h if adding any more options. */
+	static const struct option options[] = {
+		KEXEC_ARCH_OPTIONS
+		{ "command-line", 1, NULL, OPT_APPEND },
+		{ "append", 1, NULL, OPT_APPEND },
+		{ "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE },
+		{ "ramdisk", 1, NULL, OPT_RAMDISK },
+		{ "initrd", 1, NULL, OPT_RAMDISK },
+		{ "bootinfo", 1, NULL, OPT_BOOTINFO },
+		{ 0, 0, NULL, 0 },
+	};
+
+	static const char short_options[] = KEXEC_ARCH_OPT_STR "d";
+
+	while ((opt = getopt_long(argc, argv, short_options, options, 0)) !=
+		-1) {
+		switch (opt) {
+		default:
+			/* Ignore core options */
+			if (opt < OPT_ARCH_MAX)
+				break;
+			/* fall through */
+		case OPT_APPEND:
+			cmdline = optarg;
+			break;
+		case OPT_REUSE_CMDLINE:
+			cmdline = get_command_line();
+			break;
+		case OPT_RAMDISK:
+			ramdisk_file = optarg;
+			break;
+		case OPT_BOOTINFO:
+			/* Accepted here, but nothing to do in this pass */
+			break;
+		}
+	}
+
+	result = build_elf_exec_info(buf, len, &ehdr, 0);
+	if (result < 0)
+		die("ELF exec parse failed\n");
+
+	/* Fixup PT_LOAD segments that include the ELF header (offset zero) */
+	for (i = 0; i < ehdr.e_phnum; i++) {
+		struct mem_phdr *phdr;
+		phdr = &ehdr.e_phdr[i];
+		if (phdr->p_type != PT_LOAD || phdr->p_offset)
+			continue;
+
+		dbgprintf("Removing ELF header from segment %d\n", i);
+		phdr->p_paddr += PAGE_SIZE;
+		phdr->p_vaddr += PAGE_SIZE;
+		phdr->p_filesz -= PAGE_SIZE;
+		phdr->p_memsz -= PAGE_SIZE;
+		phdr->p_offset += PAGE_SIZE;
+		phdr->p_data += PAGE_SIZE;
+	}
+
+	/* Load the ELF data */
+	result = elf_exec_load(&ehdr, info);
+	if (result < 0)
+		die("ELF exec load failed\n");
+
+	info->entry = (void *)virt_to_phys(ehdr.e_entry);
+
+	/* Bootinfo must be stored right after the kernel */
+	bootinfo_addr = segment_end(info, info->nr_segments - 1) + 1;
+
+	/* Load ramdisk */
+	if (ramdisk_file) {
+		void *ramdisk = slurp_decompress_file(ramdisk_file,
+						      &ramdisk_size);
+		/* Test the ramdisk buffer itself, not the kernel image buf */
+		if (!ramdisk)
+			die("Ramdisk load failed\n");
+		/* Store ramdisk at top of first memory chunk */
+		ramdisk_addr = _ALIGN_DOWN(info->memory_range[0].end -
+					   ramdisk_size + 1,
+					   PAGE_SIZE);
+		add_buffer(info, ramdisk, ramdisk_size, ramdisk_size,
+			   PAGE_SIZE, ramdisk_addr, info->memory_range[0].end,
+			   1);
+	}
+
+	/* Update and add bootinfo */
+	bootinfo_set_cmdline(cmdline);
+	bootinfo_set_ramdisk(ramdisk_addr, ramdisk_size);
+	bootinfo_add_rng_seed();
+	if (kexec_debug)
+		bootinfo_print();
+	add_bootinfo(info, bootinfo_addr);
+
+	/*
+	 * Check if the kernel (and bootinfo) exceed 4 MiB, as current kernels
+	 * don't support that.
+	 * As the segments are still unsorted, the bootinfo is located in the
+	 * last segment.
+	 */
+	if (segment_end(info, info->nr_segments - 1) >= virt_to_phys(4 MiB - 1))
+		printf("WARNING: Kernel is larger than 4 MiB\n");
+
+	/* Check struct bootversion at start of kernel */
+	bootinfo_check_bootversion(info);
+
+	return 0;
+}
diff --git a/kexec/arch/m68k/kexec-elf-rel-m68k.c b/kexec/arch/m68k/kexec-elf-rel-m68k.c
new file mode 100644
index 0000000..0cc38cc
--- /dev/null
+++ b/kexec/arch/m68k/kexec-elf-rel-m68k.c
@@ -0,0 +1,39 @@
+/*
+ * kexec-elf-rel-m68k.c - kexec Elf relocation routines
+ *
+ * Copyright (C) 2013 Geert Uytterhoeven
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+*/ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) + return 0; + if (ehdr->ei_class != ELFCLASS32) + return 0; + if (ehdr->e_machine != EM_68K) + return 0; + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), + unsigned long r_type, + void *UNUSED(location), + unsigned long UNUSED(address), + unsigned long UNUSED(value)) +{ + switch (r_type) { + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } + return; +} diff --git a/kexec/arch/m68k/kexec-m68k.c b/kexec/arch/m68k/kexec-m68k.c new file mode 100644 index 0000000..cb54927 --- /dev/null +++ b/kexec/arch/m68k/kexec-m68k.c @@ -0,0 +1,110 @@ +/* + * kexec-m68k.c - kexec for m68k + * + * Copyright (C) 2013 Geert Uytterhoeven + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-m68k.h" +#include "bootinfo.h" +#include <arch/options.h> + + +static unsigned long m68k_memoffset; + + +/* Return a sorted list of memory ranges. 
*/ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + bootinfo_load(); + *ranges = bootinfo_get_memory_ranges(range); + m68k_memoffset = (*range)[0].start; + return 0; +} + + +struct file_type file_type[] = { + {"elf-m68k", elf_m68k_probe, elf_m68k_load, elf_m68k_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { "bootinfo", 1, NULL, OPT_BOOTINFO }, + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != + -1) { + switch (opt) { + default: + break; + case OPT_BOOTINFO: + bootinfo_file = optarg; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + { "m68k", KEXEC_ARCH_68K }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +int is_crashkernel_mem_reserved(void) +{ + return 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + /* Crash kernel region size is not exposed by the system */ + return -1; +} + +unsigned long virt_to_phys(unsigned long addr) +{ + return addr + m68k_memoffset; +} + +/* + * add_segment() should convert base to a physical address on m68k, + * while the default is just to work with base as is */ +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} diff --git a/kexec/arch/m68k/kexec-m68k.h b/kexec/arch/m68k/kexec-m68k.h new file mode 100644 index 0000000..99482c4 --- 
/dev/null +++ b/kexec/arch/m68k/kexec-m68k.h @@ -0,0 +1,9 @@ +#ifndef KEXEC_M68K_H +#define KEXEC_M68K_H + +int elf_m68k_probe(const char *buf, off_t len); +int elf_m68k_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_m68k_usage(void); + +#endif /* KEXEC_M68K_H */ diff --git a/kexec/arch/mips/Makefile b/kexec/arch/mips/Makefile new file mode 100644 index 0000000..1fe7886 --- /dev/null +++ b/kexec/arch/mips/Makefile @@ -0,0 +1,29 @@ +# +# kexec mips (linux booting linux) +# +mips_KEXEC_SRCS = kexec/arch/mips/kexec-mips.c +mips_KEXEC_SRCS += kexec/arch/mips/kexec-elf-mips.c +mips_KEXEC_SRCS += kexec/arch/mips/kexec-elf-rel-mips.c +mips_KEXEC_SRCS += kexec/arch/mips/crashdump-mips.c + +mips_FS2DT = kexec/fs2dt.c +mips_FS2DT_INCLUDE = \ + -include $(srcdir)/kexec/arch/mips/crashdump-mips.h \ + -include $(srcdir)/kexec/arch/mips/kexec-mips.h + +mips_DT_OPS += kexec/dt-ops.c + +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +libfdt_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) +mips_CPPFLAGS += -I$(srcdir)/kexec/libfdt +mips_KEXEC_SRCS += $(libfdt_SRCS) + +mips_ADD_BUFFER = +mips_ADD_SEGMENT = +mips_VIRT_TO_PHYS = + +dist += kexec/arch/mips/Makefile $(mips_KEXEC_SRCS) \ + kexec/arch/mips/kexec-mips.h \ + kexec/arch/mips/crashdump-mips.h \ + kexec/arch/mips/include/arch/options.h diff --git a/kexec/arch/mips/crashdump-mips.c b/kexec/arch/mips/crashdump-mips.c new file mode 100644 index 0000000..00f4335 --- /dev/null +++ b/kexec/arch/mips/crashdump-mips.c @@ -0,0 +1,408 @@ +/* + * kexec: Linux boots Linux + * + * 2005 (C) IBM Corporation. + * 2008 (C) MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <inttypes.h> +#include <elf.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../crashdump.h" +#include "kexec-mips.h" +#include "crashdump-mips.h" +#include "unused.h" + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region */ +static struct memory_range crash_memory_range[CRASH_MAX_MEMORY_RANGES]; + +/* Not used currently but required by generic fs2dt code */ +struct memory_ranges usablemem_rgns; + +/* Memory region reserved for storing panic kernel and other data. 
*/
+static struct memory_range crash_reserved_mem;
+
+/* Read the kernel physical load address from the "Kernel code" entry
+ * looked up by parse_iomem_single() and store it in elf_info.
+ * Returns 0 on success (or when running under Xen, where nothing is
+ * stored), -1 if the entry cannot be found. */
+static int get_kernel_paddr(struct crash_elf_info *elf_info)
+{
+	uint64_t start;
+
+	if (xen_present()) /* Kernel not identity mapped under Xen */
+		return 0;
+
+	if (parse_iomem_single("Kernel code\n", &start, NULL) == 0) {
+		elf_info->kern_paddr_start = start;
+		/* Hex conversion, to match the "0x" prefix */
+		dbgprintf("kernel load physical addr start = 0x%" PRIx64 "\n", start);
+		return 0;
+	}
+
+	fprintf(stderr, "Cannot determine kernel physical load addr\n");
+	return -1;
+}
+
+/* Derive the kernel virtual start address (physical start OR'ed with
+ * start_offset) and the kernel size, and store both in elf_info.
+ * Returns 0 on success, -1 if the physical start is unset or no end
+ * address can be found. */
+static int get_kernel_vaddr_and_size(struct crash_elf_info *elf_info,
+				     unsigned long start_offset)
+{
+	uint64_t end;
+
+	if (!elf_info->kern_paddr_start)
+		return -1;
+
+	elf_info->kern_vaddr_start = elf_info->kern_paddr_start |
+					start_offset;
+	/* If "Kernel bss" exists, the kernel ends there, else fall
+	 * through and say that it ends at "Kernel data" */
+	if (parse_iomem_single("Kernel bss\n", NULL, &end) == 0 ||
+	    parse_iomem_single("Kernel data\n", NULL, &end) == 0) {
+		elf_info->kern_size = end - elf_info->kern_paddr_start;
+		dbgprintf("kernel_vaddr= 0x%llx paddr %llx\n",
+				elf_info->kern_vaddr_start,
+				elf_info->kern_paddr_start);
+		dbgprintf("kernel size = 0x%lx\n", elf_info->kern_size);
+		return 0;
+	}
+	fprintf(stderr, "Cannot determine kernel virtual load addr and size\n");
+	return -1;
+}
+
+/* Removes crash reserve region from list of memory chunks for whom elf program
+ * headers have to be created. Assuming crash reserve region to be a single
+ * continuous area fully contained inside one of the memory chunks.
+ * Updates *nr_ranges when a chunk has to be split in two; returns 0 on
+ * success, -1 if there is no room left for the extra chunk. */
+static int exclude_crash_reserve_region(int *nr_ranges)
+{
+	int i, j, tidx = -1;
+	unsigned long long cstart, cend;
+	struct memory_range temp_region = {
+		.start = 0,
+		.end = 0
+	};
+
+	/* Crash reserved region. */
+	cstart = crash_reserved_mem.start;
+	cend = crash_reserved_mem.end;
+
+	for (i = 0; i < (*nr_ranges); i++) {
+		unsigned long long mstart, mend;
+		mstart = crash_memory_range[i].start;
+		mend = crash_memory_range[i].end;
+		if (cstart < mend && cend > mstart) {
+			if (cstart != mstart && cend != mend) {
+				/* Split memory region */
+				crash_memory_range[i].end = cstart - 1;
+				temp_region.start = cend + 1;
+				temp_region.end = mend;
+				temp_region.type = RANGE_RAM;
+				tidx = i+1;
+			} else if (cstart != mstart)
+				crash_memory_range[i].end = cstart - 1;
+			else
+				crash_memory_range[i].start = cend + 1;
+		}
+	}
+	/* Insert split memory region, if any. */
+	if (tidx >= 0) {
+		if (*nr_ranges == CRASH_MAX_MEMORY_RANGES) {
+			/* No space to insert another element. */
+			fprintf(stderr, "Error: Number of crash memory ranges"
+					" excedeed the max limit\n");
+			return -1;
+		}
+		/* Shift the tail up by one slot to open a gap at tidx */
+		for (j = (*nr_ranges - 1); j >= tidx; j--)
+			crash_memory_range[j+1] = crash_memory_range[j];
+		crash_memory_range[tidx].start = temp_region.start;
+		crash_memory_range[tidx].end = temp_region.end;
+		crash_memory_range[tidx].type = temp_region.type;
+		(*nr_ranges)++;
+	}
+	return 0;
+}
+/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to
+ * create Elf headers. Keeping it separate from get_memory_ranges() as
+ * requirements are different in the case of normal kexec and crashdumps.
+ *
+ * Normal kexec needs to look at all of available physical memory irrespective
+ * of the fact how much of it is being used by currently running kernel.
+ * Crashdumps need to have access to memory regions actually being used by
+ * running kernel. Expecting a different file/data structure than /proc/iomem
+ * to look into down the line. May be something like /proc/kernelmem or may
+ * be zone data structures exported from kernel.
+ */
+static int get_crash_memory_ranges(struct memory_range **range, int *ranges)
+{
+	const char iomem[] = "/proc/iomem";
+	int memory_ranges = 0;
+	char line[MAX_LINE];
+	FILE *fp;
+	unsigned long long start, end;
+
+	fp = fopen(iomem, "r");
+	if (!fp) {
+		fprintf(stderr, "Cannot open %s: %s\n",
+			iomem, strerror(errno));
+		return -1;
+	}
+	/* Separate segment for backup region */
+	crash_memory_range[0].start = BACKUP_SRC_START;
+	crash_memory_range[0].end = BACKUP_SRC_END;
+	crash_memory_range[0].type = RANGE_RAM;
+	memory_ranges++;
+
+	while (fgets(line, sizeof(line), fp) != 0) {
+		char *str;
+		int type, consumed, count;
+		if (memory_ranges >= CRASH_MAX_MEMORY_RANGES)
+			break;
+		count = sscanf(line, "%llx-%llx : %n",
+			&start, &end, &consumed);
+		if (count != 2)
+			continue;
+		str = line + consumed;
+
+		/* Only Dumping memory of type System RAM. */
+		if (memcmp(str, "System RAM\n", 11) == 0) {
+			type = RANGE_RAM;
+		} else if (memcmp(str, "Crash kernel\n", 13) == 0) {
+			/* Reserved memory region. New kernel can
+			 * use this region to boot into. */
+			crash_reserved_mem.start = start;
+			crash_reserved_mem.end = end;
+			crash_reserved_mem.type = RANGE_RAM;
+			continue;
+		} else
+			continue;
+
+		/* The backup region already has its own entry at index 0 */
+		if (start == BACKUP_SRC_START && end >= (BACKUP_SRC_END + 1))
+			start = BACKUP_SRC_END + 1;
+
+		crash_memory_range[memory_ranges].start = start;
+		crash_memory_range[memory_ranges].end = end;
+		crash_memory_range[memory_ranges].type = type;
+		memory_ranges++;
+
+		/* Segregate linearly mapped region. */
+		if (MAXMEM && (MAXMEM - 1) >= start && (MAXMEM - 1) <= end) {
+			/* The split needs a second slot; the check at the top
+			 * of the loop only guaranteed room for the first one */
+			if (memory_ranges >= CRASH_MAX_MEMORY_RANGES) {
+				fprintf(stderr, "Error: Number of crash memory ranges"
+						" exceeded the max limit\n");
+				fclose(fp);
+				return -1;
+			}
+			crash_memory_range[memory_ranges - 1].end = MAXMEM - 1;
+
+			/* Add segregated region. */
+			crash_memory_range[memory_ranges].start = MAXMEM;
+			crash_memory_range[memory_ranges].end = end;
+			crash_memory_range[memory_ranges].type = type;
+			memory_ranges++;
+		}
+	}
+	fclose(fp);
+
+	if (exclude_crash_reserve_region(&memory_ranges) < 0)
+		return -1;
+
+	*range = crash_memory_range;
+	*ranges = memory_ranges;
+	return 0;
+}
+
+/* Adds the appropriate mem= options to command line, indicating the
+ * memory region the new kernel can use to boot into.
+ * addr and size are given in bytes and emitted in KiB units.
+ * Returns 0; dies if the result would not fit in COMMAND_LINE_SIZE. */
+static int cmdline_add_mem(char *cmdline, unsigned long addr,
+		unsigned long size)
+{
+	int cmdlen, len;
+	char str[50], *ptr;
+
+	addr = addr/1024;
+	size = size/1024;
+	ptr = str;
+	strcpy(str, " mem=");
+	ptr += strlen(str);
+	ultoa(size, ptr);
+	strcat(str, "K@");
+	ptr = str + strlen(str);
+	ultoa(addr, ptr);
+	strcat(str, "K");
+	len = strlen(str);
+	cmdlen = strlen(cmdline) + len;
+	if (cmdlen > (COMMAND_LINE_SIZE - 1))
+		die("Command line overflow\n");
+	strcat(cmdline, str);
+
+	return 0;
+}
+
+/* Adds the elfcorehdr= command line parameter to command line.
+ * Returns 0 on success, -1 if addr is not 1K aligned; dies if the
+ * result would not fit in COMMAND_LINE_SIZE. */
+static int cmdline_add_elfcorehdr(char *cmdline, unsigned long addr)
+{
+	int cmdlen, len, align = 1024;
+	char str[30], *ptr;
+
+	/* Passing in elfcorehdr=xxxK format. Saves space required in cmdline.
+	 * Ensure 1K alignment*/
+	if (addr%align)
+		return -1;
+	addr = addr/align;
+	ptr = str;
+	strcpy(str, " elfcorehdr=");
+	ptr += strlen(str);
+	ultoa(addr, ptr);
+	strcat(str, "K");
+	len = strlen(str);
+	cmdlen = strlen(cmdline) + len;
+	if (cmdlen > (COMMAND_LINE_SIZE - 1))
+		die("Command line overflow\n");
+	strcat(cmdline, str);
+	return 0;
+}
+
+/* ELF data encoding matching the byte order this file is compiled for */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define ELFDATALOCAL ELFDATA2LSB
+#elif __BYTE_ORDER == __BIG_ENDIAN
+# define ELFDATALOCAL ELFDATA2MSB
+#else
+# error Unknown byte order
+#endif
+
+/* ELF header parameters for 64-bit core headers */
+static struct crash_elf_info elf_info64 = {
+	.class = ELFCLASS64,
+	.data = ELFDATALOCAL,
+	.machine = EM_MIPS,
+	.page_offset = PAGE_OFFSET,
+	.lowmem_limit = 0, /* 0 == no limit */
+};
+
+/* ELF header parameters for 32-bit core headers */
+static struct crash_elf_info elf_info32 = {
+	.class = ELFCLASS32,
+	.data = ELFDATALOCAL,
+	.machine = EM_MIPS,
+	.page_offset = PAGE_OFFSET,
+	.lowmem_limit = MAXMEM,
+};
+
+/* Adjust elf_info64.page_offset according to the "cpu model" line of
+ * /proc/cpuinfo. Returns 0 on success, -1 if the file cannot be opened. */
+static int patch_elf_info(void)
+{
+	const char cpuinfo[] = "/proc/cpuinfo";
+	char line[MAX_LINE];
+	FILE *fp;
+
+	fp = fopen(cpuinfo, "r");
+	if (!fp) {
+		fprintf(stderr, "Cannot open %s: %s\n",
+			cpuinfo, strerror(errno));
+		return -1;
+	}
+	while (fgets(line, sizeof(line), fp) != 0) {
+		if (strncmp(line, "cpu model", 9) == 0) {
+			/* OCTEON uses a different page_offset. */
+			if (strstr(line, "Octeon"))
+				elf_info64.page_offset = OCTEON_PAGE_OFFSET;
+			/* LOONGSON uses a different page_offset. */
+			else if (strstr(line, "Loongson"))
+				elf_info64.page_offset = LOONGSON_PAGE_OFFSET;
+			break;
+		}
+	}
+	fclose(fp);
+	return 0;
+}
+
+/* Loads additional segments in case of a panic kernel is being loaded.
+ * One segment for backup region, another segment for storing elf headers
+ * for crash memory image.
+ */
+int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline,
+				unsigned long UNUSED(max_addr),
+				unsigned long UNUSED(min_base))
+{
+	void *tmp;
+	unsigned long sz, elfcorehdr;
+	int nr_ranges, align = 1024;
+	struct memory_range *mem_range;
+	/* Defaults are for 32-bit core headers; switched below for ELF64 */
+	crash_create_elf_headers_func crash_create = crash_create_elf32_headers;
+	struct crash_elf_info *elf_info = &elf_info32;
+	unsigned long start_offset = 0x80000000UL;
+
+	if (patch_elf_info())
+		return -1;
+
+	if (arch_options.core_header_type == CORE_TYPE_ELF64) {
+		elf_info = &elf_info64;
+		crash_create = crash_create_elf64_headers;
+		start_offset = (unsigned long)0xffffffff80000000UL;
+	}
+
+	if (get_kernel_paddr(elf_info))
+		return -1;
+
+	if (get_kernel_vaddr_and_size(elf_info, start_offset))
+		return -1;
+
+	if (get_crash_memory_ranges(&mem_range, &nr_ranges) < 0)
+		return -1;
+
+	info->backup_src_start = BACKUP_SRC_START;
+	info->backup_src_size = BACKUP_SRC_SIZE;
+	/* Create a backup region segment to store backup data*/
+	sz = _ALIGN(BACKUP_SRC_SIZE, align);
+	tmp = xmalloc(sz);
+	memset(tmp, 0, sz);
+	info->backup_start = add_buffer(info, tmp, sz, sz, align,
+					crash_reserved_mem.start,
+					crash_reserved_mem.end, -1);
+
+	if (crash_create(info, elf_info, crash_memory_range, nr_ranges,
+			 &tmp, &sz, ELF_CORE_HEADER_ALIGN) < 0) {
+		/* NOTE(review): tmp may still be the backup buffer handed to
+		 * add_buffer() above - confirm this free is intended */
+		free(tmp);
+		return -1;
+	}
+
+	elfcorehdr = add_buffer(info, tmp, sz, sz, align,
+				crash_reserved_mem.start,
+				crash_reserved_mem.end, -1);
+
+	/*
+	 * backup segment is after elfcorehdr, so use elfcorehdr as top of
+	 * kernel's available memory
+	 */
+	cmdline_add_mem(mod_cmdline, crash_reserved_mem.start,
+		crash_reserved_mem.end - crash_reserved_mem.start + 1);
+	cmdline_add_elfcorehdr(mod_cmdline, elfcorehdr);
+
+	dbgprintf("CRASH MEMORY RANGES:\n");
+	dbgprintf("%016llx-%016llx\n", crash_reserved_mem.start,
+			crash_reserved_mem.end);
+	return 0;
+}
+
+/* Returns non-zero if a non-empty "Crash kernel" region is found by
+ * parse_iomem_single(), 0 otherwise */
+int is_crashkernel_mem_reserved(void)
+{
+	uint64_t start, end;
+
+	return parse_iomem_single("Crash kernel\n", &start, &end) == 0 ?
+		(start != end) : 0;
+}
+
+/* Look up the "Crash kernel" region bounds; returns the
+ * parse_iomem_single() result */
+int get_crash_kernel_load_range(uint64_t *start, uint64_t *end)
+{
+	return parse_iomem_single("Crash kernel\n", start, end);
+}
diff --git a/kexec/arch/mips/crashdump-mips.h b/kexec/arch/mips/crashdump-mips.h
new file mode 100644
index 0000000..55c9925
--- /dev/null
+++ b/kexec/arch/mips/crashdump-mips.h
@@ -0,0 +1,30 @@
+#ifndef CRASHDUMP_MIPS_H
+#define CRASHDUMP_MIPS_H
+
+struct kexec_info;
+int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline,
+				unsigned long max_addr, unsigned long min_base);
+#ifdef __mips64
+#define PAGE_OFFSET	0xa800000000000000ULL
+#define MAXMEM		0
+#else
+#define PAGE_OFFSET	0x80000000
+#define MAXMEM		0x80000000
+#endif
+/* Mask an address down to its low 31 bits */
+#define __pa(x)		((unsigned long)(x) & 0x7fffffff)
+
+#define LOONGSON_PAGE_OFFSET	0xffffffff80000000ULL
+#define OCTEON_PAGE_OFFSET	0x8000000000000000ULL
+
+#define CRASH_MAX_MEMMAP_NR	(KEXEC_MAX_SEGMENTS + 1)
+#define CRASH_MAX_MEMORY_RANGES	(MAX_MEMORY_RANGES + 2)
+
+#define COMMAND_LINE_SIZE	512
+
+/* Backup Region, First 1M of System RAM. */
+#define BACKUP_SRC_START	0x00000000
+#define BACKUP_SRC_END		0x000fffff
+#define BACKUP_SRC_SIZE	(BACKUP_SRC_END - BACKUP_SRC_START + 1)
+
+extern struct arch_options_t arch_options;
+#endif /* CRASHDUMP_MIPS_H */
diff --git a/kexec/arch/mips/include/arch/options.h b/kexec/arch/mips/include/arch/options.h
new file mode 100644
index 0000000..ba2f346
--- /dev/null
+++ b/kexec/arch/mips/include/arch/options.h
@@ -0,0 +1,43 @@
+#ifndef KEXEC_ARCH_MIPS_OPTIONS_H
+#define KEXEC_ARCH_MIPS_OPTIONS_H
+
+#define OPT_ARCH_MAX	(OPT_MAX+0)
+#define OPT_APPEND	(OPT_ARCH_MAX+0)
+#define OPT_DTB		(OPT_ARCH_MAX+1)
+#define OPT_RAMDISK	(OPT_ARCH_MAX+2)
+#define OPT_REUSE_CMDLINE	(OPT_ARCH_MAX+3)
+
+/* Options relevant to the architecture (excluding loader-specific ones):
+ * the command line, device tree blob and initrd options listed below.
+ */
+#define KEXEC_ARCH_OPTIONS \
+	KEXEC_OPTIONS \
+	{"command-line", 1, 0, OPT_APPEND}, \
+	{"append", 1, 0, OPT_APPEND}, \
+	{"dtb", 1, 0, OPT_DTB }, \
+	{"initrd", 1, 0, OPT_RAMDISK }, \
+	{ "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE },
+
+
+#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR ""
+
+/* The following two #defines list ALL of the options added by all of the
+ * architecture's loaders.
+ * o main() uses this complete list to scan for its options, ignoring
+ * arch-specific/loader-specific ones.
+ * o Then, arch_process_options() uses this complete list to scan for its
+ * options, ignoring general/loader-specific ones.
+ * o Then, the file_type[n].load re-scans for options, using
+ * KEXEC_ARCH_OPTIONS plus its loader-specific options subset.
+ * Any unrecognised options cause an error here.
+ *
+ * This is done so that main()'s/arch_process_options()'s getopt_long() calls
+ * don't choose a kernel filename from random arguments to options they don't
+ * recognise -- as they now recognise (if not act upon) all possible options.
+ */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_MIPS_OPTIONS_H */ diff --git a/kexec/arch/mips/kexec-elf-mips.c b/kexec/arch/mips/kexec-elf-mips.c new file mode 100644 index 0000000..230d806 --- /dev/null +++ b/kexec/arch/mips/kexec-elf-mips.c @@ -0,0 +1,242 @@ +/* + * kexec-elf-mips.c - kexec Elf loader for mips + * Copyright (C) 2007 Francesco Chiechi, Alessandro Rubini + * Copyright (C) 2007 Tvblob s.r.l. + * + * derived from ../ppc/kexec-elf-ppc.c + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-mips.h" +#include "crashdump-mips.h" +#include <arch/options.h> +#include "../../fs2dt.h" +#include "../../dt-ops.h" + +static const int probe_debug = 0; + +#define BOOTLOADER "kexec" +#define UPSZ(X) _ALIGN_UP(sizeof(X), 4) + +#define CMDLINE_PREFIX "kexec " +static char cmdline_buf[COMMAND_LINE_SIZE] = CMDLINE_PREFIX; + +/* Adds initrd parameters to command line. */ +static int cmdline_add_initrd(char *cmdline, unsigned long addr, char *new_para) +{ + int cmdlen, len; + char str[30], *ptr; + + ptr = str; + strcpy(str, new_para); + ptr += strlen(str); + ultoa(addr, ptr); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + return 0; +} + +/* add initrd to cmdline to compatible with previous platforms. 
*/
+static int patch_initrd_info(char *cmdline, unsigned long base,
+			     unsigned long size)
+{
+	const char cpuinfo[] = "/proc/cpuinfo";
+	char line[MAX_LINE];
+	FILE *fp;
+	unsigned long page_offset = PAGE_OFFSET;
+
+	fp = fopen(cpuinfo, "r");
+	if (!fp) {
+		fprintf(stderr, "Cannot open %s: %s\n",
+			cpuinfo, strerror(errno));
+		return -1;
+	}
+	/* rd_start=/rd_size= are only appended for Loongson cpu models */
+	while (fgets(line, sizeof(line), fp) != 0) {
+		if (strncmp(line, "cpu model", 9) == 0) {
+			if (strstr(line, "Loongson")) {
+				/* LOONGSON64 uses a different page_offset. */
+				if (arch_options.core_header_type ==
+							CORE_TYPE_ELF64)
+					page_offset = LOONGSON_PAGE_OFFSET;
+				cmdline_add_initrd(cmdline,
+					page_offset + base, " rd_start=");
+				cmdline_add_initrd(cmdline, size, " rd_size=");
+				break;
+			}
+		}
+	}
+	fclose(fp);
+	return 0;
+}
+
+/*
+ * Check whether buf holds an ELF executable for MIPS.
+ * Returns 0 on a match, the negative build_elf_exec_info() result if the
+ * image cannot be parsed, or -1 if e_machine is not EM_MIPS.
+ */
+int elf_mips_probe(const char *buf, off_t len)
+{
+	struct mem_ehdr ehdr;
+	int result;
+	result = build_elf_exec_info(buf, len, &ehdr, 0);
+	if (result < 0) {
+		goto out;
+	}
+
+	/* Verify the architecture specific bits */
+	if (ehdr.e_machine != EM_MIPS) {
+		/* for a different architecture */
+		if (probe_debug) {
+			fprintf(stderr, "Not for this architecture.\n");
+		}
+		result = -1;
+		goto out;
+	}
+	result = 0;
+ out:
+	free_elf_info(&ehdr);
+	return result;
+}
+
+/* This loader prints no usage text of its own */
+void elf_mips_usage(void)
+{
+}
+
+/*
+ * Load a MIPS ELF kernel image from buf into info.
+ *
+ * Adds the PT_LOAD segments, the crashdump segments when KEXEC_ON_CRASH is
+ * set, an optional initrd, the legacy command line buffer and the DTB.
+ * Returns 0 on success, -1 if the crashdump segments cannot be loaded;
+ * ELF parse and load failures terminate through die().
+ */
+int elf_mips_load(int argc, char **argv, const char *buf, off_t len,
+	struct kexec_info *info)
+{
+	struct mem_ehdr ehdr;
+	char *crash_cmdline;
+	int result;
+	unsigned long cmdline_addr;
+	size_t i;
+	off_t dtb_length;
+	char *dtb_buf;
+	char *initrd_buf = NULL;
+	unsigned long long kernel_addr = 0, kernel_size = 0;
+	unsigned long pagesize = getpagesize();
+
+	/* Need to append some command line parameters internally in case of
+	 * taking crash dumps.
+	 */
+	if (info->kexec_flags & KEXEC_ON_CRASH) {
+		crash_cmdline = xmalloc(COMMAND_LINE_SIZE);
+		memset((void *)crash_cmdline, 0, COMMAND_LINE_SIZE);
+	} else
+		crash_cmdline = NULL;
+
+	result = build_elf_exec_info(buf, len, &ehdr, 0);
+	if (result < 0)
+		die("ELF exec parse failed\n");
+
+	/* Read in the PT_LOAD segments and remove CKSEG0 mask from address*/
+	for (i = 0; i < ehdr.e_phnum; i++) {
+		struct mem_phdr *phdr;
+		phdr = &ehdr.e_phdr[i];
+		if (phdr->p_type == PT_LOAD) {
+			phdr->p_paddr = virt_to_phys(phdr->p_paddr);
+			kernel_addr = phdr->p_paddr;
+			kernel_size = phdr->p_memsz;
+		}
+	}
+
+	/* Load the Elf data */
+	result = elf_exec_load(&ehdr, info);
+	if (result < 0)
+		die("ELF exec load failed\n");
+
+	info->entry = (void *)virt_to_phys(ehdr.e_entry);
+
+	if (info->kexec_flags & KEXEC_ON_CRASH) {
+		result = load_crashdump_segments(info, crash_cmdline,
+				0, 0);
+		if (result < 0) {
+			free(crash_cmdline);
+			return -1;
+		}
+	}
+
+	/* Bound both appends by the space left in cmdline_buf; the size
+	 * argument of strncat() limits the bytes copied from the source,
+	 * so it must be derived from the destination, not from the source
+	 * string or from the size of a pointer */
+	if (arch_options.command_line)
+		strncat(cmdline_buf, arch_options.command_line,
+			sizeof(cmdline_buf) - strlen(cmdline_buf) - 1);
+	if (crash_cmdline)
+	{
+		strncat(cmdline_buf, crash_cmdline,
+			sizeof(cmdline_buf) - strlen(cmdline_buf) - 1);
+		free(crash_cmdline);
+	}
+
+	if (info->kexec_flags & KEXEC_ON_CRASH)
+		/* In case of crashdump segment[0] is kernel.
+		 * Put cmdline just after it. */
+		cmdline_addr = (unsigned long)info->segment[0].mem +
+				info->segment[0].memsz;
+	else
+		cmdline_addr = 0;
+
+	/* MIPS systems that have been converted to use device tree
+	 * passed through UHI will use commandline in the DTB and
+	 * the DTB passed as a separate buffer. Note that
+	 * CMDLINE_PREFIX is skipped here intentionally, as it is
+	 * used only in the legacy method */
+
+	if (arch_options.dtb_file) {
+		dtb_buf = slurp_file(arch_options.dtb_file, &dtb_length);
+	} else {
+		create_flatten_tree(&dtb_buf, &dtb_length, cmdline_buf + strlen(CMDLINE_PREFIX));
+	}
+
+	if (arch_options.initrd_file) {
+		initrd_buf = slurp_file(arch_options.initrd_file, &initrd_size);
+
+		/* Create initrd entries in dtb - although at this time
+		 * they would not point to the correct location */
+		dtb_set_initrd(&dtb_buf, &dtb_length, (off_t)initrd_buf, (off_t)initrd_buf + initrd_size);
+
+		initrd_base = add_buffer(info, initrd_buf, initrd_size,
+					initrd_size, sizeof(void *),
+					_ALIGN_UP(kernel_addr + kernel_size + dtb_length,
+						pagesize), 0x0fffffff, 1);
+
+		/* Now that the buffer for initrd is prepared, update the dtb
+		 * with an appropriate location */
+		dtb_set_initrd(&dtb_buf, &dtb_length, initrd_base, initrd_base + initrd_size);
+
+		/* Add the initrd parameters to cmdline */
+		patch_initrd_info(cmdline_buf, initrd_base, initrd_size);
+	}
+	/* This is a legacy method for commandline passing used
+	 * currently by Octeon CPUs only */
+	add_buffer(info, cmdline_buf, sizeof(cmdline_buf),
+			sizeof(cmdline_buf), sizeof(void *),
+			cmdline_addr, 0x0fffffff, 1);
+
+	add_buffer(info, dtb_buf, dtb_length, dtb_length, 0,
+		_ALIGN_UP(kernel_addr + kernel_size, pagesize),
+		0x0fffffff, 1);
+
+	return 0;
+}
+
diff --git a/kexec/arch/mips/kexec-elf-rel-mips.c b/kexec/arch/mips/kexec-elf-rel-mips.c
new file mode 100644
index 0000000..5bc22d5
--- /dev/null
+++ b/kexec/arch/mips/kexec-elf-rel-mips.c
@@ -0,0 +1,46 @@
+/*
+ * kexec-elf-rel-mips.c - kexec Elf relocation routines
+ * Copyright (C) 2007 Francesco Chiechi, Alessandro Rubini
+ * Copyright (C) 2007 Tvblob s.r.l.
+ *
+ * derived from ../ppc/kexec-elf-rel-ppc.c
+ * Copyright (C) 2004 Albert Herranz
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.
See the file COPYING for more details. +*/ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS32) { + return 0; + } + if (ehdr->e_machine != EM_MIPS) { + return 0; + } + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), + unsigned long r_type, + void *UNUSED(location), + unsigned long UNUSED(address), + unsigned long UNUSED(value)) +{ + switch(r_type) { + + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } + return; +} diff --git a/kexec/arch/mips/kexec-mips.c b/kexec/arch/mips/kexec-mips.c new file mode 100644 index 0000000..d8cbea8 --- /dev/null +++ b/kexec/arch/mips/kexec-mips.c @@ -0,0 +1,191 @@ +/* + * kexec-mips.c - kexec for mips + * Copyright (C) 2007 Francesco Chiechi, Alessandro Rubini + * Copyright (C) 2007 Tvblob s.r.l. + * + * derived from ../ppc/kexec-ppc.c + * Copyright (C) 2004, 2005 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-mips.h" +#include <arch/options.h> + +/* Currently not used but required by top-level fs2dt code */ +off_t initrd_base = 0; +off_t initrd_size = 0; + +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +/* Return a sorted list of memory ranges. 
*/
+int get_memory_ranges(struct memory_range **range, int *ranges,
+		      unsigned long UNUSED(kexec_flags))
+{
+	int memory_ranges = 0;
+
+	const char iomem[] = "/proc/iomem";
+	char line[MAX_LINE];
+	FILE *fp;
+	unsigned long long start, end;
+	char *str;
+	int type, consumed, count;
+
+	fp = fopen(iomem, "r");
+	if (!fp) {
+		fprintf(stderr, "Cannot open %s: %s\n", iomem, strerror(errno));
+		return -1;
+	}
+	while (fgets(line, sizeof(line), fp) != 0) {
+		/* The static table is full: silently ignore the rest. */
+		if (memory_ranges >= MAX_MEMORY_RANGES)
+			break;
+		/* %n does not count towards the return value and is only
+		 * stored when the " : " separator actually matched, so preset
+		 * it and reject lines on which it was never written. */
+		consumed = -1;
+		count = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed);
+		if (count != 2 || consumed < 0)
+			continue;
+		str = line + consumed;
+		/* /proc/iomem prints an inclusive end; store it exclusive. */
+		end = end + 1;
+		/* strncmp() stops at the terminating NUL, so a short name is
+		 * never read past the end of the string. */
+		if (strncmp(str, "System RAM\n", 11) == 0) {
+			type = RANGE_RAM;
+		} else if (strncmp(str, "reserved\n", 9) == 0) {
+			type = RANGE_RESERVED;
+		} else {
+			continue;
+		}
+		/* Merge with the previous entry when it is adjacent and of
+		 * the same type, otherwise start a new entry. */
+		if (memory_ranges > 0 &&
+		    memory_range[memory_ranges - 1].end == start &&
+		    memory_range[memory_ranges - 1].type == type) {
+			memory_range[memory_ranges - 1].end = end;
+		} else {
+			memory_range[memory_ranges].start = start;
+			memory_range[memory_ranges].end = end;
+			memory_range[memory_ranges].type = type;
+			memory_ranges++;
+		}
+	}
+	fclose(fp);
+	*range = memory_range;
+	*ranges = memory_ranges;
+	return 0;
+}
+
+/* Image loaders available on mips: only ELF executables. */
+struct file_type file_type[] = {
+	{"elf-mips", elf_mips_probe, elf_mips_load, elf_mips_usage},
+};
+int file_types = sizeof(file_type) / sizeof(file_type[0]);
+
+/* Print the mips-specific command line options. */
+void arch_usage(void)
+{
+	printf(
+	" --command-line=STRING Set the kernel command line to STRING.\n"
+	" --append=STRING Set the kernel command line to STRING.\n"
+	" --dtb=FILE Use FILE as the device tree blob.\n"
+	" --initrd=FILE Use FILE as initial ramdisk.\n"
+	" --reuse-cmdline Use kernel command line from running system.\n"
+	);
+}
+
+/* Core header type defaults to the word size this tool was built for. */
+struct arch_options_t arch_options = {
+#ifdef __mips64
+	.core_header_type = CORE_TYPE_ELF64,
+#else
+	.core_header_type = CORE_TYPE_ELF32,
+#endif
+};
+
+/*
+ * Parse the mips-specific options into arch_options. The final command
+ * line is concat_cmdline() of the reused command line (if requested) and
+ * the --append string. Unknown options are ignored here. Always returns 0.
+ */
+int arch_process_options(int argc, char **argv)
+{
+	static const struct option options[] = {
+		KEXEC_ARCH_OPTIONS
+		{ 0 },
+	};
+	static const char short_options[] = KEXEC_ARCH_OPT_STR;
+	int opt;
+	char *cmdline = NULL;
+	const char *append = NULL;
+
+	while ((opt = getopt_long(argc, argv, short_options,
+				  options, 0)) != -1) {
+		switch (opt) {
+		case OPT_APPEND:
+			append = optarg;
+			break;
+		case OPT_REUSE_CMDLINE:
+			cmdline = get_command_line();
+			break;
+		case OPT_DTB:
+			arch_options.dtb_file = optarg;
+			break;
+		case OPT_RAMDISK:
+			arch_options.initrd_file = optarg;
+			break;
+		default:
+			break;
+		}
+	}
+
+	arch_options.command_line = concat_cmdline(cmdline, append);
+
+	dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__,
+		arch_options.command_line);
+	dbgprintf("%s:%d: initrd: %s\n", __func__, __LINE__,
+		arch_options.initrd_file);
+	dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__,
+		arch_options.dtb_file);
+
+	return 0;
+}
+
+const struct arch_map_entry arches[] = {
+	/* For compatibility with older patches
+	 * use KEXEC_ARCH_DEFAULT instead of KEXEC_ARCH_MIPS here.
+	 *
+	 * NOTE(review): the entries below use KEXEC_ARCH_MIPS, which
+	 * contradicts the sentence above - confirm which one is intended.
+	 */
+	{ "mips", KEXEC_ARCH_MIPS },
+	{ "mips64", KEXEC_ARCH_MIPS },
+	{ NULL, 0 },
+};
+
+/* Nothing to do on mips; always reports success. */
+int arch_compat_trampoline(struct kexec_info *UNUSED(info))
+{
+
+	return 0;
+}
+
+/* Nothing to do on mips. */
+void arch_update_purgatory(struct kexec_info *UNUSED(info))
+{
+}
+
+/* Convert an address to a physical one by clearing bit 31 and above. */
+unsigned long virt_to_phys(unsigned long addr)
+{
+	return addr & 0x7fffffff;
+}
+
+/*
+ * add_segment() should convert base to a physical address on mips,
+ * while the default is just to work with base as is */
+void add_segment(struct kexec_info *info, const void *buf, size_t bufsz,
+		 unsigned long base, size_t memsz)
+{
+	add_segment_phys_virt(info, buf, bufsz, virt_to_phys(base), memsz, 1);
+}
+
+/*
+ * add_buffer() should convert base to a physical address on mips,
+ * while the default is just to work with base as is */
+unsigned long add_buffer(struct kexec_info *info, const void *buf,
+			 unsigned long bufsz, unsigned long memsz,
+			 unsigned long buf_align, unsigned long buf_min,
+			 unsigned long buf_max, int buf_end)
+{
+	return add_buffer_phys_virt(info, buf, bufsz, memsz, buf_align,
+				    buf_min, buf_max, buf_end, 1);
+}
+
diff --git a/kexec/arch/mips/kexec-mips.h b/kexec/arch/mips/kexec-mips.h
new file mode 100644
index 0000000..222c815
--- /dev/null
+++ b/kexec/arch/mips/kexec-mips.h
@@ -0,0 +1,30 @@
+#ifndef KEXEC_MIPS_H
+#define KEXEC_MIPS_H
+
+#include <sys/types.h>
+
+#define BOOT_BLOCK_VERSION 17
+#define BOOT_BLOCK_LAST_COMP_VERSION 16
+
+#define MAX_MEMORY_RANGES 64
+#define MAX_LINE 160
+
+#define CORE_TYPE_ELF32 1
+#define CORE_TYPE_ELF64 2
+
+int elf_mips_probe(const char *buf, off_t len);
+int elf_mips_load(int argc, char **argv, const char *buf, off_t len,
+	struct kexec_info *info);
+void elf_mips_usage(void);
+
+struct arch_options_t {
+	char *command_line;
+	char *dtb_file;
+	char *initrd_file;
+	int core_header_type;
+};
+
+extern struct memory_ranges usablemem_rgns;
+extern off_t initrd_base, initrd_size;
+
+#endif /* KEXEC_MIPS_H */
diff --git a/kexec/arch/ppc/Makefile b/kexec/arch/ppc/Makefile new
file mode 100644 index 0000000..71871f1 --- /dev/null +++ b/kexec/arch/ppc/Makefile @@ -0,0 +1,34 @@ +# +# kexec ppc (linux booting linux) +# +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +ppc_KEXEC_SRCS = kexec/arch/ppc/kexec-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-elf-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-elf-rel-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-dol-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-uImage-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/ppc-setup-simple.S +ppc_KEXEC_SRCS += kexec/arch/ppc/ppc-setup-dol.S +ppc_KEXEC_SRCS += kexec/arch/ppc/fixup_dtb.c +ppc_KEXEC_SRCS += kexec/arch/ppc/fs2dt.c +ppc_KEXEC_SRCS += kexec/arch/ppc/crashdump-powerpc.c + +ppc_UIMAGE = kexec/kexec-uImage.c + +ppc_libfdt_SRCS = kexec/arch/ppc/libfdt-wrapper.c +libfdt_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) +ppc_ARCH_REUSE_INITRD = + +ppc_CPPFLAGS = -I$(srcdir)/kexec/libfdt + +ppc_KEXEC_SRCS += $(libfdt_SRCS) $(ppc_libfdt_SRCS) + +ppc_ASFLAGS = -Wa,--noexecstack + +dist += kexec/arch/ppc/Makefile $(ppc_KEXEC_SRCS) \ + kexec/arch/ppc/crashdump-powerpc.h kexec/arch/ppc/fixup_dtb.h \ + kexec/arch/ppc/kexec-ppc.h kexec/arch/ppc/ops.h \ + kexec/arch/ppc/ppc_asm.h \ + kexec/arch/ppc/include/page.h kexec/arch/ppc/include/types.h \ + kexec/arch/ppc/include/arch/options.h diff --git a/kexec/arch/ppc/crashdump-powerpc.c b/kexec/arch/ppc/crashdump-powerpc.c new file mode 100644 index 0000000..15e8531 --- /dev/null +++ b/kexec/arch/ppc/crashdump-powerpc.c @@ -0,0 +1,433 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include <dirent.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../crashdump.h" +#include "kexec-ppc.h" +#include "crashdump-powerpc.h" + +#define DEVTREE_CRASHKERNEL_BASE "/proc/device-tree/chosen/linux,crashkernel-base" +#define 
DEVTREE_CRASHKERNEL_SIZE "/proc/device-tree/chosen/linux,crashkernel-size" + +#ifdef CONFIG_PPC64 +static struct crash_elf_info elf_info64 = { +class: ELFCLASS64, +data: ELFDATA2MSB, +machine: EM_PPC64, +page_offset: PAGE_OFFSET, +lowmem_limit: MAXMEM, +}; +#endif +static struct crash_elf_info elf_info32 = { +class: ELFCLASS32, +data: ELFDATA2MSB, +#ifdef CONFIG_PPC64 +machine: EM_PPC64, +#else +machine: EM_PPC, +#endif +page_offset: PAGE_OFFSET, +lowmem_limit: MAXMEM, +}; + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region + */ +static struct memory_range *crash_memory_range; +static int crash_nr_memory_ranges; + +/* Define a variable to replace the CRASH_MAX_MEMORY_RANGES macro */ +static int crash_max_memory_ranges; + +/* + * Used to save various memory ranges/regions needed for the captured + * kernel to boot. (lime memmap= option in other archs) + */ +mem_rgns_t usablemem_rgns = {0, NULL}; + +/* Append a segment to crash_memory_range, splitting it into two if + * it contains both lowmem and highmem */ +static void add_crash_memory_range(unsigned long long start, + unsigned long long end) +{ +#ifndef CONFIG_PPC64 + if (start < elf_info32.lowmem_limit && end > elf_info32.lowmem_limit) { + add_crash_memory_range(start, elf_info32.lowmem_limit); + add_crash_memory_range(elf_info32.lowmem_limit, end); + return; + } +#endif + + if (crash_nr_memory_ranges < crash_max_memory_ranges) { + crash_memory_range[crash_nr_memory_ranges].start = start; + crash_memory_range[crash_nr_memory_ranges].end = end; + crash_memory_range[crash_nr_memory_ranges].type = RANGE_RAM; + } + + crash_nr_memory_ranges++; +} + + +/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to + * create Elf headers. Keeping it separate from get_memory_ranges() as + * requirements are different in the case of normal kexec and crashdumps. 
+ *
+ * Normal kexec needs to look at all of available physical memory irrespective
+ * of the fact how much of it is being used by currently running kernel.
+ * Crashdumps need to have access to memory regions actually being used by
+ * running kernel. Expecting a different file/data structure than /proc/iomem
+ * to look into down the line. May be something like /proc/kernelmem or may
+ * be zone data structures exported from kernel.
+ *
+ * On success returns 0 and stores the range array and its element count
+ * through range and ranges. On failure returns -1 after releasing the array.
+ */
+static int get_crash_memory_ranges(struct memory_range **range, int *ranges)
+{
+
+	char device_tree[256] = "/proc/device-tree/";
+	/* Large enough for device_tree + a directory entry name + "/reg";
+	 * the length is still checked below before anything is appended. */
+	char fname[PATH_MAX];
+	DIR *dir, *dmem;
+	int fd;
+	struct dirent *dentry, *mentry;
+	int n, len, crash_rng_len = 0;
+	unsigned long long start, end, cstart, cend;
+
+	crash_max_memory_ranges = max_memory_ranges + 6;
+	crash_rng_len = sizeof(struct memory_range) * crash_max_memory_ranges;
+
+	crash_memory_range = (struct memory_range *) malloc(crash_rng_len);
+	if (!crash_memory_range) {
+		fprintf(stderr, "Allocation for crash memory range failed\n");
+		return -1;
+	}
+	memset(crash_memory_range, 0, crash_rng_len);
+	crash_nr_memory_ranges = 0;
+
+#ifndef CONFIG_BOOKE
+	/* create a separate program header for the backup region */
+	add_crash_memory_range(BACKUP_SRC_START, BACKUP_SRC_END + 1);
+#endif
+
+	dir = opendir(device_tree);
+	if (!dir) {
+		perror(device_tree);
+		goto err;
+	}
+	while ((dentry = readdir(dir)) != NULL) {
+		/* Only "memory" and "memory@..." nodes are of interest. */
+		if (strncmp(dentry->d_name, "memory@", 7)
+		    && strcmp(dentry->d_name, "memory"))
+			continue;
+		/* Build the node path with a bound, and keep room for the
+		 * "/reg" suffix that is appended further down. */
+		len = snprintf(fname, sizeof(fname), "%s%s",
+			       device_tree, dentry->d_name);
+		if (len < 0 ||
+		    (size_t)len >= sizeof(fname) - sizeof("/reg")) {
+			fprintf(stderr, "Path too long: %s%s\n",
+				device_tree, dentry->d_name);
+			closedir(dir);
+			goto err;
+		}
+		dmem = opendir(fname);
+		if (!dmem) {
+			perror(fname);
+			closedir(dir);
+			goto err;
+		}
+		while ((mentry = readdir(dmem)) != NULL) {
+			if (strcmp(mentry->d_name, "reg"))
+				continue;
+			/* Room for the suffix was verified above. */
+			strcpy(fname + len, "/reg");
+			fd = open(fname, O_RDONLY);
+			if (fd < 0) {
+				perror(fname);
+				closedir(dmem);
+				closedir(dir);
+				goto err;
+			}
+			n = read_memory_region_limits(fd, &start, &end);
+			/* We are done with fd, close it. */
+			close(fd);
+			if (n != 0) {
+				closedir(dmem);
+				closedir(dir);
+				goto err;
+			}
+#ifndef CONFIG_BOOKE
+			/* The backup region already has its own range. */
+			if (start == 0 && end >= (BACKUP_SRC_END + 1))
+				start = BACKUP_SRC_END + 1;
+#endif
+
+			/*
+			 * Exclude the region that lies within crashkernel.
+			 * If memory limit is set then exclude memory region
+			 * above it.
+			 */
+
+			if (memory_limit) {
+				if (start >= memory_limit)
+					continue;
+				if (end > memory_limit)
+					end = memory_limit;
+			}
+
+			/*
+			 * Exclude region used by crash kernel
+			 */
+			cstart = crash_base;
+			cend = crash_base + crash_size;
+
+			if (cstart >= end || cend <= start)
+				add_crash_memory_range(start, end);
+			else {
+				if (start < cstart)
+					add_crash_memory_range(start, cstart);
+				if (cend < end)
+					add_crash_memory_range(cend, end);
+			}
+		}
+		closedir(dmem);
+	}
+	closedir(dir);
+
+	/*
+	 * If RTAS region is overlapped with crashkernel, need to create ELF
+	 * Program header for the overlapped memory.
+	 */
+	if (crash_base < rtas_base + rtas_size &&
+		rtas_base < crash_base + crash_size) {
+		cstart = rtas_base;
+		cend = rtas_base + rtas_size;
+		if (cstart < crash_base)
+			cstart = crash_base;
+		if (cend > crash_base + crash_size)
+			cend = crash_base + crash_size;
+		add_crash_memory_range(cstart, cend);
+	}
+
+	/* add_crash_memory_range() keeps counting past the end of the array
+	 * without storing, so an overflow is detected here. */
+	if (crash_nr_memory_ranges >= crash_max_memory_ranges) {
+		fprintf(stderr,
+			"Error: Number of crash memory ranges"
+			" excedeed the max limit\n");
+		goto err;
+	}
+
+	*range = crash_memory_range;
+	*ranges = crash_nr_memory_ranges;
+
+	int j;
+	dbgprintf("CRASH MEMORY RANGES\n");
+	for (j = 0; j < *ranges; j++) {
+		start = crash_memory_range[j].start;
+		end = crash_memory_range[j].end;
+		/* "ll" is the standard length modifier for long long. */
+		dbgprintf("%016llx-%016llx\n", start, end);
+	}
+
+	return 0;
+
+err:
+	/* Clear the global as well so no stale pointer is left behind. */
+	free(crash_memory_range);
+	crash_memory_range = NULL;
+	return -1;
+}
+
+/* Converts unsigned long long to ascii string.
*/ +static void ulltoa(unsigned long long i, char *str) +{ + int j = 0, k; + char tmp; + + do { + str[j++] = i % 10 + '0'; + } while ((i /= 10) > 0); + str[j] = '\0'; + + /* Reverse the string. */ + for (j = 0, k = strlen(str) - 1; j < k; j++, k--) { + tmp = str[k]; + str[k] = str[j]; + str[j] = tmp; + } +} + +/* Append str to cmdline */ +static void add_cmdline(char *cmdline, char *str) +{ + int cmdline_size; + int cmdlen = strlen(cmdline) + strlen(str); + + cmdline_size = COMMAND_LINE_SIZE; + if (cmdlen > (cmdline_size - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); +} + +static int add_cmdline_param(char *cmdline, unsigned long long addr, + char *cmdstr, char *byte) +{ + int align = 1024; + char str[COMMAND_LINE_SIZE], *ptr; + + /* Passing in =xxxK / =xxxM format. Saves space required in cmdline.*/ + switch (byte[0]) { + case 'K': + if (addr%align) + return -1; + addr = addr/align; + break; + case 'M': + addr = addr/(align *align); + break; + } + ptr = str; + strcpy(str, cmdstr); + ptr += strlen(str); + ulltoa(addr, ptr); + strcat(str, byte); + + add_cmdline(cmdline, str); + + dbgprintf("Command line after adding elfcorehdr: %s\n", cmdline); + + return 0; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for backup region, another segment for storing elf headers + * for crash memory image. 
+ */ +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + unsigned long max_addr, unsigned long min_base) +{ + void *tmp; + unsigned long sz, elfcorehdr; + int nr_ranges, align = 1024, i; + unsigned long long end; + struct memory_range *mem_range; + + if (get_crash_memory_ranges(&mem_range, &nr_ranges) < 0) + return -1; + + info->backup_src_start = BACKUP_SRC_START; + info->backup_src_size = BACKUP_SRC_SIZE; +#ifndef CONFIG_BOOKE + /* Create a backup region segment to store backup data*/ + sz = _ALIGN(BACKUP_SRC_SIZE, align); + tmp = xmalloc(sz); + memset(tmp, 0, sz); + info->backup_start = add_buffer(info, tmp, sz, sz, align, + 0, max_addr, 1); + reserve(info->backup_start, sz); +#endif + + /* On powerpc memory ranges in device-tree is denoted as start + * and size rather than start and end, as is the case with + * other architectures like i386 . Because of this when loading + * the memory ranges in crashdump-elf.c the filesz calculation + * [ end - start + 1 ] goes for a toss. + * + * To be in sync with other archs adjust the end value for + * every crash memory range before calling the generic function + */ + + for (i = 0; i < nr_ranges; i++) { + end = crash_memory_range[i].end - 1; + crash_memory_range[i].end = end; + } + + +#ifdef CONFIG_PPC64 + /* Create elf header segment and store crash image data. 
*/ + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + if (crash_create_elf64_headers(info, &elf_info64, + crash_memory_range, nr_ranges, &tmp, + &sz, ELF_CORE_HEADER_ALIGN) < 0) + return -1; + } else if (crash_create_elf32_headers(info, &elf_info32, + crash_memory_range, nr_ranges, &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) + return -1; +#else + if (crash_create_elf32_headers(info, &elf_info32, crash_memory_range, + nr_ranges, &tmp, &sz, ELF_CORE_HEADER_ALIGN) + < 0) + return -1; +#endif + + elfcorehdr = add_buffer(info, tmp, sz, sz, align, + min_base, max_addr, 1); + reserve(elfcorehdr, sz); + /* modify and store the cmdline in a global array. This is later + * read by flatten_device_tree and modified if required + */ + add_cmdline_param(mod_cmdline, elfcorehdr, " elfcorehdr=", "K"); + add_cmdline(mod_cmdline, " maxcpus=1"); + return 0; +} + +/* + * Used to save various memory regions needed for the captured kernel. + */ + +void add_usable_mem_rgns(unsigned long long base, unsigned long long size) +{ + int i; + unsigned long long end = base + size; + unsigned long long ustart, uend; + + base = _ALIGN_DOWN(base, getpagesize()); + end = _ALIGN_UP(end, getpagesize()); + + for (i = 0; i < usablemem_rgns.size; i++) { + ustart = usablemem_rgns.ranges[i].start; + uend = usablemem_rgns.ranges[i].end; + if (base < uend && end > ustart) { + if ((base >= ustart) && (end <= uend)) + return; + if (base < ustart && end > uend) { + usablemem_rgns.ranges[i].start = base; + usablemem_rgns.ranges[i].end = end; + return; + } else if (base < ustart) { + usablemem_rgns.ranges[i].start = base; + return; + } else if (end > uend) { + usablemem_rgns.ranges[i].end = end; + return; + } + } + } + usablemem_rgns.ranges[usablemem_rgns.size].start = base; + usablemem_rgns.ranges[usablemem_rgns.size++].end = end; + + dbgprintf("usable memory rgns size:%u base:%llx size:%llx\n", + usablemem_rgns.size, base, size); +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + 
unsigned long long value; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_BASE, &value)) + *start = value; + else + return -1; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_SIZE, &value)) + *end = *start + value - 1; + else + return -1; + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + int fd; + + fd = open(DEVTREE_CRASHKERNEL_BASE, O_RDONLY); + if (fd < 0) + return 0; + close(fd); + return 1; +} + diff --git a/kexec/arch/ppc/crashdump-powerpc.h b/kexec/arch/ppc/crashdump-powerpc.h new file mode 100644 index 0000000..97b5095 --- /dev/null +++ b/kexec/arch/ppc/crashdump-powerpc.h @@ -0,0 +1,45 @@ +#ifndef CRASHDUMP_POWERPC_H +#define CRASHDUMP_POWERPC_H + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + unsigned long max_addr, unsigned long min_base); +void add_usable_mem_rgns(unsigned long long base, unsigned long long size); + +extern struct arch_options_t arch_options; + +#ifdef CONFIG_PPC64 +#define PAGE_OFFSET 0xC000000000000000UL +#define VMALLOCBASE 0xD000000000000000UL +#define MAXMEM (-KERNELBASE-VMALLOCBASE) +#else +#define PAGE_OFFSET 0xC0000000 +#define MAXMEM 0x30000000 /* Use CONFIG_LOWMEM_SIZE from kernel */ +#endif + +#define KERNELBASE PAGE_OFFSET +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) + +#ifdef CONFIG_BOOKE +/* We don't need backup region in Book E */ +#define BACKUP_SRC_START 0x0000 +#define BACKUP_SRC_END 0x0000 +#define BACKUP_SRC_SIZE 0x0000 +#else +/* Backup Region, First 64K of System RAM. 
*/ +#define BACKUP_SRC_START 0x0000 +#define BACKUP_SRC_END 0xffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) +#endif + +#define KDUMP_BACKUP_LIMIT BACKUP_SRC_SIZE + +extern unsigned long long crash_base; +extern unsigned long long crash_size; +extern unsigned int rtas_base; +extern unsigned int rtas_size; +extern uint64_t opal_base; +extern uint64_t opal_size; +extern uint64_t memory_limit; + +#endif /* CRASHDUMP_POWERPC_H */ diff --git a/kexec/arch/ppc/fixup_dtb.c b/kexec/arch/ppc/fixup_dtb.c new file mode 100644 index 0000000..92a0bfd --- /dev/null +++ b/kexec/arch/ppc/fixup_dtb.c @@ -0,0 +1,408 @@ +#define _GNU_SOURCE +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include <libfdt.h> +#include "ops.h" +#include "page.h" +#include "fixup_dtb.h" +#include "kexec-ppc.h" + +const char proc_dts[] = "/proc/device-tree"; + +static void print_fdt_reserve_regions(char *blob_buf) +{ + int i, num; + + if (!kexec_debug) + return; + /* Print out a summary of the final reserve regions */ + num = fdt_num_mem_rsv(blob_buf); + dbgprintf ("reserve regions: %d\n", num); + for (i = 0; i < num; i++) { + uint64_t offset, size; + + if (fdt_get_mem_rsv(blob_buf, i, &offset, &size) == 0) { + dbgprintf("%d: offset: %llx, size: %llx\n", i, offset, size); + } else { + dbgprintf("Error retreiving reserved region\n"); + } + } +} + + +static void fixup_nodes(char *nodes[]) +{ + int index = 0; + char *fname; + char *prop_name; + char *node_name; + void *node; + int len; + char *content; + off_t content_size; + int ret; + + while (nodes[index]) { + + len = asprintf(&fname, "%s%s", proc_dts, nodes[index]); + if (len < 0) + die("asprintf() failed\n"); + + content = slurp_file(fname, &content_size); + if (!content) { + die("Can't open %s: %s\n", fname, strerror(errno)); + } + + prop_name = fname + len; + while 
(*prop_name != '/') + prop_name--; + + *prop_name = '\0'; + prop_name++; + + node_name = fname + sizeof(proc_dts) - 1; + + node = finddevice(node_name); + if (!node) + node = create_node(NULL, node_name + 1); + + ret = setprop(node, prop_name, content, content_size); + if (ret < 0) + die("setprop of %s/%s size: %ld failed: %s\n", + node_name, prop_name, content_size, + fdt_strerror(ret)); + + free(content); + free(fname); + index++; + }; +} + +/* + * command line priority: + * - use the supplied command line + * - if none available use the command line from .dtb + * - if not available use the current command line + */ +static void fixup_cmdline(const char *cmdline) +{ + void *chosen; + char *fixup_cmd_node[] = { + "/chosen/bootargs", + NULL, + }; + + chosen = finddevice("/chosen"); + + if (!cmdline) { + if (!chosen) + fixup_nodes(fixup_cmd_node); + } else { + if (!chosen) + chosen = create_node(NULL, "chosen"); + setprop_str(chosen, "bootargs", cmdline); + } + return; +} + +#define EXPAND_GRANULARITY 1024 + +static char *expand_buf(int minexpand, char *blob_buf, off_t *blob_size) +{ + int size = fdt_totalsize(blob_buf); + int rc; + + size = _ALIGN(size + minexpand, EXPAND_GRANULARITY); + blob_buf = realloc(blob_buf, size); + if (!blob_buf) + die("Couldn't find %d bytes to expand device tree\n\r", size); + rc = fdt_open_into(blob_buf, blob_buf, size); + if (rc != 0) + die("Couldn't expand fdt into new buffer: %s\n\r", + fdt_strerror(rc)); + + *blob_size = fdt_totalsize(blob_buf); + + return blob_buf; +} + +static void fixup_reserve_regions(struct kexec_info *info, char *blob_buf) +{ + int ret, i; + int nodeoffset; + u64 val = 0; + + /* If this is a KEXEC kernel we add all regions since they will + * all need to be saved */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + for (i = 0; i < info->nr_segments; i++) { + uint64_t address = (unsigned long)info->segment[i].mem; + uint64_t size = info->segment[i].memsz; + + while ((i+1) < info->nr_segments && + (address + size 
== (unsigned long)info->segment[i+1].mem)) { + size += info->segment[++i].memsz; + } + + ret = fdt_add_mem_rsv(blob_buf, address, size); + if (ret) { + printf("%s: Error adding memory range to memreserve!\n", + fdt_strerror(ret)); + goto out; + } + } + } else if (ramdisk || reuse_initrd) { + /* Otherwise we just add back the ramdisk and the device tree + * is already in the list */ + ret = fdt_add_mem_rsv(blob_buf, ramdisk_base, ramdisk_size); + if (ret) { + printf("%s: Unable to add new reserved memory for initrd flat device tree\n", + fdt_strerror(ret)); + goto out; + } + } + +#if 0 + /* XXX: Do not reserve spin-table for CPUs. */ + + /* Add reserve regions for cpu-release-addr */ + nodeoffset = fdt_node_offset_by_prop_value(blob_buf, -1, "device_type", "cpu", 4); + while (nodeoffset != -FDT_ERR_NOTFOUND) { + const void *buf; + int sz, ret; + u64 tmp; + + buf = fdt_getprop(blob_buf, nodeoffset, "cpu-release-addr", &sz); + + if (buf) { + if (sz == 4) { + tmp = *(u32 *)buf; + } else if (sz == 8) { + tmp = *(u64 *)buf; + } + + /* crude check to see if last value is repeated */ + if (_ALIGN_DOWN(tmp, PAGE_SIZE) != _ALIGN_DOWN(val, PAGE_SIZE)) { + val = tmp; + ret = fdt_add_mem_rsv(blob_buf, _ALIGN_DOWN(val, PAGE_SIZE), PAGE_SIZE); + if (ret) + printf("%s: Unable to add reserve for cpu-release-addr!\n", + fdt_strerror(ret)); + } + } + + nodeoffset = fdt_node_offset_by_prop_value(blob_buf, nodeoffset, + "device_type", "cpu", 4); + } +#endif + +out: + print_fdt_reserve_regions(blob_buf); +} + +static void fixup_memory(struct kexec_info *info, char *blob_buf) +{ + if (info->kexec_flags & KEXEC_ON_CRASH) { + int nodeoffset, len = 0; + u8 tmp[16]; + const unsigned long *addrcell, *sizecell; + + nodeoffset = fdt_path_offset(blob_buf, "/memory"); + + if (nodeoffset < 0) { + printf("Error searching for memory node!\n"); + return; + } + + addrcell = fdt_getprop(blob_buf, 0, "#address-cells", NULL); + /* use shifts and mask to ensure endianness */ + if ((addrcell) && (*addrcell 
== 2)) { + tmp[0] = (crash_base >> 56) & 0xff; + tmp[1] = (crash_base >> 48) & 0xff; + tmp[2] = (crash_base >> 40) & 0xff; + tmp[3] = (crash_base >> 32) & 0xff; + tmp[4] = (crash_base >> 24) & 0xff; + tmp[5] = (crash_base >> 16) & 0xff; + tmp[6] = (crash_base >> 8) & 0xff; + tmp[7] = (crash_base ) & 0xff; + len = 8; + } else { + tmp[0] = (crash_base >> 24) & 0xff; + tmp[1] = (crash_base >> 16) & 0xff; + tmp[2] = (crash_base >> 8) & 0xff; + tmp[3] = (crash_base ) & 0xff; + len = 4; + } + + sizecell = fdt_getprop(blob_buf, 0, "#size-cells", NULL); + /* use shifts and mask to ensure endianness */ + if ((sizecell) && (*sizecell == 2)) { + tmp[0+len] = (crash_size >> 56) & 0xff; + tmp[1+len] = (crash_size >> 48) & 0xff; + tmp[2+len] = (crash_size >> 40) & 0xff; + tmp[3+len] = (crash_size >> 32) & 0xff; + tmp[4+len] = (crash_size >> 24) & 0xff; + tmp[5+len] = (crash_size >> 16) & 0xff; + tmp[6+len] = (crash_size >> 8) & 0xff; + tmp[7+len] = (crash_size ) & 0xff; + len += 8; + } else { + tmp[0+len] = (crash_size >> 24) & 0xff; + tmp[1+len] = (crash_size >> 16) & 0xff; + tmp[2+len] = (crash_size >> 8) & 0xff; + tmp[3+len] = (crash_size ) & 0xff; + len += 4; + } + + if (fdt_setprop(blob_buf, nodeoffset, "reg", tmp, len) != 0) { + printf ("Error setting memory node!\n"); + } + + fdt_delprop(blob_buf, nodeoffset, "linux,usable-memory"); + } +} + +/* removes crashkernel nodes if they exist and we are *rebooting* + * into a crashkernel. 
These nodes should not exist after we + * crash and reboot into a new kernel + */ +static void fixup_crashkernel(struct kexec_info *info, char *blob_buf) +{ + int nodeoffset; + + nodeoffset = fdt_path_offset(blob_buf, "/chosen"); + + if (info->kexec_flags & KEXEC_ON_CRASH) { + if (nodeoffset < 0) { + printf("fdt_crashkernel: %s\n", fdt_strerror(nodeoffset)); + return; + } + + fdt_delprop(blob_buf, nodeoffset, "linux,crashkernel-base"); + fdt_delprop(blob_buf, nodeoffset, "linux,crashkernel-size"); + } +} +/* remove the old chosen nodes if they exist and add correct chosen + * nodes if we have an initd + */ +static void fixup_initrd(char *blob_buf) +{ + int err, nodeoffset; + unsigned long tmp; + + nodeoffset = fdt_path_offset(blob_buf, "/chosen"); + + if (nodeoffset < 0) { + printf("fdt_initrd: %s\n", fdt_strerror(nodeoffset)); + return; + } + + fdt_delprop(blob_buf, nodeoffset, "linux,initrd-start"); + fdt_delprop(blob_buf, nodeoffset, "linux,initrd-end"); + + if ((reuse_initrd || ramdisk) && + ((ramdisk_base != 0) && (ramdisk_size != 0))) { + tmp = ramdisk_base; + err = fdt_setprop(blob_buf, nodeoffset, + "linux,initrd-start", &tmp, sizeof(tmp)); + if (err < 0) { + printf("WARNING: " + "could not set linux,initrd-start %s.\n", + fdt_strerror(err)); + return; + } + + tmp = ramdisk_base + ramdisk_size; + err = fdt_setprop(blob_buf, nodeoffset, + "linux,initrd-end", &tmp, sizeof(tmp)); + if (err < 0) { + printf("WARNING: could not set linux,initrd-end %s.\n", + fdt_strerror(err)); + return; + } + } +} + +char *fixup_dtb_init(struct kexec_info *info, char *blob_buf, off_t *blob_size, + unsigned long hole_addr, unsigned long *dtb_addr) +{ + int ret, i, num = fdt_num_mem_rsv(blob_buf); + + fdt_init(blob_buf); + + /* Remove the existing reserve regions as they will no longer + * be valid after we reboot */ + for (i = num - 1; i >= 0; i--) { + ret = fdt_del_mem_rsv(blob_buf, i); + if (ret) { + printf("%s: Error deleting memory reserve region %d from device tree!\n", + 
fdt_strerror(ret), i); + } + } + + /* Pack the FDT first, so we don't grow excessively if there is already free space */ + ret = fdt_pack(blob_buf); + if (ret) + printf("%s: Unable to pack flat device tree\n", fdt_strerror(ret)); /* reported only; execution continues */ + + /* info->nr_segments just a guide, will grow by at least EXPAND_GRANULARITY */ + blob_buf = expand_buf(info->nr_segments * sizeof(struct fdt_reserve_entry), + blob_buf, blob_size); /* blob_buf now holds whatever expand_buf() returned; that value is what this function returns */ + + /* add reserve region for *THIS* fdt */ + *dtb_addr = locate_hole(info, *blob_size, 0, + hole_addr, hole_addr+KERNEL_ACCESS_TOP, -1); + ret = fdt_add_mem_rsv(blob_buf, *dtb_addr, PAGE_ALIGN(*blob_size)); + if (ret) { + printf("%s: Unable to add new reserved memory for the flat device tree\n", + fdt_strerror(ret)); + } + + return blob_buf; +} + +static void save_fixed_up_dtb(char *blob_buf, off_t blob_size) +{ /* Debug aid: when kexec_debug is non-zero, writes blob_size bytes of blob_buf to the file debug.dtb (relative path). */ + FILE *fp; + + if (!kexec_debug) + return; + fp = fopen("debug.dtb", "w"); + if (fp) { + if ( blob_size == fwrite(blob_buf, sizeof(char), blob_size, fp)) { /* a short write is only reported through dbgprintf */ + dbgprintf("debug.dtb written\n"); + } else { + dbgprintf("Unable to write debug.dtb\n"); + } + + fclose(fp); + } else { + dbgprintf("Unable to dump flat device tree to debug.dtb\n"); + } +} + +char *fixup_dtb_finalize(struct kexec_info *info, char *blob_buf, off_t *blob_size, + char *nodes[], char *cmdline) +{ /* Runs the fixup_* passes in the order below, then returns the buffer produced by dt_ops.finalize() and stores its fdt_totalsize() in *blob_size. */ + fixup_nodes(nodes); + fixup_cmdline(cmdline); + fixup_reserve_regions(info, blob_buf); + fixup_memory(info, blob_buf); + fixup_initrd(blob_buf); + fixup_crashkernel(info, blob_buf); + + blob_buf = (char *)dt_ops.finalize(); /* from here on blob_buf is the finalize() result, not the argument */ + *blob_size = fdt_totalsize(blob_buf); + + save_fixed_up_dtb(blob_buf, *blob_size); + + return blob_buf; +} diff --git a/kexec/arch/ppc/fixup_dtb.h b/kexec/arch/ppc/fixup_dtb.h new file mode 100644 index 0000000..b706a5a --- /dev/null +++ b/kexec/arch/ppc/fixup_dtb.h @@ -0,0 +1,10 @@ +#ifndef __FIXUP_DTB_H +#define __FIXUP_DTB_H + +char *fixup_dtb_init(struct kexec_info *info, char *blob_buf, off_t *blob_size, + unsigned long hole_addr, unsigned long *dtb_addr); +
+char *fixup_dtb_finalize(struct kexec_info *info, char *blob_buf, off_t *blob_size, + char *nodes[], char *cmdline); + +#endif diff --git a/kexec/arch/ppc/fs2dt.c b/kexec/arch/ppc/fs2dt.c new file mode 100644 index 0000000..fed499b --- /dev/null +++ b/kexec/arch/ppc/fs2dt.c @@ -0,0 +1,471 @@ +/* + * fs2dt: creates a flattened device-tree + * + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com), IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> + +#include <fcntl.h> +#include <dirent.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include "../../kexec.h" +#include "kexec-ppc.h" +#include "types.h" + +#define MAXPATH 1024 /* max path name length */ +#define NAMESPACE 16384 /* max bytes for property names */ +#define TREEWORDS 65536 /* max 32 bit words for properties */ +#define MEMRESERVE 256 /* max number of reserved memory blks */ +#define MAX_MEMORY_RANGES 1024 + +static char pathname[MAXPATH]; +static char propnames[NAMESPACE] = { 0 }; +static unsigned dtstruct[TREEWORDS], *dt; +static unsigned long long mem_rsrv[2*MEMRESERVE] = { 0, 0 }; + +static int crash_param; +static char local_cmdline[COMMAND_LINE_SIZE] = { "" }; +static unsigned *dt_len; /* changed len of modified cmdline + in flat device-tree */ +static struct bootblock bb[1]; + +void reserve(unsigned long long where, unsigned long long length) +{ + size_t offset; + + for (offset = 0; mem_rsrv[offset + 1]; offset += 2) + ; + + if (offset + 4 >= 2 * MEMRESERVE) + die("unrecoverable error: exhasuted reservation meta data\n"); + + mem_rsrv[offset] = where; + mem_rsrv[offset + 1] = length; + mem_rsrv[offset + 3] = 0; /* N.B: don't care about offset + 2 */ +} + +/* look for properties we need to reserve memory space for */ +static void checkprop(char *name, unsigned *data, int len) +{ + static unsigned long long base, size, end; + + if ((data == NULL) && (base || size || end)) + die("unrecoverable error: no property data"); + else if (!strcmp(name, "linux,rtas-base")) + base = *data; + else if (!strcmp(name, "linux,tce-base")) + base = *(unsigned long long *) data; + else if (!strcmp(name, "rtas-size") || + !strcmp(name, "linux,tce-size")) + size = *data; + else if (reuse_initrd && !strcmp(name, "linux,initrd-start")) + if (len == 8) + base = *(unsigned long long *) data; + else + base = *data; + else if (reuse_initrd && !strcmp(name, 
"linux,initrd-end")) + end = *(unsigned long long *) data; + + if (size && end) + die("unrecoverable error: size and end set at same time\n"); + if (base && size) { + reserve(base, size); + base = 0; + size = 0; + } + if (base && end) { + reserve(base, end-base); + base = 0; + end = 0; + } +} + +/* + * return the property index for a property name, creating a new one + * if needed. + */ +static unsigned propnum(const char *name) +{ + unsigned offset = 0; + + while (propnames[offset]) + if (strcmp(name, propnames+offset)) + offset += strlen(propnames+offset)+1; + else + return offset; + + if (NAMESPACE - offset < strlen(name) + 1) + die("unrecoverable error: propnames overrun\n"); + + strcpy(propnames+offset, name); + + return offset; +} + +static void add_usable_mem_property(int fd, int len) +{ + char fname[MAXPATH], *bname; + unsigned long buf[2]; + unsigned long ranges[2*MAX_MEMORY_RANGES]; + unsigned long long base, end, loc_base, loc_end; + int range, rlen = 0; + + strcpy(fname, pathname); + bname = strrchr(fname, '/'); + bname[0] = '\0'; + bname = strrchr(fname, '/'); + if (strncmp(bname, "/memory@", 8) && strcmp(bname, "/memory")) + return; + + if (lseek(fd, 0, SEEK_SET) < 0) + die("unrecoverable error: error seeking in \"%s\": %s\n", + pathname, strerror(errno)); + if (read_memory_region_limits(fd, &base, &end) != 0) + die("unrecoverable error: error parsing memory/reg limits\n"); + + for (range = 0; range < usablemem_rgns.size; range++) { + loc_base = usablemem_rgns.ranges[range].start; + loc_end = usablemem_rgns.ranges[range].end; + if (loc_base >= base && loc_end <= end) { + ranges[rlen++] = loc_base; + ranges[rlen++] = loc_end - loc_base; + } else if (base < loc_end && end > loc_base) { + if (loc_base < base) + loc_base = base; + if (loc_end > end) + loc_end = end; + ranges[rlen++] = loc_base; + ranges[rlen++] = loc_end - loc_base; + } + } + + if (!rlen) { + /* + * User did not pass any ranges for thsi region. 
Hence, write + * (0,0) duple in linux,usable-memory property such that + * this region will be ignored. + */ + ranges[rlen++] = 0; + ranges[rlen++] = 0; + } + + rlen = rlen * sizeof(unsigned long); /* from here on rlen is a byte count, not an element count */ + /* + * Now add linux,usable-memory property. + * Written to dt in this order: the word 3, the byte length, the name + * offset returned by propnum(), then the data itself. + */ + *dt++ = 3; + *dt++ = rlen; + *dt++ = propnum("linux,usable-memory"); + memcpy(dt, &ranges, rlen); + dt += (rlen + 3)/4; /* advance by the byte count rounded up to a multiple of 4, counted in dt elements */ +} + +/* put all properties (files) in the property structure + * + * fn is where each entry name is copied; putnode() passes the position just + * past the trailing slash of pathname, so after the strcpy() below pathname + * names the entry being examined. nlist/numlist are the directory entries + * to walk. + */ +static void putprops(char *fn, struct dirent **nlist, int numlist) +{ + struct dirent *dp; + int i = 0, fd, len; + struct stat statbuf; + + for (i = 0; i < numlist; i++) { + dp = nlist[i]; + strcpy(fn, dp->d_name); + + if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) + continue; + + if (lstat(pathname, &statbuf)) + die("unrecoverable error: could not stat \"%s\": %s\n", + pathname, strerror(errno)); + + if (!crash_param && !strcmp(fn, "linux,crashkernel-base")) /* crash_param is set further down, once crashkernel= is seen in local_cmdline */ + continue; + + if (!crash_param && !strcmp(fn, "linux,crashkernel-size")) + continue; + + /* + * This property will be created for each node during kexec + * boot. So, ignore it. + */ + if (!strcmp(dp->d_name, "linux,pci-domain") || + !strcmp(dp->d_name, "linux,htab-base") || + !strcmp(dp->d_name, "linux,htab-size") || + !strcmp(dp->d_name, "linux,kernel-end") || + !strcmp(dp->d_name, "linux,usable-memory")) + continue; + + /* This property will be created/modified later in putnode() + * So ignore it, unless we are reusing the initrd.
+ */ + if ((!strcmp(dp->d_name, "linux,initrd-start") || + !strcmp(dp->d_name, "linux,initrd-end")) && + !reuse_initrd) + continue; + + if (!S_ISREG(statbuf.st_mode)) + continue; + + len = statbuf.st_size; + + *dt++ = 3; + dt_len = dt; + *dt++ = len; + *dt++ = propnum(fn); + + fd = open(pathname, O_RDONLY); + if (fd == -1) + die("unrecoverable error: could not open \"%s\": %s\n", + pathname, strerror(errno)); + + if (read(fd, dt, len) != len) + die("unrecoverable error: could not read \"%s\": %s\n", + pathname, strerror(errno)); + + checkprop(fn, dt, len); + + /* Get the cmdline from the device-tree and modify it */ + if (!strcmp(dp->d_name, "bootargs")) { + int cmd_len; + char temp_cmdline[COMMAND_LINE_SIZE] = { "" }; + char *param = NULL; + cmd_len = strlen(local_cmdline); + if (cmd_len != 0) { + param = strstr(local_cmdline, "crashkernel="); + if (param) + crash_param = 1; + param = strstr(local_cmdline, "root="); + } + if (!param) { + char *old_param; + memcpy(temp_cmdline, dt, len); + param = strstr(temp_cmdline, "root="); + if (param) { + old_param = strtok(param, " "); + if (cmd_len != 0) + strcat(local_cmdline, " "); + strcat(local_cmdline, old_param); + } + } + strcat(local_cmdline, " "); + cmd_len = strlen(local_cmdline); + cmd_len = cmd_len + 1; + memcpy(dt, local_cmdline, cmd_len); + len = cmd_len; + *dt_len = cmd_len; + + dbgprintf("Modified cmdline:%s\n", local_cmdline); + + } + + dt += (len + 3)/4; + if (!strcmp(dp->d_name, "reg") && usablemem_rgns.size) + add_usable_mem_property(fd, len); + close(fd); + } + + fn[0] = '\0'; + checkprop(pathname, NULL, 0); +} + +/* + * Compare function used to sort the device-tree directories + * This function will be passed to scandir. 
+ */ +static int comparefunc(const void *dentry1, const void *dentry2) +{ + char *str1 = (*(struct dirent **)dentry1)->d_name; + char *str2 = (*(struct dirent **)dentry2)->d_name; + char *p1, *p2; + int res = 0, max_len; + + /* + * strcmp scans from left to right and fails to identify for some + * strings such as memory@10000000 and memory@f000000. + * Therefore, we get the wrong sorted order like memory@10000000 and + * memory@f000000. + * + * When both names contain an '@' and the part before it matches, the + * text after the '@' is ordered by length first and by strcmp() only + * for equal lengths. Every other pair is ordered by plain strcmp(). + */ + if ((p1 = strchr(str1, '@')) && (p2 = strchr(str2, '@'))) { + max_len = max(p1 - str1, p2 - str2); + if ((res = strncmp(str1, str2, max_len)) == 0) { + /* prefix is equal - compare part after '@' by length */ + p1++; p2++; + res = strlen(p1) - strlen(p2); /* shorter suffix sorts first */ + if (res == 0) + /* equal length, compare by strcmp() */ + res = strcmp(p1,p2); + } + } else { + res = strcmp(str1, str2); + } + + return res; +} + +/* + * put a node (directory) in the property structure. first properties + * then children. + * + * Works on the directory currently named by the global pathname and + * recurses into its sub-directories; pathname is extended while a child + * is processed and cut back before returning. + */ +static void putnode(void) +{ + char *dn; + struct dirent *dp; + char *basename; + struct dirent **namelist; + int numlist, i; + struct stat statbuf; + + numlist = scandir(pathname, &namelist, 0, comparefunc); + if (numlist < 0) + die("unrecoverable error: could not scan \"%s\": %s\n", + pathname, strerror(errno)); + if (numlist == 0) + die("unrecoverable error: no directory entries in \"%s\"", + pathname); + + basename = strrchr(pathname, '/') + 1; /* points into pathname, so it also sees the slash appended below */ + + *dt++ = 1; /* a node record starts with the word 1, followed by the node name */ + strcpy((void *)dt, *basename ? basename : ""); + dt += strlen((void *)dt) / sizeof(unsigned) + 1; /* skip the name and its NUL, rounded up to whole dt elements */ + + strcat(pathname, "/"); + dn = pathname + strlen(pathname); /* dn: where entry names get appended to pathname */ + + putprops(dn, namelist, numlist); + + /* + * Add initrd entries to the second kernel + * if + * a) a ramdisk is specified in cmdline + * OR + * b) reuseinitrd is specified and a initrd is + * used by the kernel.
+ * + */ + if ((ramdisk || (initrd_base && reuse_initrd)) + && !strcmp(basename, "chosen/")) { + int len = 8; + unsigned long long initrd_end; + *dt++ = 3; + *dt++ = len; + *dt++ = propnum("linux,initrd-start"); + + memcpy(dt, &initrd_base, len); + dt += (len + 3)/4; + + len = 8; + *dt++ = 3; + *dt++ = len; + *dt++ = propnum("linux,initrd-end"); + + initrd_end = initrd_base + initrd_size; + + memcpy(dt, &initrd_end, len); + dt += (len + 3)/4; + /* reserve the existing initrd image in case of reuse_initrd */ + if (initrd_base && initrd_size && reuse_initrd) + reserve(initrd_base, initrd_size); + } + + for (i = 0; i < numlist; i++) { + dp = namelist[i]; + strcpy(dn, dp->d_name); + free(namelist[i]); + + if (!strcmp(dn, ".") || !strcmp(dn, "..")) + continue; + + if (lstat(pathname, &statbuf)) + die("unrecoverable error: could not stat \"%s\": %s\n", + pathname, strerror(errno)); + + if (S_ISDIR(statbuf.st_mode)) + putnode(); + } + + *dt++ = 2; + dn[-1] = '\0'; + free(namelist); +} + +int create_flatten_tree(struct kexec_info *info, unsigned char **bufp, + unsigned long *sizep, char *cmdline) +{ + unsigned long len; + unsigned long tlen; + unsigned char *buf; + unsigned long me; + + me = 0; + + strcpy(pathname, "/proc/device-tree/"); + + dt = dtstruct; + + if (cmdline) + strcpy(local_cmdline, cmdline); + + putnode(); + *dt++ = 9; + + len = _ALIGN(sizeof(bb[0]), 8); + + bb->off_mem_rsvmap = len; + + for (len = 1; mem_rsrv[len]; len += 2) + ; + len += 3; + len *= sizeof(mem_rsrv[0]); + + bb->off_dt_struct = bb->off_mem_rsvmap + len; + + len = dt - dtstruct; + len *= sizeof(unsigned); + bb->dt_struct_size = len; + bb->off_dt_strings = bb->off_dt_struct + len; + + len = propnum(""); + bb->dt_strings_size = len; + len = _ALIGN(len, 4); + bb->totalsize = bb->off_dt_strings + len; + + bb->magic = 0xd00dfeed; + bb->version = 17; + bb->last_comp_version = 16; + + reserve(me, bb->totalsize); /* patched later in kexec_load */ + + buf = (unsigned char *) malloc(bb->totalsize); + 
*bufp = buf; + memcpy(buf, bb, bb->off_mem_rsvmap); /* 1: the bootblock header, off_mem_rsvmap bytes */ + tlen = bb->off_mem_rsvmap; + memcpy(buf+tlen, mem_rsrv, bb->off_dt_struct - bb->off_mem_rsvmap); /* 2: the mem_rsrv[] pairs */ + tlen = tlen + (bb->off_dt_struct - bb->off_mem_rsvmap); + memcpy(buf+tlen, dtstruct, bb->off_dt_strings - bb->off_dt_struct); /* 3: the structure words built by putnode() */ + tlen = tlen + (bb->off_dt_strings - bb->off_dt_struct); + memcpy(buf+tlen, propnames, bb->totalsize - bb->off_dt_strings); /* 4: the property name strings */ + tlen = tlen + bb->totalsize - bb->off_dt_strings; /* the four pieces add up to bb->totalsize */ + *sizep = tlen; + return 0; +} diff --git a/kexec/arch/ppc/include/arch/options.h b/kexec/arch/ppc/include/arch/options.h new file mode 100644 index 0000000..b2176ab --- /dev/null +++ b/kexec/arch/ppc/include/arch/options.h @@ -0,0 +1,47 @@ +#ifndef KEXEC_ARCH_PPC_OPTIONS_H +#define KEXEC_ARCH_PPC_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) + +/* All 'local' loader options: */ +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_GAMECUBE (OPT_ARCH_MAX+1) +#define OPT_DTB (OPT_ARCH_MAX+2) +#define OPT_NODES (OPT_ARCH_MAX+3) +#define OPT_RAMDISK (OPT_ARCH_MAX+4) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options.
+ * + * The value attached to each name has to agree with the tables of the + * individual loaders: --ramdisk and --initrd are OPT_RAMDISK, not + * OPT_APPEND. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + {"command-line", 1, 0, OPT_APPEND},\ + {"append", 1, 0, OPT_APPEND},\ + {"ramdisk", 1, 0, OPT_RAMDISK},\ + {"initrd", 1, 0, OPT_RAMDISK},\ + {"gamecube", 1, 0, OPT_GAMECUBE},\ + {"dtb", 1, 0, OPT_DTB},\ + {"reuse-node", 1, 0, OPT_NODES}, + +#define KEXEC_ALL_OPT_STR KEXEC_OPT_STR + +#endif /* KEXEC_ARCH_PPC_OPTIONS_H */ diff --git a/kexec/arch/ppc/include/page.h b/kexec/arch/ppc/include/page.h new file mode 100644 index 0000000..65877bc --- /dev/null +++ b/kexec/arch/ppc/include/page.h @@ -0,0 +1,27 @@ +#ifndef _PPC_BOOT_PAGE_H +#define _PPC_BOOT_PAGE_H +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifdef __ASSEMBLY__ +#define ASM_CONST(x) x +#else +#define __ASM_CONST(x) x##UL +#define ASM_CONST(x) __ASM_CONST(x) +#endif + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1))) + +#endif /* _PPC_BOOT_PAGE_H */ diff --git a/kexec/arch/ppc/include/types.h b/kexec/arch/ppc/include/types.h new file mode 100644 index 0000000..31393d1 --- /dev/null +++ b/kexec/arch/ppc/include/types.h @@ -0,0 +1,27 @@ +#ifndef _TYPES_H_ +#define _TYPES_H_ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +typedef signed char s8; +typedef short s16; +typedef int s32; +typedef long long s64; + +#define min(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ?
_x : _y; }) + +#define max(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) + +#endif /* _TYPES_H_ */ diff --git a/kexec/arch/ppc/kexec-dol-ppc.c b/kexec/arch/ppc/kexec-dol-ppc.c new file mode 100644 index 0000000..800c072 --- /dev/null +++ b/kexec/arch/ppc/kexec-dol-ppc.c @@ -0,0 +1,468 @@ +/* + * kexec-dol-ppc.c - kexec DOL executable loader for the PowerPC + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "kexec-ppc.h" +#include <arch/options.h> + +static int debug = 0; + +/* + * I've found out there DOLs with unaligned and/or overlapping sections. + * I assume that sizes of sections can be wrong on these DOLs so I trust + * better start of sections. + * In order to load DOLs, I first extend sections to page aligned boundaries + * and then merge overlapping sections starting from lower addresses. 
+ * -- Albert Herranz + */ + +/* DOL related stuff */ + +#define DOL_HEADER_SIZE 0x100 + +#define DOL_SECT_MAX_TEXT 7 /* text sections */ +#define DOL_SECT_MAX_DATA 11 /* data sections */ +#define DOL_MAX_SECT (DOL_SECT_MAX_TEXT+DOL_SECT_MAX_DATA) + +/* this is the DOL executable header + * + * Sections are numbered 0..DOL_MAX_SECT-1. The dol_sect_*() accessors + * below map an index under DOL_SECT_MAX_TEXT to the *_text arrays and the + * remaining indices to the *_data arrays. + * NOTE(review): those accessors expand hptr and index without parentheses, + * so they should only be given plain identifiers. + */ +typedef struct { + uint32_t offset_text[DOL_SECT_MAX_TEXT]; /* in the file */ + uint32_t offset_data[DOL_SECT_MAX_DATA]; + uint32_t address_text[DOL_SECT_MAX_TEXT]; /* in memory */ + uint32_t address_data[DOL_SECT_MAX_DATA]; + uint32_t size_text[DOL_SECT_MAX_TEXT]; /* load_dol_segments() treats a size of 0 as: section not in use */ + uint32_t size_data[DOL_SECT_MAX_DATA]; + uint32_t address_bss; + uint32_t size_bss; + uint32_t entry_point; +} dol_header; + +#define dol_sect_offset(hptr, index) \ + ((index >= DOL_SECT_MAX_TEXT)? \ + hptr->offset_data[index - DOL_SECT_MAX_TEXT] \ + :hptr->offset_text[index]) +#define dol_sect_address(hptr, index) \ + ((index >= DOL_SECT_MAX_TEXT)? \ + hptr->address_data[index - DOL_SECT_MAX_TEXT] \ + :hptr->address_text[index]) +#define dol_sect_size(hptr, index) \ + ((index >= DOL_SECT_MAX_TEXT)? \ + hptr->size_data[index - DOL_SECT_MAX_TEXT] \ + :hptr->size_text[index]) +#define dol_sect_type(index) \ + ((index >= DOL_SECT_MAX_TEXT) ?
"data" : "text") + +typedef struct { + uint32_t sects_bitmap; /* bit k set: section k of the header is part of this segment */ + uint32_t start; + uint32_t size; /* 0 marks an entry that is not a segment */ +} dol_segment; + +#define dol_seg_end(s1) \ + (s1->start + s1->size) +#define dol_seg_after_sect(s1, s2) \ + (s1->start >= dol_seg_end(s2)) +#define dol_seg_overlaps(s1, s2) \ + (!(dol_seg_after_sect(s1,s2) || dol_seg_after_sect(s2,s1))) + +/* same as in asm/page.h */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#define MAX_COMMAND_LINE 256 + +#define UPSZ(X) _ALIGN_UP(sizeof(X), 4) +static struct boot_notes { + Elf_Bhdr hdr; + Elf_Nhdr bl_hdr; + unsigned char bl_desc[UPSZ(BOOTLOADER)]; + Elf_Nhdr blv_hdr; + unsigned char blv_desc[UPSZ(BOOTLOADER_VERSION)]; + Elf_Nhdr cmd_hdr; + unsigned char command_line[0]; /* zero-length tail: the command line bytes are placed right behind the struct */ +} elf_boot_notes = { + .hdr = { + .b_signature = 0x0E1FB007, + .b_size = sizeof(elf_boot_notes), /* template value */ + .b_checksum = 0, /* template value */ + .b_records = 3, + }, + .bl_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER), + .n_type = EBN_BOOTLOADER_NAME, + }, + .bl_desc = BOOTLOADER, + .blv_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER_VERSION), + .n_type = EBN_BOOTLOADER_VERSION, + }, + .blv_desc = BOOTLOADER_VERSION, + .cmd_hdr = { + .n_namesz = 0, + .n_descsz = 0, /* template value */ + .n_type = EBN_COMMAND_LINE, + }, +}; + +void print_sects_bitmap(dol_segment * seg) +{ /* Prints the indices of the bits set in seg->sects_bitmap as a comma separated list. */ + int i, first_seen; + + printf("\t" "sects_bitmap"); + first_seen = 0; + for (i = 0; i < DOL_MAX_SECT; i++) { + if ((seg->sects_bitmap & (1 << i)) == 0) + continue; + printf("%c%d", (first_seen ?
',' : '='), i); + first_seen = 1; + } + printf("\n"); +} + +void print_dol_segment(dol_segment * seg) +{ /* Prints start, size, end and the section bitmap of one segment to stdout. */ + printf("dol segment:\n"); + printf("\t" "start=%08lx, size=%ld (%08lx)\n", + (unsigned long)seg->start, (unsigned long)seg->size, + (unsigned long)seg->size); + printf("\t" "end=%08lx\n", (unsigned long)dol_seg_end(seg)); + print_sects_bitmap(seg); +} + +int load_dol_segments(dol_segment * seg, int max_segs, dol_header * h) +{ /* Fills seg[] with one page-aligned segment per section of h whose size is not 0, writing at most max_segs entries; returns the number written. */ + int i, n, remaining; + unsigned int start, size; + unsigned long adj1, adj2, end1; + + n = 0; + remaining = max_segs; + for (i = 0; i < DOL_MAX_SECT && remaining > 0; i++) { + /* zero here means the section is not in use */ + if (dol_sect_size(h, i) == 0) + continue; + + /* we initially map 1 seg to 1 sect */ + seg->sects_bitmap = (1 << i); + + start = dol_sect_address(h, i); + size = dol_sect_size(h, i); + + /* page align the segment */ + seg->start = start & PAGE_MASK; /* round the start down */ + end1 = start + size; + adj1 = start - seg->start; /* bytes gained in front by rounding down */ + adj2 = PAGE_ALIGN(end1) - end1; /* bytes added behind by rounding the end up */ + seg->size = adj1 + size + adj2; + + //print_dol_segment(seg); + + seg++; + remaining--; + n++; + } + return n; +} + +void fix_dol_segments_overlaps(dol_segment * seg, int max_segs) +{ + int i, j; + dol_segment *p, *pp; + long extra_length; + + /* look for overlapping segments and fix them */ + for (i = 0; i < max_segs; i++) { + p = seg + i; /* segment p */ + + /* not really a segment */ + if (p->size == 0) + continue; + + /* check if overlaps any previous segments */ + for (j = 0; j < i; j++) { + pp = seg + j; /* segment pp */ + + /* not a segment or no overlap */ + if (pp->size == 0 || !dol_seg_overlaps(p, pp)) + continue; + + /* merge the two segments */ + if (pp->start < p->start) { + /* extend pp to include p and delete p */ + extra_length = dol_seg_end(p) - dol_seg_end(pp); + if (extra_length > 0) { + pp->size += extra_length; + } + pp->sects_bitmap |= p->sects_bitmap; + p->size = p->start = p->sects_bitmap = 0; + + /* restart the loop because p was deleted */ + i = 0; + break;
+ } else { + /* extend p to include pp and delete pp */ + extra_length = dol_seg_end(pp) - dol_seg_end(p); + if (extra_length > 0) { + p->size += extra_length; + } + p->sects_bitmap |= pp->sects_bitmap; + pp->size = pp->start = pp->sects_bitmap = 0; + } + } + } +} + +/* + * Decide whether buf, holding dol_length bytes, looks like a DOL executable. + * Returns 0 when the header passes the sanity checks below and the entry + * point lies inside a text section, -1 otherwise. Sections or a BSS below + * 2GB only produce a warning. + */ +int dol_ppc_probe(const char *buf, off_t dol_length) +{ + dol_header header, *h; + int i, valid = 0; + + /* the DOL file should be at least as long as the DOL header */ + if (dol_length < DOL_HEADER_SIZE) { + if (debug) { + fprintf(stderr, "Not a DOL file, too short.\n"); + } + return -1; + } + + /* read the DOL header */ + memcpy(&header, buf, sizeof(header)); + h = &header; + + /* now perform some sanity checks */ + for (i = 0; i < DOL_MAX_SECT; i++) { + /* DOL segment MAY NOT be physically stored in the header */ + if ((dol_sect_offset(h, i) != 0) + && (dol_sect_offset(h, i) < DOL_HEADER_SIZE)) { + if (debug) { + fprintf(stderr, + "%s segment offset within DOL header\n", + dol_sect_type(i)); + } + return -1; + } + + /* end of physical storage must be within file */ + /* widen before adding: the sum of two uint32_t values can wrap and slip past this check */ + if ((uintmax_t)dol_sect_offset(h, i) + dol_sect_size(h, i) > + (uintmax_t)dol_length) { + if (debug) { + fprintf(stderr, + "%s segment past DOL file size\n", + dol_sect_type(i)); + } + return -1; + } + + /* we only should accept DOLs with segments above 2GB */ + if (dol_sect_address(h, i) != 0 + && !(dol_sect_address(h, i) & 0x80000000)) { + fprintf(stderr, "warning, %s segment below 2GB\n", + dol_sect_type(i)); + } + + if (i < DOL_SECT_MAX_TEXT) { + /* remember that entrypoint was in a code segment */ + if (h->entry_point >= dol_sect_address(h, i) + && h->entry_point < dol_sect_address(h, i) + + dol_sect_size(h, i)) + valid = 1; + } + } + + /* if there is a BSS segment it must^H^H^H^Hshould be above 2GB, too */ + if (h->address_bss != 0 && !(h->address_bss & 0x80000000)) { + fprintf(stderr, "warning, BSS segment below 2GB\n"); + } + + /* if entrypoint is not within a code segment reject this file */ + if (!valid) { + if (debug) { +
fprintf(stderr, "Entry point out of text segment\n"); + } + return -1; + } + + /* I've got a dol */ + return 0; +} + +void dol_ppc_usage(void) +{ /* Prints the two options understood by the DOL loader to stdout. */ + printf + (" --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n"); + +} + +int dol_ppc_load(int argc, char **argv, const char *buf, off_t UNUSED(len), + struct kexec_info *info) +{ + dol_header header, *h; + unsigned long entry; + char *arg_buf; + size_t arg_bytes; + unsigned long arg_base; + struct boot_notes *notes; + size_t note_bytes; + const char *command_line; + int command_line_len; + unsigned long mstart; + dol_segment dol_segs[DOL_MAX_SECT]; + unsigned int sects_bitmap; + unsigned long lowest_start; + int i, j, k; + int opt; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"command-line", 1, 0, OPT_APPEND}, + {"append", 1, 0, OPT_APPEND}, + {0, 0, 0, 0}, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR; + + /* + * Parse the command line arguments + */ + command_line = 0; + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } /* no break here: any other value continues into the OPT_APPEND case */ + case OPT_APPEND: + command_line = optarg; + break; + } + } + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) + 1; /* length includes the terminating NUL */ + } + + /* read the DOL header */ + memcpy(&header, buf, sizeof(header)); + h = &header; + + /* set entry point */ + entry = h->entry_point; + + /* convert the DOL sections into suitable page aligned segments */ + memset(dol_segs, 0, sizeof(dol_segs)); /* entries left at size 0 are skipped as: not a segment */ + + load_dol_segments(dol_segs, DOL_MAX_SECT, h); + fix_dol_segments_overlaps(dol_segs, DOL_MAX_SECT); + + /* load rest of segments */ + for (i = 0; i < DOL_MAX_SECT; i++) { + unsigned char *seg_buf; + /* not a segment */ + if (dol_segs[i].size == 0) + continue; + + //print_dol_segment(&dol_segs[i]); + + /* prepare
segment */ + seg_buf = xmalloc(dol_segs[i].size); + mstart = dol_segs[i].start; + if (mstart & 0xf0000000) { + /* + * GameCube DOLs expect memory mapped this way: + * + * 80000000 - 817fffff 24MB RAM, cached + * c0000000 - c17fffff 24MB RAM, not cached + * + * kexec, instead, needs physical memory layout, so + * we clear the upper bits of the address. + * (2 bits should be enough, indeed) + */ + mstart &= ~0xf0000000; /* clear bits 0-3, ibm syntax */ + } + add_segment(info, + seg_buf, dol_segs[i].size, + mstart, dol_segs[i].size); + + + /* load sections into segment memory, according to bitmap */ + sects_bitmap = 0; + while (sects_bitmap != dol_segs[i].sects_bitmap) { + unsigned char *sec_buf; + /* find lowest start address for section */ + lowest_start = 0xffffffff; + for (j = -1, k = 0; k < DOL_MAX_SECT; k++) { + /* continue if section is already done */ + if ((sects_bitmap & (1 << k)) != 0) + continue; + /* do nothing for non sections */ + if ((dol_segs[i].sects_bitmap & (1 << k)) == 0) + continue; + /* + * found new candidate + * + * j < 0 accepts the first pending section unconditionally, so + * a section whose address is 0xffffffff is still picked + * instead of leaving j at -1 for the shift and the array + * accesses below. + */ + if (j < 0 || dol_sect_address(h, k) < lowest_start) { + lowest_start = dol_sect_address(h, k); + j = k; + } + } + /* mark section as being loaded */ + sects_bitmap |= (1 << j); + + /* read it from file to the right place */ + sec_buf = seg_buf + + (dol_sect_address(h, j) - dol_segs[i].start); + memcpy(sec_buf, buf + dol_sect_offset(h, j), + dol_sect_size(h, j)); + } + } + + /* build the setup glue and argument segment (segment 0) */ + note_bytes = sizeof(elf_boot_notes) + _ALIGN(command_line_len, 4); + arg_bytes = note_bytes + _ALIGN(setup_dol_size, 4); + + arg_buf = xmalloc(arg_bytes); + arg_base = add_buffer(info, + arg_buf, arg_bytes, arg_bytes, 4, 0, 0xFFFFFFFFUL, 1); + + notes = (struct boot_notes *)(arg_buf + _ALIGN(setup_dol_size, 4)); + + notes->hdr.b_size = note_bytes; + notes->cmd_hdr.n_descsz = command_line_len; + notes->hdr.b_checksum = compute_ip_checksum(notes, note_bytes); + + setup_dol_regs.spr8 = entry; /* Link Register */ + +
memcpy(arg_buf, setup_dol_start, setup_dol_size); + memcpy(notes, &elf_boot_notes, sizeof(elf_boot_notes)); + /* command_line is NULL when no option supplied one; its length is 0 then */ + if (command_line_len) + memcpy(notes->command_line, command_line, command_line_len); + + /* + * The template copy above resets b_size, b_checksum and n_descsz, so the + * real values have to be stored after it. The checksum comes last + * because it is computed over the finished notes. + */ + notes->hdr.b_size = note_bytes; + notes->cmd_hdr.n_descsz = command_line_len; + notes->hdr.b_checksum = compute_ip_checksum(notes, note_bytes); + + if (debug) { + fprintf(stdout, "entry = %p\n", (void *)arg_base); + print_segments(stdout, info); + } + + info->entry = (void *)arg_base; + return 0; +} diff --git a/kexec/arch/ppc/kexec-elf-ppc.c b/kexec/arch/ppc/kexec-elf-ppc.c new file mode 100644 index 0000000..4a4886e --- /dev/null +++ b/kexec/arch/ppc/kexec-elf-ppc.c @@ -0,0 +1,458 @@ +/* + * kexec-elf-ppc.c - kexec Elf loader for the PowerPC + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-ppc.h" +#include <arch/options.h> +#include "../../kexec-syscall.h" +#include "crashdump-powerpc.h" + +#include "config.h" +#include "fixup_dtb.h" + +static const int probe_debug = 0; + +unsigned char reuse_initrd; +int create_flatten_tree(struct kexec_info *, unsigned char **, unsigned long *, + char *); + +/* no trailing semicolon: UPSZ() is expanded inside the array declarators below */ +#define UPSZ(X) _ALIGN_UP(sizeof(X), 4) +#ifdef WITH_GAMECUBE +static struct boot_notes { + Elf_Bhdr hdr; + Elf_Nhdr bl_hdr; + unsigned char bl_desc[UPSZ(BOOTLOADER)]; + Elf_Nhdr blv_hdr; + unsigned char blv_desc[UPSZ(BOOTLOADER_VERSION)]; + Elf_Nhdr cmd_hdr; + unsigned char command_line[0]; +} elf_boot_notes = { + .hdr = { + .b_signature = 0x0E1FB007, + .b_size = sizeof(elf_boot_notes), + .b_checksum = 0, + .b_records = 3, + }, + .bl_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER), + .n_type = EBN_BOOTLOADER_NAME, + }, + .bl_desc =
BOOTLOADER, + .blv_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER_VERSION), + .n_type = EBN_BOOTLOADER_VERSION, + }, + .blv_desc = BOOTLOADER_VERSION, + .cmd_hdr = { + .n_namesz = 0, + .n_descsz = 0, + .n_type = EBN_COMMAND_LINE, + }, +}; +#endif + +int elf_ppc_probe(const char *buf, off_t len) +{ + + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + goto out; + } + + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_PPC) { + /* for a different architecture */ + if (probe_debug) { + fprintf(stderr, "Not for this architecture.\n"); + } + result = -1; + goto out; + } + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +#ifdef WITH_GAMECUBE +static void gamecube_hack_addresses(struct mem_ehdr *ehdr) +{ + struct mem_phdr *phdr, *phdr_end; + phdr_end = ehdr->e_phdr + ehdr->e_phnum; + for(phdr = ehdr->e_phdr; phdr != phdr_end; phdr++) { + /* + * GameCube ELF kernel is linked with memory mapped + * this way (to easily transform it into a DOL + * suitable for being loaded with psoload): + * + * 80000000 - 817fffff 24MB RAM, cached + * c0000000 - c17fffff 24MB RAM, not cached + * + * kexec, instead, needs physical memory layout, so + * we clear the upper bits of the address. + * (2 bits should be enough, indeed) + */ + phdr->p_paddr &= ~0xf0000000; /* clear bits 0-3, ibm syntax */ + } +} +#endif + +/* See options.h -- add any more there, too. 
*/ +static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"command-line", 1, 0, OPT_APPEND}, + {"append", 1, 0, OPT_APPEND}, + {"ramdisk", 1, 0, OPT_RAMDISK}, + {"initrd", 1, 0, OPT_RAMDISK}, + {"gamecube", 1, 0, OPT_GAMECUBE}, + {"dtb", 1, 0, OPT_DTB}, + {"reuse-node", 1, 0, OPT_NODES}, + {0, 0, 0, 0}, +}; +static const char short_options[] = KEXEC_ARCH_OPT_STR; + +void elf_ppc_usage(void) +{ + printf( + " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --ramdisk=<filename> Initial RAM disk.\n" + " --initrd=<filename> same as --ramdisk\n" + " --gamecube=1|0 Enable/disable support for ELFs with changed\n" + " addresses suitable for the GameCube.\n" + " --dtb=<filename> Specify device tree blob file.\n" + " --reuse-node=node Specify nodes which should be taken from /proc/device-tree.\n" + " Can be set multiple times.\n" + ); +} + +int elf_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *command_line, *crash_cmdline, *cmdline_buf; + char *tmp_cmdline; + int command_line_len, crash_cmdline_len; + char *dtb; + int result; + char *error_msg; + unsigned long max_addr, hole_addr; + struct mem_phdr *phdr; + size_t size; +#ifdef CONFIG_PPC64 + unsigned long toc_addr; +#endif +#ifdef WITH_GAMECUBE + int target_is_gamecube = 1; + char *arg_buf; + size_t arg_bytes; + unsigned long arg_base; + struct boot_notes *notes; + size_t note_bytes; + unsigned char *setup_start; + uint32_t setup_size; +#else + char *seg_buf = NULL; + off_t seg_size = 0; + int target_is_gamecube = 0; + unsigned int addr; + unsigned long dtb_addr; + unsigned long dtb_addr_actual; +#endif + unsigned long kernel_addr; +#define FIXUP_ENTRYS (20) + char *fixup_nodes[FIXUP_ENTRYS + 1]; + int cur_fixup = 0; + int opt; + char *blob_buf = NULL; + off_t blob_size = 0; + + command_line = tmp_cmdline = NULL; + dtb = NULL; + max_addr = LONG_MAX; + 
hole_addr = 0; + kernel_addr = 0; + ramdisk = 0; + result = 0; + error_msg = NULL; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + tmp_cmdline = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_GAMECUBE: + target_is_gamecube = atoi(optarg); + break; + + case OPT_DTB: + dtb = optarg; + break; + + case OPT_NODES: + if (cur_fixup >= FIXUP_ENTRYS) { + die("The number of entries for the fixup is too large\n"); + } + fixup_nodes[cur_fixup] = optarg; + cur_fixup++; + break; + } + } + + if (ramdisk && reuse_initrd) + die("Can't specify --ramdisk or --initrd with --reuseinitrd\n"); + + command_line_len = 0; + if (tmp_cmdline) { + command_line = tmp_cmdline; + } else { + command_line = get_command_line(); + } + command_line_len = strlen(command_line); + + fixup_nodes[cur_fixup] = NULL; + + /* Parse the Elf file */ + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + goto out; + } + +#ifdef WITH_GAMECUBE + if (target_is_gamecube) { + gamecube_hack_addresses(&ehdr); + } +#endif + + /* Load the Elf data. Physical load addresses in elf64 header do not + * show up correctly. Use user supplied address for now to patch the + * elf header + */ + + phdr = &ehdr.e_phdr[0]; + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kernel_addr = locate_hole(info, size, 0, 0, max_addr, 1); +#ifdef CONFIG_PPC64 + ehdr.e_phdr[0].p_paddr = (Elf64_Addr)kernel_addr; +#else + ehdr.e_phdr[0].p_paddr = kernel_addr; +#endif + + /* Load the Elf data */ + result = elf_exec_load(&ehdr, info); + if (result < 0) { + goto out; + } + + /* + * Need to append some command line parameters internally in case of + * taking crash dumps. Additional segments need to be created. 
+ */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + crash_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)crash_cmdline, 0, COMMAND_LINE_SIZE); + result = load_crashdump_segments(info, crash_cmdline, + max_addr, 0); + if (result < 0) { + result = -1; + goto out; + } + crash_cmdline_len = strlen(crash_cmdline); + } else { + crash_cmdline = NULL; + crash_cmdline_len = 0; + } + + /* + * In case of a toy we take the hardcoded things and an easy setup via + * one of the assembly startups. Every thing else should be grown up + * and go through the purgatory. + */ +#ifdef WITH_GAMECUBE + if (target_is_gamecube) { + setup_start = setup_dol_start; + setup_size = setup_dol_size; + setup_dol_regs.spr8 = ehdr.e_entry; /* Link Register */ + } else { + setup_start = setup_simple_start; + setup_size = setup_simple_size; + setup_simple_regs.spr8 = ehdr.e_entry; /* Link Register */ + } + note_bytes = sizeof(elf_boot_notes) + _ALIGN(command_line_len, 4); + arg_bytes = note_bytes + _ALIGN(setup_size, 4); + + arg_buf = xmalloc(arg_bytes); + arg_base = add_buffer(info, + arg_buf, arg_bytes, arg_bytes, 4, 0, elf_max_addr(&ehdr), 1); + + notes = (struct boot_notes *)(arg_buf + _ALIGN(setup_size, 4)); + + memcpy(arg_buf, setup_start, setup_size); + memcpy(notes, &elf_boot_notes, sizeof(elf_boot_notes)); + memcpy(notes->command_line, command_line, command_line_len); + notes->hdr.b_size = note_bytes; + notes->cmd_hdr.n_descsz = command_line_len; + notes->hdr.b_checksum = compute_ip_checksum(notes, note_bytes); + + info->entry = (void *)arg_base; +#else + if (crash_cmdline_len + command_line_len + 1 > COMMAND_LINE_SIZE) { + printf("Kernel command line exceeds size\n"); + return -1; + } + + cmdline_buf = xmalloc(COMMAND_LINE_SIZE); + memset((void *)cmdline_buf, 0, COMMAND_LINE_SIZE); + if (command_line) + strncat(cmdline_buf, command_line, command_line_len); + if (crash_cmdline) + strncat(cmdline_buf, crash_cmdline, + sizeof(crash_cmdline) - + strlen(crash_cmdline) - 1); + + 
elf_rel_build_load(info, &info->rhdr, (const char *)purgatory, + purgatory_size, 0, elf_max_addr(&ehdr), 1, 0); + + /* Here we need to initialize the device tree, and find out where + * it is going to live so we can place it directly after the + * kernel image */ + if (dtb) { + /* Grab device tree from buffer */ + blob_buf = slurp_file(dtb, &blob_size); + } else { + create_flatten_tree(info, (unsigned char **)&blob_buf, + (unsigned long *)&blob_size, cmdline_buf); + } + if (!blob_buf || !blob_size) { + error_msg = "Device tree seems to be an empty file.\n"; + goto out2; + } + + /* initial fixup for device tree */ + blob_buf = fixup_dtb_init(info, blob_buf, &blob_size, kernel_addr, &dtb_addr); + + if (ramdisk) { + seg_buf = slurp_ramdisk_ppc(ramdisk, &seg_size); + /* load the ramdisk *above* the device tree */ + hole_addr = add_buffer(info, seg_buf, seg_size, seg_size, + 0, dtb_addr + blob_size + 1, max_addr, -1); + ramdisk_base = hole_addr; + ramdisk_size = seg_size; + } + if (reuse_initrd) { + ramdisk_base = initrd_base; + ramdisk_size = initrd_size; + } + + if (info->kexec_flags & KEXEC_ON_CRASH && ramdisk_base != 0) { + if ( (ramdisk_base < crash_base) || + (ramdisk_base > crash_base + crash_size) ) { + printf("WARNING: ramdisk is above crashkernel region!\n"); + } + else if (ramdisk_base + ramdisk_size > crash_base + crash_size) { + printf("WARNING: ramdisk overflows crashkernel region!\n"); + } + } + + /* Perform final fixup on devie tree, i.e. everything beside what + * was done above */ + fixup_dtb_finalize(info, blob_buf, &blob_size, fixup_nodes, + cmdline_buf); + dtb_addr_actual = add_buffer(info, blob_buf, blob_size, blob_size, 0, dtb_addr, + kernel_addr + KERNEL_ACCESS_TOP, 1); + if (dtb_addr_actual != dtb_addr) { + error_msg = "Error device tree not loadded to address it was expecting to be loaded too!\n"; + goto out2; + } + + /* + * set various variables for the purgatory. + * ehdr.e_entry is a virtual address. 
we know physical start + * address of the kernel (kernel_addr). Find the offset of + * e_entry from the virtual start address(e_phdr[0].p_vaddr) + * and calculate the actual physical address of the 'kernel entry'. + */ + addr = kernel_addr + (ehdr.e_entry - ehdr.e_phdr[0].p_vaddr); + elf_rel_set_symbol(&info->rhdr, "kernel", &addr, sizeof(addr)); + + addr = dtb_addr; + elf_rel_set_symbol(&info->rhdr, "dt_offset", + &addr, sizeof(addr)); + +#define PUL_STACK_SIZE (16 * 1024) + addr = locate_hole(info, PUL_STACK_SIZE, 0, 0, + elf_max_addr(&ehdr), 1); + addr += PUL_STACK_SIZE; + elf_rel_set_symbol(&info->rhdr, "stack", &addr, sizeof(addr)); +#undef PUL_STACK_SIZE + + /* + * Fixup ThreadPointer(r2) for purgatory. + * PPC32 ELF ABI expects : + * ThreadPointer (TP) = TCB + 0x7000 + * We manually allocate a TCB space and set the TP + * accordingly. + */ +#define TCB_SIZE 1024 +#define TCB_TP_OFFSET 0x7000 /* PPC32 ELF ABI */ + + addr = locate_hole(info, TCB_SIZE, 0, 0, + ((unsigned long)elf_max_addr(&ehdr) - TCB_TP_OFFSET), + 1); + addr += TCB_SIZE + TCB_TP_OFFSET; + elf_rel_set_symbol(&info->rhdr, "my_thread_ptr", &addr, sizeof(addr)); + +#undef TCB_SIZE +#undef TCB_TP_OFFSET + + addr = elf_rel_get_addr(&info->rhdr, "purgatory_start"); + info->entry = (void *)addr; + +out2: + free(cmdline_buf); +#endif +out: + free_elf_info(&ehdr); + free(crash_cmdline); + if (!tmp_cmdline) + free(command_line); + if (error_msg) + die("%s", error_msg); + + return result; +} diff --git a/kexec/arch/ppc/kexec-elf-rel-ppc.c b/kexec/arch/ppc/kexec-elf-rel-ppc.c new file mode 100644 index 0000000..1acbd86 --- /dev/null +++ b/kexec/arch/ppc/kexec-elf-rel-ppc.c @@ -0,0 +1,69 @@ +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS32) { + return 0; + } + if (ehdr->e_machine != EM_PPC) { + return 0; + } + return 1; 
+} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + switch(r_type) { + case R_PPC_ADDR32: + /* Simply set it */ + *(uint32_t *)location = value; + break; + + case R_PPC_ADDR16_LO: + /* Low half of the symbol */ + *(uint16_t *)location = value; + break; + + case R_PPC_ADDR16_HI: + *(uint16_t *)location = (value>>16) & 0xffff; + break; + + case R_PPC_ADDR16_HA: + /* Sign-adjusted lower 16 bits: PPC ELF ABI says: + (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF. + This is the same, only sane. + */ + *(uint16_t *)location = (value + 0x8000) >> 16; + break; + + case R_PPC_REL24: + if ((int)(value - address) < -0x02000000 + || (int)(value - address) >= 0x02000000) + { + die("Symbol more than 16MiB away"); + } + /* Only replace bits 2 through 26 */ + *(uint32_t *)location + = (*(uint32_t *)location & ~0x03fffffc) + | ((value - address) + & 0x03fffffc); + break; + + case R_PPC_REL32: + /* 32-bit relative jump. */ + *(uint32_t *)location = value - address; + break; + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } + return; +} diff --git a/kexec/arch/ppc/kexec-ppc.c b/kexec/arch/ppc/kexec-ppc.c new file mode 100644 index 0000000..03bec36 --- /dev/null +++ b/kexec/arch/ppc/kexec-ppc.c @@ -0,0 +1,968 @@ +/* + * kexec-ppc.c - kexec for the PowerPC + * Copyright (C) 2004, 2005 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <sys/types.h> +#include <dirent.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc.h" +#include "crashdump-powerpc.h" +#include <arch/options.h> + +#include "config.h" + +unsigned long dt_address_cells = 0, dt_size_cells = 0; +uint64_t rmo_top; +uint64_t memory_limit; +unsigned long long crash_base = 0, crash_size = 0; +unsigned long long initrd_base = 0, initrd_size = 0; +unsigned long long ramdisk_base = 0, ramdisk_size = 0; +unsigned int rtas_base, rtas_size; +int max_memory_ranges; +const char *ramdisk; + +/* + * Reads the #address-cells and #size-cells on this platform. + * This is used to parse the memory/reg info from the device-tree + */ +int init_memory_region_info() +{ + size_t res = 0; + int fd; + char *file; + + file = "/proc/device-tree/#address-cells"; + fd = open(file, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Unable to open %s\n", file); + return -1; + } + + res = read(fd, &dt_address_cells, sizeof(dt_address_cells)); + if (res != sizeof(dt_address_cells)) { + fprintf(stderr, "Error reading %s\n", file); + return -1; + } + close(fd); + + file = "/proc/device-tree/#size-cells"; + fd = open(file, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Unable to open %s\n", file); + return -1; + } + + res = read(fd, &dt_size_cells, sizeof(dt_size_cells)); + if (res != sizeof(dt_size_cells)) { + fprintf(stderr, "Error reading %s\n", file); + return -1; + } + close(fd); + + /* Convert the sizes into bytes */ + dt_size_cells *= sizeof(unsigned long); + dt_address_cells *= sizeof(unsigned long); + + return 0; +} + +#define MAXBYTES 128 +/* + * Reads the memory region info from the device-tree node pointed + * by @fd and fills the *start, *end with the boundaries of the region + */ +int 
read_memory_region_limits(int fd, unsigned long long *start, + unsigned long long *end) +{ + char buf[MAXBYTES]; + unsigned long *p; + unsigned long nbytes = dt_address_cells + dt_size_cells; + + if (lseek(fd, 0, SEEK_SET) == -1) { + fprintf(stderr, "Error in file seek\n"); + return -1; + } + if (read(fd, buf, nbytes) != nbytes) { + fprintf(stderr, "Error reading the memory region info\n"); + return -1; + } + + p = (unsigned long*)buf; + if (dt_address_cells == sizeof(unsigned long)) { + *start = p[0]; + p++; + } else if (dt_address_cells == sizeof(unsigned long long)) { + *start = ((unsigned long long *)p)[0]; + p = (unsigned long long *)p + 1; + } else { + fprintf(stderr, "Unsupported value for #address-cells : %ld\n", + dt_address_cells); + return -1; + } + + if (dt_size_cells == sizeof(unsigned long)) + *end = *start + p[0]; + else if (dt_size_cells == sizeof(unsigned long long)) + *end = *start + ((unsigned long long *)p)[0]; + else { + fprintf(stderr, "Unsupported value for #size-cells : %ld\n", + dt_size_cells); + return -1; + } + + return 0; +} + +void arch_reuse_initrd(void) +{ + reuse_initrd = 1; +} + +#ifdef WITH_GAMECUBE +#define MAX_MEMORY_RANGES 64 +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +static int get_memory_ranges_gc(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + int memory_ranges = 0; + + /* RAM - lowmem used by DOLs - framebuffer */ + memory_range[memory_ranges].start = 0x00003000; + memory_range[memory_ranges].end = 0x0174bfff; + memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + *range = memory_range; + *ranges = memory_ranges; + return 0; +} +#else +static int use_new_dtb; +static int nr_memory_ranges, nr_exclude_ranges; +static struct memory_range *exclude_range; +static struct memory_range *memory_range; +static struct memory_range *base_memory_range; +static uint64_t memory_max; + +/* + * Count the memory nodes under /proc/device-tree and populate the + * 
max_memory_ranges variable. This variable replaces MAX_MEMORY_RANGES + * macro used earlier. + */ +static int count_memory_ranges(void) +{ + char device_tree[256] = "/proc/device-tree/"; + struct dirent *dentry; + DIR *dir; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + max_memory_ranges++; + } + + /* need to add extra region for retained initrd */ + if (use_new_dtb) { + max_memory_ranges++; + } + + closedir(dir); + return 0; + +} + + static void cleanup_memory_ranges(void) + { + free(memory_range); + free(base_memory_range); + free(exclude_range); + } + +/* + * Allocate memory for various data structures used to hold + * values of different memory ranges + */ +static int alloc_memory_ranges(void) +{ + int memory_range_len; + + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = malloc(memory_range_len); + if (!memory_range) + return -1; + + base_memory_range = malloc(memory_range_len); + if (!base_memory_range) + goto err1; + + exclude_range = malloc(memory_range_len); + if (!exclude_range) + goto err1; + + memset(memory_range, 0, memory_range_len); + memset(base_memory_range, 0, memory_range_len); + memset(exclude_range, 0, memory_range_len); + return 0; + +err1: + fprintf(stderr, "memory range structure allocation failure\n"); + cleanup_memory_ranges(); + return -1; +} + +/* Sort the exclude ranges in memory */ +static int sort_ranges(void) +{ + int i, j; + uint64_t tstart, tend; + for (i = 0; i < nr_exclude_ranges - 1; i++) { + for (j = 0; j < nr_exclude_ranges - i - 1; j++) { + if (exclude_range[j].start > exclude_range[j+1].start) { + tstart = exclude_range[j].start; + tend = exclude_range[j].end; + exclude_range[j].start = exclude_range[j+1].start; + exclude_range[j].end = exclude_range[j+1].end; + exclude_range[j+1].start = tstart; + 
exclude_range[j+1].end = tend; + } + } + } + return 0; +} + +/* Sort the base ranges in memory - this is useful for ensuring that our + * ranges are in ascending order, even if device-tree read of memory nodes + * is done differently. Also, could be used for other range coalescing later + */ +static int sort_base_ranges(void) +{ + int i, j; + unsigned long long tstart, tend; + + for (i = 0; i < nr_memory_ranges - 1; i++) { + for (j = 0; j < nr_memory_ranges - i - 1; j++) { + if (base_memory_range[j].start > base_memory_range[j+1].start) { + tstart = base_memory_range[j].start; + tend = base_memory_range[j].end; + base_memory_range[j].start = base_memory_range[j+1].start; + base_memory_range[j].end = base_memory_range[j+1].end; + base_memory_range[j+1].start = tstart; + base_memory_range[j+1].end = tend; + } + } + } + return 0; +} + +static int realloc_memory_ranges(void) +{ + size_t memory_range_len; + + max_memory_ranges++; + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = (struct memory_range *) realloc(memory_range, + memory_range_len); + if (!memory_range) + goto err; + + base_memory_range = (struct memory_range *) realloc(base_memory_range, + memory_range_len); + if (!base_memory_range) + goto err; + + exclude_range = (struct memory_range *) realloc(exclude_range, + memory_range_len); + if (!exclude_range) + goto err; + + usablemem_rgns.ranges = (struct memory_range *) + realloc(usablemem_rgns.ranges, + memory_range_len); + if (!(usablemem_rgns.ranges)) + goto err; + + return 0; + +err: + fprintf(stderr, "memory range structure re-allocation failure\n"); + return -1; +} + +/* Get base memory ranges */ +static int get_base_ranges(void) +{ + int local_memory_ranges = 0; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + char buf[MAXBYTES]; + DIR *dir, *dmem; + struct dirent *dentry, *mentry; + int n, fd; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + while 
((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((dmem = opendir(fname)) == NULL) { + perror(fname); + closedir(dir); + return -1; + } + while ((mentry = readdir(dmem)) != NULL) { + unsigned long long start, end; + + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + if ((fd = open(fname, O_RDONLY)) < 0) { + perror(fname); + closedir(dmem); + closedir(dir); + return -1; + } + if (read_memory_region_limits(fd, &start, &end) != 0) { + close(fd); + closedir(dmem); + closedir(dir); + return -1; + } + if (local_memory_ranges >= max_memory_ranges) { + if (realloc_memory_ranges() < 0){ + close(fd); + break; + } + } + + base_memory_range[local_memory_ranges].start = start; + base_memory_range[local_memory_ranges].end = end; + base_memory_range[local_memory_ranges].type = RANGE_RAM; + local_memory_ranges++; + dbgprintf("%016llx-%016llx : %x\n", + base_memory_range[local_memory_ranges-1].start, + base_memory_range[local_memory_ranges-1].end, + base_memory_range[local_memory_ranges-1].type); + close(fd); + } + closedir(dmem); + } + closedir(dir); + nr_memory_ranges = local_memory_ranges; + sort_base_ranges(); + memory_max = base_memory_range[nr_memory_ranges - 1].end; + + dbgprintf("get base memory ranges:%d\n", nr_memory_ranges); + + return 0; +} + +static int read_kernel_memory_limit(char *fname, char *buf) +{ + FILE *file; + int n; + + if (!fname || !buf) + return -1; + + file = fopen(fname, "r"); + if (file == NULL) { + if (errno != ENOENT) { + perror(fname); + return -1; + } + errno = 0; + /* + * fall through. On older kernel this file + * is not present. Hence return success. + */ + } else { + /* Memory limit property is of u64 type. 
*/ + if ((n = fread(&memory_limit, 1, sizeof(uint64_t), file)) < 0) { + perror(fname); + goto err_out; + } + if (n != sizeof(uint64_t)) { + fprintf(stderr, "%s node has invalid size: %d\n", + fname, n); + goto err_out; + } + fclose(file); + } + return 0; +err_out: + fclose(file); + return -1; +} + +/* Return 0 if fname/value valid, -1 otherwise */ +int get_devtree_value(const char *fname, unsigned long long *value) +{ + FILE *file; + char buf[MAXBYTES]; + int n = -1; + + if ((file = fopen(fname, "r"))) { + n = fread(buf, 1, MAXBYTES, file); + fclose(file); + } + + if (n == sizeof(uint32_t)) + *value = ((uint32_t *)buf)[0]; + else if (n == sizeof(uint64_t)) + *value = ((uint64_t *)buf)[0]; + else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + return -1; + } + + return 0; +} + +/* Get devtree details and create exclude_range array + * Also create usablemem_ranges for KEXEC_ON_CRASH + */ +static int get_devtree_details(unsigned long kexec_flags) +{ + uint64_t rmo_base; + unsigned long long tce_base; + unsigned int tce_size; + unsigned long long htab_base, htab_size; + unsigned long long kernel_end; + unsigned long long initrd_start, initrd_end; + char buf[MAXBYTES]; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + DIR *dir, *cdir; + FILE *file; + struct dirent *dentry; + int n, i = 0; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "chosen", 6) && + strncmp(dentry->d_name, "memory@", 7) && + strncmp(dentry->d_name, "memory", 6) && + strncmp(dentry->d_name, "pci@", 4) && + strncmp(dentry->d_name, "rtas", 4)) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((cdir = opendir(fname)) == NULL) { + perror(fname); + goto error_opendir; + } + + if (strncmp(dentry->d_name, "chosen", 6) == 0) { + /* only reserve kernel region if we are doing a crash kernel */ + if (kexec_flags & KEXEC_ON_CRASH) 
{ + strcat(fname, "/linux,kernel-end"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + kernel_end = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + kernel_end = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + + /* Add kernel memory to exclude_range */ + exclude_range[i].start = 0x0UL; + exclude_range[i].end = kernel_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,crashkernel-base"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + crash_base = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + crash_base = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,crashkernel-size"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + crash_size = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + crash_size = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + + if (crash_base > mem_min) + mem_min = crash_base; + if (crash_base + crash_size < mem_max) + mem_max = crash_base + crash_size; + +#ifndef CONFIG_BOOKE + add_usable_mem_rgns(0, crash_base + crash_size); + /* 
Reserve the region (KDUMP_BACKUP_LIMIT,crash_base) */ + reserve(KDUMP_BACKUP_LIMIT, + crash_base-KDUMP_BACKUP_LIMIT); +#else + add_usable_mem_rgns(crash_base, crash_size); +#endif + } + /* + * Read the first kernel's memory limit. + * If the first kernel is booted with mem= option then + * it would export "linux,memory-limit" file + * reflecting value for the same. + */ + memset(fname, 0, sizeof(fname)); + snprintf(fname, sizeof(fname), "%s%s%s", device_tree, + dentry->d_name, "/linux,memory-limit"); + if (read_kernel_memory_limit(fname, buf) < 0) + goto error_opencdir; + + /* reserve the initrd_start and end locations. */ + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,initrd-start"); + file = fopen(fname, "r"); + if (!file) { + errno = 0; + initrd_start = 0; + } else { + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + initrd_start = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + initrd_start = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + } + + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,initrd-end"); + file = fopen(fname, "r"); + if (!file) { + errno = 0; + initrd_end = 0; + } else { + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + initrd_end = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + initrd_end = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + } + + if ((initrd_end - initrd_start) != 0 ) { + initrd_base = initrd_start; + initrd_size = initrd_end - initrd_start; + } + + if (reuse_initrd) { + /* Add initrd address to exclude_range */ + exclude_range[i].start = initrd_start; + 
exclude_range[i].end = initrd_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + } + + /* HTAB */ + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,htab-base"); + file = fopen(fname, "r"); + if (!file) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&htab_base, sizeof(unsigned long), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,htab-size"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if (fread(&htab_size, sizeof(unsigned long), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + /* Add htab address to exclude_range - NON-LPAR only */ + exclude_range[i].start = htab_base; + exclude_range[i].end = htab_base + htab_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + } /* chosen */ + if (strncmp(dentry->d_name, "rtas", 4) == 0) { + strcat(fname, "/linux,rtas-base"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_base, sizeof(unsigned int), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,rtas-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_size, sizeof(unsigned int), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + closedir(cdir); + /* Add rtas to exclude_range */ + exclude_range[i].start = rtas_base; + exclude_range[i].end = rtas_base + rtas_size; + i++; + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(rtas_base, rtas_size); + } /* rtas */ + + if (!strncmp(dentry->d_name, "memory@", 7) || + !strcmp(dentry->d_name, "memory")) { + int fd; + 
strcat(fname, "/reg"); + if ((fd = open(fname, O_RDONLY)) < 0) { + perror(fname); + goto error_opencdir; + } + if (read_memory_region_limits(fd, &rmo_base, &rmo_top) != 0) + goto error_openfile; + + if (rmo_top > 0x30000000UL) + rmo_top = 0x30000000UL; + + close(fd); + closedir(cdir); + } /* memory */ + + if (strncmp(dentry->d_name, "pci@", 4) == 0) { + strcat(fname, "/linux,tce-base"); + file = fopen(fname, "r"); + if (!file) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&tce_base, sizeof(unsigned long), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,tce-size"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if (fread(&tce_size, sizeof(unsigned int), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + /* Add tce to exclude_range - NON-LPAR only */ + exclude_range[i].start = tce_base; + exclude_range[i].end = tce_base + tce_size; + i++; + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(tce_base, tce_size); + closedir(cdir); + } /* pci */ + } + closedir(dir); + + nr_exclude_ranges = i; + + sort_ranges(); + + + int k; + for (k = 0; k < i; k++) + dbgprintf("exclude_range sorted exclude_range[%d] " + "start:%llx, end:%llx\n", k, exclude_range[k].start, + exclude_range[k].end); + + return 0; + +error_openfile: + fclose(file); +error_opencdir: + closedir(cdir); +error_opendir: + closedir(dir); + return -1; +} + + +/* Setup a sorted list of memory ranges. */ +static int setup_memory_ranges(unsigned long kexec_flags) +{ + int i, j = 0; + + /* Get the base list of memory ranges from /proc/device-tree/memory + * nodes. 
Build list of ranges to be excluded from valid memory + */ + + if (get_base_ranges()) + goto out; + if (get_devtree_details(kexec_flags)) + goto out; + + for (i = 0; i < nr_exclude_ranges; i++) { + /* If first exclude range does not start with 0, include the + * first hole of valid memory from 0 - exclude_range[0].start + */ + if (i == 0) { + if (exclude_range[i].start != 0) { + memory_range[j].start = 0; + memory_range[j].end = exclude_range[i].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + } + } /* i == 0 */ + /* If the last exclude range does not end at memory_max, include + * the last hole of valid memory from exclude_range[last].end - + * memory_max + */ + if (i == nr_exclude_ranges - 1) { + if (exclude_range[i].end < memory_max) { + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = memory_max; + memory_range[j].type = RANGE_RAM; + j++; + /* Limit the end to rmo_top */ + if (memory_range[j-1].start >= rmo_top) { + j--; + break; + } + if ((memory_range[j-1].start < rmo_top) && + (memory_range[j-1].end >= rmo_top)) { + memory_range[j-1].end = rmo_top; + break; + } + continue; + } + } /* i == nr_exclude_ranges - 1 */ + /* contiguous exclude ranges - skip */ + if (exclude_range[i+1].start == exclude_range[i].end + 1) + continue; + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = exclude_range[i+1].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + /* Limit range to rmo_top */ + if (memory_range[j-1].start >= rmo_top) { + j--; + break; + } + if ((memory_range[j-1].start < rmo_top) && + (memory_range[j-1].end >= rmo_top)) { + memory_range[j-1].end = rmo_top; + break; + } + } + + /* fixup in case we have no exclude regions */ + if (!j) { + memory_range[0].start = base_memory_range[0].start; + memory_range[0].end = rmo_top; + memory_range[0].type = RANGE_RAM; + nr_memory_ranges = 1; + } else + nr_memory_ranges = j; + + + int k; + for (k = 0; k < j; k++) + dbgprintf("setup_memory_ranges memory_range[%d] " + 
"start:%llx, end:%llx\n", k, memory_range[k].start, + memory_range[k].end); + return 0; + +out: + cleanup_memory_ranges(); + return -1; +} + + +/* Return a list of valid memory ranges */ +int get_memory_ranges_dt(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + if (count_memory_ranges()) + return -1; + if (alloc_memory_ranges()) + return -1; + if (setup_memory_ranges(kexec_flags)) + return -1; + + *range = memory_range; + *ranges = nr_memory_ranges; + return 0; +} +#endif + +/* Return a sorted list of memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + int res = 0; + + res = init_memory_region_info(); + if (res != 0) + return res; +#ifdef WITH_GAMECUBE + return get_memory_ranges_gc(range, ranges, kexec_flags); +#else + return get_memory_ranges_dt(range, ranges, kexec_flags); +#endif +} + +struct file_type file_type[] = { + {"elf-ppc", elf_ppc_probe, elf_ppc_load, elf_ppc_usage}, + {"dol-ppc", dol_ppc_probe, dol_ppc_load, dol_ppc_usage}, + {"uImage-ppc", uImage_ppc_probe, uImage_ppc_load, uImage_ppc_usage }, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + return 0; +} + +const struct arch_map_entry arches[] = { + /* For compatibility with older patches + * use KEXEC_ARCH_DEFAULT instead of KEXEC_ARCH_PPC here. 
+ */ + { "ppc", KEXEC_ARCH_DEFAULT }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + diff --git a/kexec/arch/ppc/kexec-ppc.h b/kexec/arch/ppc/kexec-ppc.h new file mode 100644 index 0000000..04e728e --- /dev/null +++ b/kexec/arch/ppc/kexec-ppc.h @@ -0,0 +1,86 @@ +#ifndef KEXEC_PPC_H +#define KEXEC_PPC_H + +#define MAXBYTES 128 +#define MAX_LINE 160 +#define CORE_TYPE_ELF32 1 +#define CORE_TYPE_ELF64 2 + +#define COMMAND_LINE_SIZE 2048 /* from kernel */ + +extern unsigned char setup_simple_start[]; +extern uint32_t setup_simple_size; + +extern struct { + uint32_t spr8; +} setup_simple_regs; + +extern unsigned char setup_dol_start[]; +extern uint32_t setup_dol_size; +extern uint64_t rmo_top; + +extern struct { + uint32_t spr8; +} setup_dol_regs; + +#define SIZE_16M (16*1024*1024UL) + +int elf_ppc_probe(const char *buf, off_t len); +int elf_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_ppc_usage(void); + +int uImage_ppc_probe(const char *buf, off_t len); +int uImage_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void uImage_ppc_usage(void); + +int dol_ppc_probe(const char *buf, off_t len); +int dol_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void dol_ppc_usage(void); + +/* + * During initial setup the kernel does not map the whole memory but a part of + * it. On Book-E that is 64MiB, 601 24MiB or 256MiB (if possible).
+ */ +#define KERNEL_ACCESS_TOP (24 * 1024 * 1024) + +/* boot block version 17 as defined by the linux kernel */ +struct bootblock { + unsigned magic, + totalsize, + off_dt_struct, + off_dt_strings, + off_mem_rsvmap, + version, + last_comp_version, + boot_physid, + dt_strings_size, + dt_struct_size; +}; + +typedef struct mem_rgns { + unsigned int size; + struct memory_range *ranges; +} mem_rgns_t; +extern mem_rgns_t usablemem_rgns; +extern int max_memory_ranges; +extern unsigned long long crash_base, crash_size; +extern unsigned long long initrd_base, initrd_size; +extern unsigned long long ramdisk_base, ramdisk_size; +extern unsigned char reuse_initrd; +extern const char *ramdisk; + +/* Method to parse the memory/reg nodes in device-tree */ +extern unsigned long dt_address_cells, dt_size_cells; +extern int init_memory_region_info(void); +extern int read_memory_region_limits(int fd, unsigned long long *start, + unsigned long long *end); +extern int get_devtree_value(const char *fname, unsigned long long *pvalue); +/*fs2dt*/ +void reserve(unsigned long long where, unsigned long long length); + +/* Defined kexec-uImage-ppc.c */ +extern char* slurp_ramdisk_ppc(const char *filename, off_t *r_size); +#endif /* KEXEC_PPC_H */ diff --git a/kexec/arch/ppc/kexec-uImage-ppc.c b/kexec/arch/ppc/kexec-uImage-ppc.c new file mode 100644 index 0000000..e8f7adc --- /dev/null +++ b/kexec/arch/ppc/kexec-uImage-ppc.c @@ -0,0 +1,325 @@ +/* + * uImage support for PowerPC + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <image.h> +#include <getopt.h> +#include <arch/options.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc.h" +#include "fixup_dtb.h" +#include <kexec-uImage.h> +#include "crashdump-powerpc.h" +#include <limits.h> + +int create_flatten_tree(struct kexec_info *, unsigned char **, unsigned long *, + char *); + +/* See options.h -- add any more there, too. 
*/ +static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"command-line", 1, 0, OPT_APPEND}, + {"append", 1, 0, OPT_APPEND}, + {"ramdisk", 1, 0, OPT_RAMDISK}, + {"initrd", 1, 0, OPT_RAMDISK}, + {"dtb", 1, 0, OPT_DTB}, + {"reuse-node", 1, 0, OPT_NODES}, + {0, 0, 0, 0}, +}; +static const char short_options[] = KEXEC_ARCH_OPT_STR; + +void uImage_ppc_usage(void) +{ + printf( + " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --ramdisk=<filename> Initial RAM disk.\n" + " --initrd=<filename> same as --ramdisk\n" + " --dtb=<filename> Specify device tree blob file.\n" + " --reuse-node=node Specify nodes which should be taken from /proc/device-tree.\n" + " Can be set multiple times.\n" + ); +} + +/* + * Load the ramdisk into buffer. + * If the supplied image is in uImage format use + * uImage_load() to read the payload from the image. + */ +char *slurp_ramdisk_ppc(const char *filename, off_t *r_size) +{ + struct Image_info img; + off_t size; + const char *buf = slurp_file(filename, &size); + int rc; + + /* Check if this is a uImage RAMDisk */ + if (!buf) + return buf; + rc = uImage_probe_ramdisk(buf, size, IH_ARCH_PPC); + if (rc < 0) + die("uImage: Corrupted ramdisk file %s\n", filename); + else if (rc == 0) { + if (uImage_load(buf, size, &img) != 0) + die("uImage: Reading %ld bytes from %s failed\n", + size, filename); + buf = img.buf; + size = img.len; + } + + *r_size = size; + return buf; +} + +int uImage_ppc_probe(const char *buf, off_t len) +{ + return uImage_probe_kernel(buf, len, IH_ARCH_PPC); +} + +static int ppc_load_bare_bits(int argc, char **argv, const char *buf, + off_t len, struct kexec_info *info, unsigned int load_addr, + unsigned int ep) +{ + char *command_line, *cmdline_buf, *crash_cmdline; + char *tmp_cmdline; + int command_line_len, crash_cmdline_len; + char *dtb; + unsigned int addr; + unsigned long dtb_addr; + unsigned long dtb_addr_actual; +#define 
FIXUP_ENTRYS (20) + char *fixup_nodes[FIXUP_ENTRYS + 1]; + int cur_fixup = 0; + int opt; + int ret = 0; + char *seg_buf = NULL; + off_t seg_size = 0; + unsigned long long hole_addr; + unsigned long max_addr; + char *blob_buf = NULL; + off_t blob_size = 0; + char *error_msg = NULL; + + cmdline_buf = NULL; + command_line = NULL; + tmp_cmdline = NULL; + dtb = NULL; + max_addr = LONG_MAX; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + tmp_cmdline = optarg; + break; + + case OPT_RAMDISK: + ramdisk = optarg; + break; + + case OPT_DTB: + dtb = optarg; + break; + + case OPT_NODES: + if (cur_fixup >= FIXUP_ENTRYS) { + die("The number of entries for the fixup is too large\n"); + } + fixup_nodes[cur_fixup] = optarg; + cur_fixup++; + break; + } + } + + if (ramdisk && reuse_initrd) + die("Can't specify --ramdisk or --initrd with --reuseinitrd\n"); + + command_line_len = 0; + if (tmp_cmdline) { + command_line = tmp_cmdline; + } else { + command_line = get_command_line(); + } + command_line_len = strlen(command_line) + 1; + + fixup_nodes[cur_fixup] = NULL; + + /* + * len contains the length of the whole kernel image except the bss + * section. The 1 MiB should cover it. The purgatory and the dtb are + * allocated from memtop down towards zero so we should never get too + * close to the bss :) + */ +#define _1MiB (1 * 1024 * 1024) + + /* + * If the provided load_addr cannot be allocated, find a new + * area. Rebase the entry point based on the new load_addr. 
+ */ + if (!valid_memory_range(info, load_addr, load_addr + (len + _1MiB))) { + int ep_offset = ep - load_addr; + + load_addr = locate_hole(info, len + _1MiB, 0, 0, max_addr, 1); + if (load_addr == ULONG_MAX) { + printf("Can't allocate memory for kernel of len %ld\n", + len + _1MiB); + return -1; + } + + ep = load_addr + ep_offset; + } + + add_segment(info, buf, len, load_addr, len + _1MiB); + + + if (info->kexec_flags & KEXEC_ON_CRASH) { + crash_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)crash_cmdline, 0, COMMAND_LINE_SIZE); + ret = load_crashdump_segments(info, crash_cmdline, + max_addr, 0); + if (ret < 0) { + ret = -1; + goto out; + } + crash_cmdline_len = strlen(crash_cmdline); + } else { + crash_cmdline = NULL; + crash_cmdline_len = 0; + } + + if (crash_cmdline_len + command_line_len + 1 > COMMAND_LINE_SIZE) { + printf("Kernel command line exceeds maximum possible length\n"); + return -1; + } + + cmdline_buf = xmalloc(COMMAND_LINE_SIZE); + memset((void *)cmdline_buf, 0, COMMAND_LINE_SIZE); + + if (command_line) + strcpy(cmdline_buf, command_line); + if (crash_cmdline) + strncat(cmdline_buf, crash_cmdline, crash_cmdline_len); + + elf_rel_build_load(info, &info->rhdr, (const char *)purgatory, + purgatory_size, 0, -1, -1, 0); + + /* Here we need to initialize the device tree, and find out where + * it is going to live so we can place it directly after the + * kernel image */ + if (dtb) { + /* Grab device tree from buffer */ + blob_buf = slurp_file(dtb, &blob_size); + } else { + create_flatten_tree(info, (unsigned char **)&blob_buf, + (unsigned long *)&blob_size, cmdline_buf); + } + if (!blob_buf || !blob_size) { + error_msg = "Device tree seems to be an empty file.\n"; + goto out2; + } + + /* initial fixup for device tree */ + blob_buf = fixup_dtb_init(info, blob_buf, &blob_size, load_addr, &dtb_addr); + + if (ramdisk) { + seg_buf = slurp_ramdisk_ppc(ramdisk, &seg_size); + /* Load ramdisk at top of memory */ + hole_addr = add_buffer(info, seg_buf, 
seg_size, seg_size, + 0, dtb_addr + blob_size, max_addr, -1); + ramdisk_base = hole_addr; + ramdisk_size = seg_size; + } + if (reuse_initrd) { + ramdisk_base = initrd_base; + ramdisk_size = initrd_size; + } + + if (info->kexec_flags & KEXEC_ON_CRASH && ramdisk_base != 0) { + if ( (ramdisk_base < crash_base) || + (ramdisk_base > crash_base + crash_size) ) { + printf("WARNING: ramdisk is above crashkernel region!\n"); + } + else if (ramdisk_base + ramdisk_size > crash_base + crash_size) { + printf("WARNING: ramdisk overflows crashkernel region!\n"); + } + } + + /* Perform final fixup on device tree, i.e. everything besides what + * was done above */ + fixup_dtb_finalize(info, blob_buf, &blob_size, fixup_nodes, + cmdline_buf); + dtb_addr_actual = add_buffer(info, blob_buf, blob_size, blob_size, 0, dtb_addr, + load_addr + KERNEL_ACCESS_TOP, 1); + if (dtb_addr_actual != dtb_addr) { + printf("dtb_addr_actual: %lx, dtb_addr: %lx\n", dtb_addr_actual, dtb_addr); + error_msg = "Error device tree not loadded to address it was expecting to be loaded too!\n"; + goto out2; + } + + /* set various variables for the purgatory */ + addr = ep; + elf_rel_set_symbol(&info->rhdr, "kernel", &addr, sizeof(addr)); + + addr = dtb_addr; + elf_rel_set_symbol(&info->rhdr, "dt_offset", &addr, sizeof(addr)); + +#define PUL_STACK_SIZE (16 * 1024) + addr = locate_hole(info, PUL_STACK_SIZE, 0, 0, -1, 1); + addr += PUL_STACK_SIZE; + elf_rel_set_symbol(&info->rhdr, "stack", &addr, sizeof(addr)); + /* No allocation past here in order not to overwrite the stack */ +#undef PUL_STACK_SIZE + + /* + * Fixup ThreadPointer(r2) for purgatory. + * PPC32 ELF ABI expects : + * ThreadPointer (TP) = TCB + 0x7000 + * We manually allocate a TCB space and set the TP + * accordingly.
+ */ +#define TCB_SIZE 1024 +#define TCB_TP_OFFSET 0x7000 /* PPC32 ELF ABI */ + addr = locate_hole(info, TCB_SIZE, 0, 0, + ((unsigned long)-1 - TCB_TP_OFFSET), + 1); + addr += TCB_SIZE + TCB_TP_OFFSET; + elf_rel_set_symbol(&info->rhdr, "my_thread_ptr", &addr, sizeof(addr)); +#undef TCB_TP_OFFSET +#undef TCB_SIZE + + addr = elf_rel_get_addr(&info->rhdr, "purgatory_start"); + info->entry = (void *)addr; + +out2: + free(cmdline_buf); +out: + free(crash_cmdline); + if (!tmp_cmdline) + free(command_line); + if (error_msg) + die("%s", error_msg); + return ret; +} + +int uImage_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct Image_info img; + int ret; + + ret = uImage_load(buf, len, &img); + if (ret) + return ret; + + return ppc_load_bare_bits(argc, argv, img.buf, img.len, info, + img.base, img.ep); +} diff --git a/kexec/arch/ppc/libfdt-wrapper.c b/kexec/arch/ppc/libfdt-wrapper.c new file mode 100644 index 0000000..ef355d0 --- /dev/null +++ b/kexec/arch/ppc/libfdt-wrapper.c @@ -0,0 +1,189 @@ +/* + * This file does the necessary interface mapping between the bootwrapper + * device tree operations and the interface provided by shared source + * files flatdevicetree.[ch]. + * + * Copyright 2007 David Gibson, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <stdlib.h> +#include <stddef.h> +#include <stdio.h> +#include <page.h> +#include <libfdt.h> +#include "ops.h" +#include "../../kexec.h" + +#define BAD_ERROR(err) (((err) < 0) \ + && ((err) != -FDT_ERR_NOTFOUND) \ + && ((err) != -FDT_ERR_EXISTS)) + +#define check_err(err) \ + ({ \ + if (BAD_ERROR(err) || ((err < 0) && kexec_debug)) \ + printf("%s():%d %s\n\r", __func__, __LINE__, \ + fdt_strerror(err)); \ + if (BAD_ERROR(err)) \ + exit(1); \ + (err < 0) ? -1 : 0; \ + }) + +#define offset_devp(off) \ + ({ \ + int _offset = (off); \ + check_err(_offset) ? NULL : (void *)(_offset+1); \ + }) + +#define devp_offset_find(devp) (((int)(devp))-1) +#define devp_offset(devp) (devp ? ((int)(devp))-1 : 0) + +static void *fdt; +static void *buf; /* = NULL */ +struct dt_ops dt_ops; + +#define EXPAND_GRANULARITY 1024 + +static void expand_buf(int minexpand) +{ + int size = fdt_totalsize(fdt); + int rc; + + size = _ALIGN(size + minexpand, EXPAND_GRANULARITY); + buf = realloc(buf, size); + if (!buf) + die("Couldn't find %d bytes to expand device tree\n\r", size); + rc = fdt_open_into(fdt, buf, size); + if (rc != 0) + die("Couldn't expand fdt into new buffer: %s\n\r", + fdt_strerror(rc)); + + fdt = buf; +} + +static void *fdt_wrapper_finddevice(const char *path) +{ + return offset_devp(fdt_path_offset(fdt, path)); +} + +static int fdt_wrapper_getprop(const void *devp, const char *name, + void *buf, const int buflen) +{ + const void *p; + int len; + + p = fdt_getprop(fdt, devp_offset(devp), name, &len); + if (!p) + return check_err(len); + memcpy(buf, p, min(len, buflen)); + return len; +} + +static int fdt_wrapper_setprop(const void *devp, const char *name, + const void *buf, const int len) +{ + int rc; + + rc = fdt_setprop(fdt, devp_offset(devp), 
name, buf, len); + if (rc == -FDT_ERR_NOSPACE) { + expand_buf(len + 16); + rc = fdt_setprop(fdt, devp_offset(devp), name, buf, len); + } + + return check_err(rc); +} + +static void *fdt_wrapper_get_parent(const void *devp) +{ + return offset_devp(fdt_parent_offset(fdt, devp_offset(devp))); +} + +static void *fdt_wrapper_create_node(const void *devp, const char *name) +{ + int offset; + + offset = fdt_add_subnode(fdt, devp_offset(devp), name); + if (offset == -FDT_ERR_NOSPACE) { + expand_buf(strlen(name) + 16); + offset = fdt_add_subnode(fdt, devp_offset(devp), name); + } + + return offset_devp(offset); +} + +static void *fdt_wrapper_find_node_by_prop_value(const void *prev, + const char *name, + const char *val, + int len) +{ + int offset = fdt_node_offset_by_prop_value(fdt, devp_offset_find(prev), + name, val, len); + return offset_devp(offset); +} + +static void *fdt_wrapper_find_node_by_compatible(const void *prev, + const char *val) +{ + int offset = fdt_node_offset_by_compatible(fdt, devp_offset_find(prev), + val); + return offset_devp(offset); +} + +static char *fdt_wrapper_get_path(const void *devp, char *buf, int len) +{ + int rc; + + rc = fdt_get_path(fdt, devp_offset(devp), buf, len); + if (check_err(rc)) + return NULL; + return buf; +} + +static unsigned long fdt_wrapper_finalize(void) +{ + int rc; + + rc = fdt_pack(fdt); + if (rc != 0) + die("Couldn't pack flat tree: %s\n\r", + fdt_strerror(rc)); + return (unsigned long)fdt; +} + +void fdt_init(void *blob) +{ + int err; + int bufsize; + + dt_ops.finddevice = fdt_wrapper_finddevice; + dt_ops.getprop = fdt_wrapper_getprop; + dt_ops.setprop = fdt_wrapper_setprop; + dt_ops.get_parent = fdt_wrapper_get_parent; + dt_ops.create_node = fdt_wrapper_create_node; + dt_ops.find_node_by_prop_value = fdt_wrapper_find_node_by_prop_value; + dt_ops.find_node_by_compatible = fdt_wrapper_find_node_by_compatible; + dt_ops.get_path = fdt_wrapper_get_path; + dt_ops.finalize = fdt_wrapper_finalize; + + /* Make sure the dt 
blob is the right version and so forth */ + fdt = blob; + bufsize = fdt_totalsize(fdt); + + err = fdt_open_into(fdt, fdt, bufsize); + if (err != 0) + die("fdt_init(): %s\n\r", fdt_strerror(err)); +} diff --git a/kexec/arch/ppc/ops.h b/kexec/arch/ppc/ops.h new file mode 100644 index 0000000..5e7a070 --- /dev/null +++ b/kexec/arch/ppc/ops.h @@ -0,0 +1,147 @@ +/* + * Global definition of all the bootwrapper operations. + * + * Author: Mark A. Greer <mgreer@mvista.com> + * + * 2006 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ +#ifndef _PPC_BOOT_OPS_H_ +#define _PPC_BOOT_OPS_H_ +#include "types.h" + +#define MAX_PATH_LEN 256 +#define MAX_PROP_LEN 256 /* What should this be? */ + +typedef void (*kernel_entry_t)(unsigned long r3, unsigned long r4, void *r5); + +/* Device Tree operations */ +struct dt_ops { + void * (*finddevice)(const char *name); + int (*getprop)(const void *phandle, const char *name, void *buf, + const int buflen); + int (*setprop)(const void *phandle, const char *name, + const void *buf, const int buflen); + void *(*get_parent)(const void *phandle); + /* The node must not already exist. 
*/ + void *(*create_node)(const void *parent, const char *name); + void *(*find_node_by_prop_value)(const void *prev, + const char *propname, + const char *propval, int proplen); + void *(*find_node_by_compatible)(const void *prev, + const char *compat); + unsigned long (*finalize)(void); + char *(*get_path)(const void *phandle, char *buf, int len); +}; +extern struct dt_ops dt_ops; + +void fdt_init(void *blob); +extern void flush_cache(void *, unsigned long); +int dt_xlate_reg(void *node, int res, unsigned long *addr, unsigned long *size); +int dt_xlate_addr(void *node, u32 *buf, int buflen, unsigned long *xlated_addr); +int dt_is_compatible(void *node, const char *compat); +void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize); +int dt_get_virtual_reg(void *node, void **addr, int nres); + +static inline void *finddevice(const char *name) +{ + return (dt_ops.finddevice) ? dt_ops.finddevice(name) : NULL; +} + +static inline int getprop(void *devp, const char *name, void *buf, int buflen) +{ + return (dt_ops.getprop) ? dt_ops.getprop(devp, name, buf, buflen) : -1; +} + +static inline int setprop(void *devp, const char *name, + const void *buf, int buflen) +{ + return (dt_ops.setprop) ? dt_ops.setprop(devp, name, buf, buflen) : -1; +} +#define setprop_val(devp, name, val) \ + do { \ + typeof(val) x = (val); \ + setprop((devp), (name), &x, sizeof(x)); \ + } while (0) + +static inline int setprop_str(void *devp, const char *name, const char *buf) +{ + if (dt_ops.setprop) + return dt_ops.setprop(devp, name, buf, strlen(buf) + 1); + + return -1; +} + +static inline void *get_parent(const char *devp) +{ + return dt_ops.get_parent ? dt_ops.get_parent(devp) : NULL; +} + +static inline void *create_node(const void *parent, const char *name) +{ + return dt_ops.create_node ? 
dt_ops.create_node(parent, name) : NULL; +} + + +static inline void *find_node_by_prop_value(const void *prev, + const char *propname, + const char *propval, int proplen) +{ + if (dt_ops.find_node_by_prop_value) + return dt_ops.find_node_by_prop_value(prev, propname, + propval, proplen); + + return NULL; +} + +static inline void *find_node_by_prop_value_str(const void *prev, + const char *propname, + const char *propval) +{ + return find_node_by_prop_value(prev, propname, propval, + strlen(propval) + 1); +} + +static inline void *find_node_by_devtype(const void *prev, + const char *type) +{ + return find_node_by_prop_value_str(prev, "device_type", type); +} + +static inline void *find_node_by_alias(const char *alias) +{ + void *devp = finddevice("/aliases"); + + if (devp) { + char path[MAX_PATH_LEN]; + if (getprop(devp, alias, path, MAX_PATH_LEN) > 0) + return finddevice(path); + } + + return NULL; +} + +static inline void *find_node_by_compatible(const void *prev, + const char *compat) +{ + if (dt_ops.find_node_by_compatible) + return dt_ops.find_node_by_compatible(prev, compat); + + return NULL; +} + +#define dt_fixup_mac_addresses(...) \ + __dt_fixup_mac_addresses(0, __VA_ARGS__, NULL) + + +static inline char *get_path(const void *phandle, char *buf, int len) +{ + if (dt_ops.get_path) + return dt_ops.get_path(phandle, buf, len); + + return NULL; +} + +#endif /* _PPC_BOOT_OPS_H_ */ diff --git a/kexec/arch/ppc/ppc-setup-dol.S b/kexec/arch/ppc/ppc-setup-dol.S new file mode 100644 index 0000000..17169bd --- /dev/null +++ b/kexec/arch/ppc/ppc-setup-dol.S @@ -0,0 +1,174 @@ +/* + * ppc-setup-dol.S - setup glue for Nintendo's GameCube + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#include "ppc_asm.h" + + .data + .globl setup_dol_start +setup_dol_start: + + /* Try to reproduce the GameCube "native" environment */ + + /* Setup BATs */ + isync + li r8, 0 + mtspr DBAT0U, r8 + mtspr DBAT0L, r8 + mtspr DBAT1U, r8 + mtspr DBAT1L, r8 + mtspr DBAT2U, r8 + mtspr DBAT2L, r8 + mtspr DBAT3U, r8 + mtspr DBAT3L, r8 + mtspr IBAT0U, r8 + mtspr IBAT0L, r8 + mtspr IBAT1U, r8 + mtspr IBAT1L, r8 + mtspr IBAT2U, r8 + mtspr IBAT2L, r8 + mtspr IBAT3U, r8 + mtspr IBAT3L, r8 + + /* + * Memory Map + * start end size description + * 0x80000000 0x817fffff 24MB RAM, uncached + * 0xc0000000 0xc17fffff 24MB RAM, cached + * 0xc8000000 0xc81fffff 2MB Embedded Framebuffer + * 0xcc000000 Hardware registers + * 0xe0000000 Layer 2 transfer cache ??? 256KB + * + */ + + isync + lis r8, 0x8000 /* IBAT0,DBAT0 for first 16Mbytes */ + ori r8, r8, 0x01ff /* 16MB */ + mtspr IBAT0U, r8 + mtspr DBAT0U, r8 + li r8, 0x0002 /* rw */ + mtspr IBAT0L, r8 + mtspr DBAT0L, r8 + + lis r8, 0xc000 /* DBAT1 for IO mem */ + ori r8, r8, 0x1fff /* 256MB */ + mtspr DBAT1U, r8 + li r8, 0x002a /* uncached, guarded ,rw */ + mtspr DBAT1L, r8 + + lis r8, 0x8100 /* IBAT2,DBAT2 for next 8Mbytes */ + ori r8, r8, 0x00ff /* 8MB */ + mtspr IBAT2U, r8 + mtspr DBAT2U, r8 + lis r8, 0x0100 + ori r8, r8, 0x0002 /* rw */ + mtspr IBAT2L, r8 + mtspr DBAT2L, r8 + + lis r8, 0xe000 /* DBAT3 for layer 2 transfer cache ??? */ + ori r8, r8, 0x01fe /* 16MB ??? 
*/ + mtspr DBAT3U, r8 + lis r8, 0xe000 + ori r8, r8, 0x0002 /* rw */ + mtspr DBAT3L, r8 + + sync + isync + +/* AFAIK, this is not strictly needed, although seems sane */ +#if 1 + li r9, 0 + + /* page table pointer */ + sync + mtspr SDR1, r9 + + /* segment registers */ + li r8, 16 + mtctr r8 + li r8, 0 +1: mtsrin r9, r8 /* zero */ + sync + addis r8,r8,0x1000 /* next register */ + bdnz 1b +#endif + + /* switch MMU on and continue */ + RELOC_SYM(1f) + mfmsr r0 + ori r0, r0, MSR_RI|MSR_ME|MSR_DR|MSR_IR + mtspr SRR1, r0 + oris r3, r3, 0x8000 /* adjust text address */ + mtspr SRR0, r3 + oris r1, r1, 0x8000 /* adjust stack */ + sync + rfi + +1: + /* from now on we run in a DOL-like environment */ + + + /* first, sanitize the hardware a little bit */ + /* although seems to be not needed in the general case */ + +#if 1 + /* audio */ + lis r8, 0xcc00 /* io mem */ + li r9, 0 + sth r9, 0x5036(r8) /* stop audio sample */ + stw r9, 0x6c00(r8) /* stop streaming */ + stw r9, 0x6c04(r8) /* mute */ + + /* video */ + mfspr r8, 920 /* spr920 = HID2 */ + rlwinm r8, r8, 0, 4, 2 /* stop GX FIFO, and more */ + mtspr 920, r8 + + /* exi */ + lis r8, 0xcc00 /* io mem */ +1: lwz r9,0x680c(r8) /* wait for dma transfer to complete */ + andi. r9,r9,1 + bne+ 1b + stw r9,0x6800(r8) /* disable exi interrupts */ + addi r8,r8,0x14 /* next channel */ + andi. r9,r8,0x40 /* XXX 4 channels? 
*/ + beq+ 1b + + /* pic */ + lis r8, 0xcc00 /* io mem */ + li r9, 0 + stw r9, 0x3004(r8) /* mask all interrupts */ + stw r9, 0x3000(r8) /* clear interrupt cause */ + + /* invalidate L1 data and instructions caches */ + mfspr r8, HID0 + ori r8, r8, HID0_ICFI|HID0_DCI + mtspr HID0, r8 +#endif + + /* jump to our entry point */ + RELOC_SYM(setup_dol_regs) + mr r9, r3 + lwz r5, spr8 - setup_dol_regs(r9) + + mtlr r5 + blr + + .balign 4 + .globl setup_dol_regs +setup_dol_regs: +spr8: .long 0x00000000 + + .balign 4 +//#include "isobel_reloc_debug_console.s" + +setup_dol_end: + + .globl setup_dol_size +setup_dol_size: + .long setup_dol_end - setup_dol_start + diff --git a/kexec/arch/ppc/ppc-setup-simple.S b/kexec/arch/ppc/ppc-setup-simple.S new file mode 100644 index 0000000..1317a8d --- /dev/null +++ b/kexec/arch/ppc/ppc-setup-simple.S @@ -0,0 +1,39 @@ +/* + * ppc-setup-simple.S - (hopefully) setup for simple embedded platforms + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +/* + * Only suitable for platforms booting with MMU turned off. + * -- Albert Herranz + */ + +#include "ppc_asm.h" + + .data + .globl setup_simple_start +setup_simple_start: + + /* should perform here any required setup */ + + RELOC_SYM(setup_simple_regs) + mr r9, r3 + lwz r5, spr8 - setup_simple_regs(r9) + + mtlr r5 + blr + + .balign 4 + .globl setup_simple_regs +setup_simple_regs: +spr8: .long 0x00000000 + +setup_simple_end: + + .globl setup_simple_size +setup_simple_size: + .long setup_simple_end - setup_simple_start + diff --git a/kexec/arch/ppc/ppc_asm.h b/kexec/arch/ppc/ppc_asm.h new file mode 100644 index 0000000..36503a9 --- /dev/null +++ b/kexec/arch/ppc/ppc_asm.h @@ -0,0 +1,506 @@ +/* + * ppc_asm.h - mainly bits stolen from Linux kernel asm/reg.h and asm/ppc_asm.h + * + * This source code is licensed under the GNU General Public License, + * Version 2. 
See the file COPYING for more details. + */ + +/* Condition Register Bit Fields */ + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + + +/* General Purpose Registers (GPRs) */ + +#define r0 0 +#define r1 1 +#define r2 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +/* Machine State Register (MSR) Fields */ +#define MSR_SF (1<<63) +#define MSR_ISF (1<<61) +#define MSR_VEC (1<<25) /* Enable AltiVec */ +#define MSR_POW (1<<18) /* Enable Power Management */ +#define MSR_WE (1<<18) /* Wait State Enable */ +#define MSR_TGPR (1<<17) /* TLB Update registers in use */ +#define MSR_CE (1<<17) /* Critical Interrupt Enable */ +#define MSR_ILE (1<<16) /* Interrupt Little Endian */ +#define MSR_EE (1<<15) /* External Interrupt Enable */ +#define MSR_PR (1<<14) /* Problem State / Privilege Level */ +#define MSR_FP (1<<13) /* Floating Point enable */ +#define MSR_ME (1<<12) /* Machine Check Enable */ +#define MSR_FE0 (1<<11) /* Floating Exception mode 0 */ +#define MSR_SE (1<<10) /* Single Step */ +#define MSR_BE (1<<9) /* Branch Trace */ +#define MSR_DE (1<<9) /* Debug Exception Enable */ +#define MSR_FE1 (1<<8) /* Floating Exception mode 1 */ +#define MSR_IP (1<<6) /* Exception prefix 0x000/0xFFF */ +#define MSR_IR (1<<5) /* Instruction Relocate */ +#define MSR_DR (1<<4) /* Data Relocate */ +#define MSR_PE (1<<3) /* Protection Enable */ +#define MSR_PX (1<<2) /* Protection Exclusive Mode */ +#define MSR_RI (1<<1) /* Recoverable Exception */ +#define MSR_LE (1<<0) /* Little Endian */ + +/* Special Purpose 
Registers (SPRNs)*/ +#define SPRN_CTR 0x009 /* Count Register */ +#define SPRN_DABR 0x3F5 /* Data Address Breakpoint Register */ +#define SPRN_DAR 0x013 /* Data Address Register */ +#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ +#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ +#define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ +#define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */ +#define SPRN_HIOR 0x137 /* 970 Hypervisor interrupt offset */ +#define SPRN_DBAT0L 0x219 /* Data BAT 0 Lower Register */ +#define SPRN_DBAT0U 0x218 /* Data BAT 0 Upper Register */ +#define SPRN_DBAT1L 0x21B /* Data BAT 1 Lower Register */ +#define SPRN_DBAT1U 0x21A /* Data BAT 1 Upper Register */ +#define SPRN_DBAT2L 0x21D /* Data BAT 2 Lower Register */ +#define SPRN_DBAT2U 0x21C /* Data BAT 2 Upper Register */ +#define SPRN_DBAT3L 0x21F /* Data BAT 3 Lower Register */ +#define SPRN_DBAT3U 0x21E /* Data BAT 3 Upper Register */ +#define SPRN_DBAT4L 0x239 /* Data BAT 4 Lower Register */ +#define SPRN_DBAT4U 0x238 /* Data BAT 4 Upper Register */ +#define SPRN_DBAT5L 0x23B /* Data BAT 5 Lower Register */ +#define SPRN_DBAT5U 0x23A /* Data BAT 5 Upper Register */ +#define SPRN_DBAT6L 0x23D /* Data BAT 6 Lower Register */ +#define SPRN_DBAT6U 0x23C /* Data BAT 6 Upper Register */ +#define SPRN_DBAT7L 0x23F /* Data BAT 7 Lower Register */ +#define SPRN_DBAT7U 0x23E /* Data BAT 7 Upper Register */ + +#define SPRN_DEC 0x016 /* Decrement Register */ +#define SPRN_DER 0x095 /* Debug Enable Register */ +#define DER_RSTE 0x40000000 /* Reset Interrupt */ +#define DER_CHSTPE 0x20000000 /* Check Stop */ +#define DER_MCIE 0x10000000 /* Machine Check Interrupt */ +#define DER_EXTIE 0x02000000 /* External Interrupt */ +#define DER_ALIE 0x01000000 /* Alignment Interrupt */ +#define DER_PRIE 0x00800000 /* Program Interrupt */ +#define DER_FPUVIE 0x00400000 /* FP Unavailable Interrupt */ +#define DER_DECIE 0x00200000 /* 
Decrementer Interrupt */ +#define DER_SYSIE 0x00040000 /* System Call Interrupt */ +#define DER_TRE 0x00020000 /* Trace Interrupt */ +#define DER_SEIE 0x00004000 /* FP SW Emulation Interrupt */ +#define DER_ITLBMSE 0x00002000 /* Imp. Spec. Instruction TLB Miss */ +#define DER_ITLBERE 0x00001000 /* Imp. Spec. Instruction TLB Error */ +#define DER_DTLBMSE 0x00000800 /* Imp. Spec. Data TLB Miss */ +#define DER_DTLBERE 0x00000400 /* Imp. Spec. Data TLB Error */ +#define DER_LBRKE 0x00000008 /* Load/Store Breakpoint Interrupt */ +#define DER_IBRKE 0x00000004 /* Instruction Breakpoint Interrupt */ +#define DER_EBRKE 0x00000002 /* External Breakpoint Interrupt */ +#define DER_DPIE 0x00000001 /* Dev. Port Nonmaskable Request */ +#define SPRN_DMISS 0x3D0 /* Data TLB Miss Register */ +#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ +#define SPRN_EAR 0x11A /* External Address Register */ +#define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */ +#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */ +#define SPRN_HID0 0x3F0 /* Hardware Implementation Register 0 */ +#define HID0_EMCP (1<<31) /* Enable Machine Check pin */ +#define HID0_EBA (1<<29) /* Enable Bus Address Parity */ +#define HID0_EBD (1<<28) /* Enable Bus Data Parity */ +#define HID0_SBCLK (1<<27) +#define HID0_EICE (1<<26) +#define HID0_TBEN (1<<26) /* Timebase enable - 745x */ +#define HID0_ECLK (1<<25) +#define HID0_PAR (1<<24) +#define HID0_STEN (1<<24) /* Software table search enable - 745x */ +#define HID0_HIGH_BAT (1<<23) /* Enable high BATs - 7455 */ +#define HID0_DOZE (1<<23) +#define HID0_NAP (1<<22) +#define HID0_SLEEP (1<<21) +#define HID0_DPM (1<<20) +#define HID0_BHTCLR (1<<18) /* Clear branch history table - 7450 */ +#define HID0_XAEN (1<<17) /* Extended addressing enable - 7450 */ +#define HID0_NHR (1<<16) /* Not hard reset (software bit-7450)*/ +#define HID0_ICE (1<<15) /* Instruction Cache Enable */ +#define HID0_DCE (1<<14) /* Data Cache Enable */ +#define 
HID0_ILOCK (1<<13) /* Instruction Cache Lock */ +#define HID0_DLOCK (1<<12) /* Data Cache Lock */ +#define HID0_ICFI (1<<11) /* Instr. Cache Flash Invalidate */ +#define HID0_DCI (1<<10) /* Data Cache Invalidate */ +#define HID0_SPD (1<<9) /* Speculative disable */ +#define HID0_SGE (1<<7) /* Store Gathering Enable */ +#define HID0_SIED (1<<7) /* Serial Instr. Execution [Disable] */ +#define HID0_DFCA (1<<6) /* Data Cache Flush Assist */ +#define HID0_LRSTK (1<<4) /* Link register stack - 745x */ +#define HID0_BTIC (1<<5) /* Branch Target Instr Cache Enable */ +#define HID0_ABE (1<<3) /* Address Broadcast Enable */ +#define HID0_FOLD (1<<3) /* Branch Folding enable - 745x */ +#define HID0_BHTE (1<<2) /* Branch History Table Enable */ +#define HID0_BTCD (1<<1) /* Branch target cache disable */ +#define HID0_NOPDST (1<<1) /* No-op dst, dstt, etc. instr. */ +#define HID0_NOPTI (1<<0) /* No-op dcbt and dcbst instr. */ + +#define SPRN_HID1 0x3F1 /* Hardware Implementation Register 1 */ +#define HID1_EMCP (1<<31) /* 7450 Machine Check Pin Enable */ +#define HID1_PC0 (1<<16) /* 7450 PLL_CFG[0] */ +#define HID1_PC1 (1<<15) /* 7450 PLL_CFG[1] */ +#define HID1_PC2 (1<<14) /* 7450 PLL_CFG[2] */ +#define HID1_PC3 (1<<13) /* 7450 PLL_CFG[3] */ +#define HID1_SYNCBE (1<<11) /* 7450 ABE for sync, eieio */ +#define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ +#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ +#define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ +#define SPRN_HID4 0x3F4 /* 970 HID4 */ +#define SPRN_HID5 0x3F6 /* 970 HID5 */ +#if !defined(SPRN_IAC1) && !defined(SPRN_IAC2) +#define SPRN_IAC1 0x3F4 /* Instruction Address Compare 1 */ +#define SPRN_IAC2 0x3F5 /* Instruction Address Compare 2 */ +#endif +#define SPRN_IBAT0L 0x211 /* Instruction BAT 0 Lower Register */ +#define SPRN_IBAT0U 0x210 /* Instruction BAT 0 Upper Register */ +#define SPRN_IBAT1L 0x213 /* Instruction BAT 1 Lower Register */ +#define SPRN_IBAT1U 0x212 /* 
Instruction BAT 1 Upper Register */ +#define SPRN_IBAT2L 0x215 /* Instruction BAT 2 Lower Register */ +#define SPRN_IBAT2U 0x214 /* Instruction BAT 2 Upper Register */ +#define SPRN_IBAT3L 0x217 /* Instruction BAT 3 Lower Register */ +#define SPRN_IBAT3U 0x216 /* Instruction BAT 3 Upper Register */ +#define SPRN_IBAT4L 0x231 /* Instruction BAT 4 Lower Register */ +#define SPRN_IBAT4U 0x230 /* Instruction BAT 4 Upper Register */ +#define SPRN_IBAT5L 0x233 /* Instruction BAT 5 Lower Register */ +#define SPRN_IBAT5U 0x232 /* Instruction BAT 5 Upper Register */ +#define SPRN_IBAT6L 0x235 /* Instruction BAT 6 Lower Register */ +#define SPRN_IBAT6U 0x234 /* Instruction BAT 6 Upper Register */ +#define SPRN_IBAT7L 0x237 /* Instruction BAT 7 Lower Register */ +#define SPRN_IBAT7U 0x236 /* Instruction BAT 7 Upper Register */ +#define SPRN_ICMP 0x3D5 /* Instruction TLB Compare Register */ +#define SPRN_ICTC 0x3FB /* Instruction Cache Throttling Control Reg */ +#define SPRN_ICTRL 0x3F3 /* 1011 7450 icache and interrupt ctrl */ +#define ICTRL_EICE 0x08000000 /* enable icache parity errs */ +#define ICTRL_EDC 0x04000000 /* enable dcache parity errs */ +#define ICTRL_EICP 0x00000100 /* enable icache par. 
check */ +#define SPRN_IMISS 0x3D4 /* Instruction TLB Miss Register */ +#define SPRN_IMMR 0x27E /* Internal Memory Map Register */ +#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Regsiter */ +#define SPRN_L2CR2 0x3f8 +#define L2CR_L2E 0x80000000 /* L2 enable */ +#define L2CR_L2PE 0x40000000 /* L2 parity enable */ +#define L2CR_L2SIZ_MASK 0x30000000 /* L2 size mask */ +#define L2CR_L2SIZ_256KB 0x10000000 /* L2 size 256KB */ +#define L2CR_L2SIZ_512KB 0x20000000 /* L2 size 512KB */ +#define L2CR_L2SIZ_1MB 0x30000000 /* L2 size 1MB */ +#define L2CR_L2CLK_MASK 0x0e000000 /* L2 clock mask */ +#define L2CR_L2CLK_DISABLED 0x00000000 /* L2 clock disabled */ +#define L2CR_L2CLK_DIV1 0x02000000 /* L2 clock / 1 */ +#define L2CR_L2CLK_DIV1_5 0x04000000 /* L2 clock / 1.5 */ +#define L2CR_L2CLK_DIV2 0x08000000 /* L2 clock / 2 */ +#define L2CR_L2CLK_DIV2_5 0x0a000000 /* L2 clock / 2.5 */ +#define L2CR_L2CLK_DIV3 0x0c000000 /* L2 clock / 3 */ +#define L2CR_L2RAM_MASK 0x01800000 /* L2 RAM type mask */ +#define L2CR_L2RAM_FLOW 0x00000000 /* L2 RAM flow through */ +#define L2CR_L2RAM_PIPE 0x01000000 /* L2 RAM pipelined */ +#define L2CR_L2RAM_PIPE_LW 0x01800000 /* L2 RAM pipelined latewr */ +#define L2CR_L2DO 0x00400000 /* L2 data only */ +#define L2CR_L2I 0x00200000 /* L2 global invalidate */ +#define L2CR_L2CTL 0x00100000 /* L2 RAM control */ +#define L2CR_L2WT 0x00080000 /* L2 write-through */ +#define L2CR_L2TS 0x00040000 /* L2 test support */ +#define L2CR_L2OH_MASK 0x00030000 /* L2 output hold mask */ +#define L2CR_L2OH_0_5 0x00000000 /* L2 output hold 0.5 ns */ +#define L2CR_L2OH_1_0 0x00010000 /* L2 output hold 1.0 ns */ +#define L2CR_L2SL 0x00008000 /* L2 DLL slow */ +#define L2CR_L2DF 0x00004000 /* L2 differential clock */ +#define L2CR_L2BYP 0x00002000 /* L2 DLL bypass */ +#define L2CR_L2IP 0x00000001 /* L2 GI in progress */ +#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Regsiter */ +#define L3CR_L3E 0x80000000 /* L3 enable */ +#define L3CR_L3PE 0x40000000 /* L3 data 
parity enable */ +#define L3CR_L3APE 0x20000000 /* L3 addr parity enable */ +#define L3CR_L3SIZ 0x10000000 /* L3 size */ +#define L3CR_L3CLKEN 0x08000000 /* L3 clock enable */ +#define L3CR_L3RES 0x04000000 /* L3 special reserved bit */ +#define L3CR_L3CLKDIV 0x03800000 /* L3 clock divisor */ +#define L3CR_L3IO 0x00400000 /* L3 instruction only */ +#define L3CR_L3SPO 0x00040000 /* L3 sample point override */ +#define L3CR_L3CKSP 0x00030000 /* L3 clock sample point */ +#define L3CR_L3PSP 0x0000e000 /* L3 P-clock sample point */ +#define L3CR_L3REP 0x00001000 /* L3 replacement algorithm */ +#define L3CR_L3HWF 0x00000800 /* L3 hardware flush */ +#define L3CR_L3I 0x00000400 /* L3 global invalidate */ +#define L3CR_L3RT 0x00000300 /* L3 SRAM type */ +#define L3CR_L3NIRCA 0x00000080 /* L3 non-integer ratio clock adj. */ +#define L3CR_L3DO 0x00000040 /* L3 data only mode */ +#define L3CR_PMEN 0x00000004 /* L3 private memory enable */ +#define L3CR_PMSIZ 0x00000001 /* L3 private memory size */ +#define SPRN_MSSCR0 0x3f6 /* Memory Subsystem Control Register 0 */ +#define SPRN_MSSSR0 0x3f7 /* Memory Subsystem Status Register 1 */ +#define SPRN_LDSTCR 0x3f8 /* Load/Store control register */ +#define SPRN_LDSTDB 0x3f4 /* */ +#define SPRN_LR 0x008 /* Link Register */ +#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 */ +#ifndef SPRN_PIR +#define SPRN_PIR 0x3FF /* Processor Identification Register */ +#endif +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 */ +#define SPRN_PTEHI 0x3D5 /* 981 7450 PTE HI word (S/W TLB load) */ +#define SPRN_PTELO 0x3D6 /* 982 7450 PTE LO word (S/W TLB load) */ +#define SPRN_PVR 0x11F /* Processor Version Register */ +#define SPRN_RPA 0x3D6 /* Required Physical Address Register */ 
+#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ +#define SPRN_SDR1 0x019 /* MMU Hash Base Register */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ +#define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ +#define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ +#define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ +#define SPRN_SPRG3 0x113 /* Special Purpose Register General 3 */ +#define SPRN_SPRG4 0x114 /* Special Purpose Register General 4 */ +#define SPRN_SPRG5 0x115 /* Special Purpose Register General 5 */ +#define SPRN_SPRG6 0x116 /* Special Purpose Register General 6 */ +#define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */ +#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ +#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ +#define SPRN_THRM1 0x3FC /* Thermal Management Register 1 */ +/* these bits were defined in inverted endian sense originally, ugh, confusing */ +#define THRM1_TIN (1 << 31) +#define THRM1_TIV (1 << 30) +#define THRM1_THRES(x) ((x&0x7f)<<23) +#define THRM3_SITV(x) ((x&0x3fff)<<1) +#define THRM1_TID (1<<2) +#define THRM1_TIE (1<<1) +#define THRM1_V (1<<0) +#define SPRN_THRM2 0x3FD /* Thermal Management Register 2 */ +#define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ +#define THRM3_E (1<<0) +#define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register */ +#define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ +#define SPRN_XER 0x001 /* Fixed Point Exception Register 
*/ + +/* Bit definitions for MMCR0 and PMC1 / PMC2. */ +#define MMCR0_PMC1_CYCLES (1 << 7) +#define MMCR0_PMC1_ICACHEMISS (5 << 7) +#define MMCR0_PMC1_DTLB (6 << 7) +#define MMCR0_PMC2_DCACHEMISS 0x6 +#define MMCR0_PMC2_CYCLES 0x1 +#define MMCR0_PMC2_ITLB 0x7 +#define MMCR0_PMC2_LOADMISSTIME 0x5 + +/* Short-hand versions for a number of the above SPRNs */ +#define CTR SPRN_CTR /* Counter Register */ +#define DAR SPRN_DAR /* Data Address Register */ +#define DABR SPRN_DABR /* Data Address Breakpoint Register */ +#define DBAT0L SPRN_DBAT0L /* Data BAT 0 Lower Register */ +#define DBAT0U SPRN_DBAT0U /* Data BAT 0 Upper Register */ +#define DBAT1L SPRN_DBAT1L /* Data BAT 1 Lower Register */ +#define DBAT1U SPRN_DBAT1U /* Data BAT 1 Upper Register */ +#define DBAT2L SPRN_DBAT2L /* Data BAT 2 Lower Register */ +#define DBAT2U SPRN_DBAT2U /* Data BAT 2 Upper Register */ +#define DBAT3L SPRN_DBAT3L /* Data BAT 3 Lower Register */ +#define DBAT3U SPRN_DBAT3U /* Data BAT 3 Upper Register */ +#define DBAT4L SPRN_DBAT4L /* Data BAT 4 Lower Register */ +#define DBAT4U SPRN_DBAT4U /* Data BAT 4 Upper Register */ +#define DBAT5L SPRN_DBAT5L /* Data BAT 5 Lower Register */ +#define DBAT5U SPRN_DBAT5U /* Data BAT 5 Upper Register */ +#define DBAT6L SPRN_DBAT6L /* Data BAT 6 Lower Register */ +#define DBAT6U SPRN_DBAT6U /* Data BAT 6 Upper Register */ +#define DBAT7L SPRN_DBAT7L /* Data BAT 7 Lower Register */ +#define DBAT7U SPRN_DBAT7U /* Data BAT 7 Upper Register */ +#define DEC SPRN_DEC /* Decrement Register */ +#define DMISS SPRN_DMISS /* Data TLB Miss Register */ +#define DSISR SPRN_DSISR /* Data Storage Interrupt Status Register */ +#define EAR SPRN_EAR /* External Address Register */ +#define HASH1 SPRN_HASH1 /* Primary Hash Address Register */ +#define HASH2 SPRN_HASH2 /* Secondary Hash Address Register */ +#define HID0 SPRN_HID0 /* Hardware Implementation Register 0 */ +#define HID1 SPRN_HID1 /* Hardware Implementation Register 1 */ +#define IABR SPRN_IABR /* Instruction 
Address Breakpoint Register */ +#define IBAT0L SPRN_IBAT0L /* Instruction BAT 0 Lower Register */ +#define IBAT0U SPRN_IBAT0U /* Instruction BAT 0 Upper Register */ +#define IBAT1L SPRN_IBAT1L /* Instruction BAT 1 Lower Register */ +#define IBAT1U SPRN_IBAT1U /* Instruction BAT 1 Upper Register */ +#define IBAT2L SPRN_IBAT2L /* Instruction BAT 2 Lower Register */ +#define IBAT2U SPRN_IBAT2U /* Instruction BAT 2 Upper Register */ +#define IBAT3L SPRN_IBAT3L /* Instruction BAT 3 Lower Register */ +#define IBAT3U SPRN_IBAT3U /* Instruction BAT 3 Upper Register */ +#define IBAT4L SPRN_IBAT4L /* Instruction BAT 4 Lower Register */ +#define IBAT4U SPRN_IBAT4U /* Instruction BAT 4 Upper Register */ +#define IBAT5L SPRN_IBAT5L /* Instruction BAT 5 Lower Register */ +#define IBAT5U SPRN_IBAT5U /* Instruction BAT 5 Upper Register */ +#define IBAT6L SPRN_IBAT6L /* Instruction BAT 6 Lower Register */ +#define IBAT6U SPRN_IBAT6U /* Instruction BAT 6 Upper Register */ +#define IBAT7L SPRN_IBAT7L /* Instruction BAT 7 Lower Register */ +#define IBAT7U SPRN_IBAT7U /* Instruction BAT 7 Upper Register */ +#define ICMP SPRN_ICMP /* Instruction TLB Compare Register */ +#define IMISS SPRN_IMISS /* Instruction TLB Miss Register */ +#define IMMR SPRN_IMMR /* PPC 860/821 Internal Memory Map Register */ +#define L2CR SPRN_L2CR /* Classic PPC L2 cache control register */ +#define L3CR SPRN_L3CR /* PPC 745x L3 cache control register */ +#define LR SPRN_LR +#define PVR SPRN_PVR /* Processor Version */ +#define RPA SPRN_RPA /* Required Physical Address Register */ +#define SDR1 SPRN_SDR1 /* MMU hash base register */ +#define SPR0 SPRN_SPRG0 /* Supervisor Private Registers */ +#define SPR1 SPRN_SPRG1 +#define SPR2 SPRN_SPRG2 +#define SPR3 SPRN_SPRG3 +#define SPR4 SPRN_SPRG4 +#define SPR5 SPRN_SPRG5 +#define SPR6 SPRN_SPRG6 +#define SPR7 SPRN_SPRG7 +#define SPRG0 SPRN_SPRG0 +#define SPRG1 SPRN_SPRG1 +#define SPRG2 SPRN_SPRG2 +#define SPRG3 SPRN_SPRG3 +#define SPRG4 SPRN_SPRG4 +#define SPRG5 
SPRN_SPRG5 +#define SPRG6 SPRN_SPRG6 +#define SPRG7 SPRN_SPRG7 +#define SRR0 SPRN_SRR0 /* Save and Restore Register 0 */ +#define SRR1 SPRN_SRR1 /* Save and Restore Register 1 */ +#define SRR2 SPRN_SRR2 /* Save and Restore Register 2 */ +#define SRR3 SPRN_SRR3 /* Save and Restore Register 3 */ +#define ICTC SPRN_ICTC /* Instruction Cache Throttling Control Reg */ +#define THRM1 SPRN_THRM1 /* Thermal Management Register 1 */ +#define THRM2 SPRN_THRM2 /* Thermal Management Register 2 */ +#define THRM3 SPRN_THRM3 /* Thermal Management Register 3 */ +#define XER SPRN_XER +#define TBRL SPRN_TBRL /* Time Base Read Lower Register */ +#define TBRU SPRN_TBRU /* Time Base Read Upper Register */ +#define TBWL SPRN_TBWL /* Time Base Write Lower Register */ +#define TBWU SPRN_TBWU /* Time Base Write Upper Register */ + +/* Processor Version Register */ + +/* Processor Version Register (PVR) field extraction */ + +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ + +/* + * IBM has further subdivided the standard PowerPC 16-bit version and + * revision subfields of the PVR for the PowerPC 403s into the following: + */ + +#define PVR_FAM(pvr) (((pvr) >> 20) & 0xFFF) /* Family field */ +#define PVR_MEM(pvr) (((pvr) >> 16) & 0xF) /* Member field */ +#define PVR_CORE(pvr) (((pvr) >> 12) & 0xF) /* Core field */ +#define PVR_CFG(pvr) (((pvr) >> 8) & 0xF) /* Configuration field */ +#define PVR_MAJ(pvr) (((pvr) >> 4) & 0xF) /* Major revision field */ +#define PVR_MIN(pvr) (((pvr) >> 0) & 0xF) /* Minor revision field */ + +/* Processor Version Numbers */ + +#define PVR_403GA 0x00200000 +#define PVR_403GB 0x00200100 +#define PVR_403GC 0x00200200 +#define PVR_403GCX 0x00201400 +#define PVR_405GP 0x40110000 +#define PVR_STB03XXX 0x40310000 +#define PVR_NP405H 0x41410000 +#define PVR_NP405L 0x41610000 +#define PVR_440GP_RB 0x40120440 +#define PVR_440GP_RC1 0x40120481 +#define PVR_440GP_RC2 0x40200481 +#define 
PVR_440GX_RA 0x51b21850 +#define PVR_440GX_RB 0x51b21851 +#define PVR_440GX_RB1 0x51b21852 +#define PVR_601 0x00010000 +#define PVR_602 0x00050000 +#define PVR_603 0x00030000 +#define PVR_603e 0x00060000 +#define PVR_603ev 0x00070000 +#define PVR_603r 0x00071000 +#define PVR_604 0x00040000 +#define PVR_604e 0x00090000 +#define PVR_604r 0x000A0000 +#define PVR_620 0x00140000 +#define PVR_740 0x00080000 +#define PVR_750 PVR_740 +#define PVR_740P 0x10080000 +#define PVR_750P PVR_740P +#define PVR_7400 0x000C0000 +#define PVR_7410 0x800C0000 +#define PVR_7450 0x80000000 +/* + * For the 8xx processors, all of them report the same PVR family for + * the PowerPC core. The various versions of these processors must be + * differentiated by the version number in the Communication Processor + * Module (CPM). + */ +#define PVR_821 0x00500000 +#define PVR_823 PVR_821 +#define PVR_850 PVR_821 +#define PVR_860 PVR_821 +#define PVR_8240 0x00810100 +#define PVR_8245 0x80811014 +#define PVR_8260 PVR_8240 + +/* Segment Registers */ +#define SR0 0 +#define SR1 1 +#define SR2 2 +#define SR3 3 +#define SR4 4 +#define SR5 5 +#define SR6 6 +#define SR7 7 +#define SR8 8 +#define SR9 9 +#define SR10 10 +#define SR11 11 +#define SR12 12 +#define SR13 13 +#define SR14 14 +#define SR15 15 + + +/* returns r3 = relocated address of sym */ +/* modifies r0 */ +#define RELOC_SYM(sym) \ + mflr r3; \ + bl 1f; \ +1: mflr r0; \ + mtlr r3; \ + lis r3, 1b@ha; \ + ori r3, r3, 1b@l; \ + subf r0, r3, r0; \ + lis r3, sym@ha; \ + ori r3, r3, sym@l; \ + add r3, r3, r0 + diff --git a/kexec/arch/ppc64/Makefile b/kexec/arch/ppc64/Makefile new file mode 100644 index 0000000..9caf501 --- /dev/null +++ b/kexec/arch/ppc64/Makefile @@ -0,0 +1,26 @@ +# +# kexec ppc64 (linux booting linux) +# +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +ppc64_KEXEC_SRCS = kexec/arch/ppc64/kexec-elf-rel-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-zImage-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-elf-ppc64.c 
+ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/crashdump-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/fdt.c +ppc64_KEXEC_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) + +ppc64_ARCH_REUSE_INITRD = + +ppc64_FS2DT = kexec/fs2dt.c +ppc64_FS2DT_INCLUDE = -include $(srcdir)/kexec/arch/ppc64/crashdump-ppc64.h \ + -include $(srcdir)/kexec/arch/ppc64/kexec-ppc64.h + +ppc64_CPPFLAGS = -I$(srcdir)/kexec/libfdt + +dist += kexec/arch/ppc64/Makefile $(ppc64_KEXEC_SRCS) \ + kexec/arch/ppc64/kexec-ppc64.h kexec/arch/ppc64/crashdump-ppc64.h \ + kexec/arch/ppc64/include/arch/fdt.h \ + kexec/arch/ppc64/include/arch/options.h + diff --git a/kexec/arch/ppc64/crashdump-ppc64.c b/kexec/arch/ppc64/crashdump-ppc64.c new file mode 100644 index 0000000..6d47898 --- /dev/null +++ b/kexec/arch/ppc64/crashdump-ppc64.c @@ -0,0 +1,644 @@ +/* + * kexec: Linux boots Linux + * + * Created by: R Sharada (sharada@in.ibm.com) + * Copyright (C) IBM Corporation, 2005. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include <dirent.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../crashdump.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" + +#define DEVTREE_CRASHKERNEL_BASE "/proc/device-tree/chosen/linux,crashkernel-base" +#define DEVTREE_CRASHKERNEL_SIZE "/proc/device-tree/chosen/linux,crashkernel-size" + +unsigned int num_of_lmb_sets; +unsigned int is_dyn_mem_v2; +uint64_t lmb_size; + +static struct crash_elf_info elf_info64 = +{ + class: ELFCLASS64, +#if BYTE_ORDER == LITTLE_ENDIAN + data: ELFDATA2LSB, +#else + data: ELFDATA2MSB, +#endif + machine: EM_PPC64, + page_offset: PAGE_OFFSET, + lowmem_limit: MAXMEM, +}; + +static struct crash_elf_info elf_info32 = +{ + class: ELFCLASS32, + data: ELFDATA2MSB, + machine: EM_PPC64, + page_offset: PAGE_OFFSET, + lowmem_limit: MAXMEM, +}; + +extern struct arch_options_t arch_options; + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region + */ +static struct memory_range *crash_memory_range = NULL; + +/* Define a variable to replace the CRASH_MAX_MEMORY_RANGES macro */ +static int crash_max_memory_ranges; + +/* + * Used to save various memory ranges/regions needed for the captured + * kernel to boot. (lime memmap= option in other archs) + */ +mem_rgns_t usablemem_rgns = {0, NULL}; + +static unsigned long long cstart, cend; +static int memory_ranges; + +/* + * Exclude the region that lies within crashkernel and above the memory + * limit which is reflected by mem= kernel option. + */ +static void exclude_crash_region(uint64_t start, uint64_t end) +{ + /* If memory_limit is set then exclude the memory region above it. 
*/ + if (memory_limit) { + if (start >= memory_limit) + return; + if (end > memory_limit) + end = memory_limit; + } + + if (cstart < end && cend > start) { + if (start < cstart && end > cend) { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = cstart; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + crash_memory_range[memory_ranges].start = cend; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } else if (start < cstart) { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = cstart; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } else if (end > cend) { + crash_memory_range[memory_ranges].start = cend; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } + } else { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } +} + +static int get_dyn_reconf_crash_memory_ranges(void) +{ + uint64_t start, end; + uint64_t startrange, endrange; + uint64_t size; + char fname[128], buf[32]; + FILE *file; + unsigned int i; + int n; + uint32_t flags; + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, "ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory"); + if (is_dyn_mem_v2) + strcat(fname, "-v2"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + return -1; + } + + fseek(file, 4, SEEK_SET); + startrange = endrange = 0; + size = lmb_size; + for (i = 0; i < num_of_lmb_sets; i++) { + if ((n = fread(buf, 1, LMB_ENTRY_SIZE, file)) < 0) { + perror(fname); + fclose(file); + return -1; + } + if (memory_ranges >= (max_memory_ranges + 1)) { + /* No space to insert another element. 
*/ + fprintf(stderr, + "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + fclose(file); + return -1; + } + + /* + * If the property is ibm,dynamic-memory-v2, the first 4 bytes + * tell the number of sequential LMBs in this entry. + */ + if (is_dyn_mem_v2) + size = be32_to_cpu(((unsigned int *)buf)[0]) * lmb_size; + + start = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR])); + end = start + size; + if (start == 0 && end >= (BACKUP_SRC_END + 1)) + start = BACKUP_SRC_END + 1; + + flags = be32_to_cpu((*((uint32_t *)&buf[DRCONF_FLAGS]))); + /* skip this block if the reserved bit is set in flags (0x80) + or if the block is not assigned to this partition (0x8) */ + if ((flags & 0x80) || !(flags & 0x8)) + continue; + + if (start != endrange) { + if (startrange != endrange) + exclude_crash_region(startrange, endrange); + startrange = start; + } + endrange = end; + } + if (startrange != endrange) + exclude_crash_region(startrange, endrange); + + fclose(file); + return 0; +} + +/* + * For a given memory node, check if it is mapped to system RAM or + * to onboard memory on accelerator device like GPU card or such. 
+ */ +static int is_coherent_device_mem(const char *fname) +{ + char fpath[PATH_LEN]; + char buf[32]; + DIR *dmem; + FILE *file; + struct dirent *mentry; + int cnt, ret = 0; + + strcpy(fpath, fname); + if ((dmem = opendir(fpath)) == NULL) { + perror(fpath); + return -1; + } + + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "compatible")) + continue; + + strcat(fpath, "/compatible"); + if ((file = fopen(fpath, "r")) == NULL) { + perror(fpath); + ret = -1; + break; + } + if ((cnt = fread(buf, 1, 32, file)) < 0) { + perror(fpath); + fclose(file); + ret = -1; + break; + } + if (!strncmp(buf, "ibm,coherent-device-memory", 26)) { + fclose(file); + ret = 1; + break; + } + fclose(file); + } + + closedir(dmem); + return ret; +} + + +/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to + * create Elf headers. Keeping it separate from get_memory_ranges() as + * requirements are different in the case of normal kexec and crashdumps. + * + * Normal kexec needs to look at all of available physical memory irrespective + * of the fact how much of it is being used by currently running kernel. + * Crashdumps need to have access to memory regions actually being used by + * running kernel. Expecting a different file/data structure than /proc/iomem + * to look into down the line. May be something like /proc/kernelmem or may + * be zone data structures exported from kernel. 
+ */ +static int get_crash_memory_ranges(struct memory_range **range, int *ranges) +{ + + char device_tree[256] = "/proc/device-tree/"; + char fname[PATH_LEN]; + char buf[MAXBYTES]; + DIR *dir, *dmem; + FILE *file; + struct dirent *dentry, *mentry; + int n, ret, crash_rng_len = 0; + unsigned long long start, end; + int page_size; + + crash_max_memory_ranges = max_memory_ranges + 6; + crash_rng_len = sizeof(struct memory_range) * crash_max_memory_ranges; + + crash_memory_range = (struct memory_range *) malloc(crash_rng_len); + if (!crash_memory_range) { + fprintf(stderr, "Allocation for crash memory range failed\n"); + return -1; + } + memset(crash_memory_range, 0, crash_rng_len); + + /* create a separate program header for the backup region */ + crash_memory_range[0].start = BACKUP_SRC_START; + crash_memory_range[0].end = BACKUP_SRC_END + 1; + crash_memory_range[0].type = RANGE_RAM; + memory_ranges++; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + goto err; + } + + cstart = crash_base; + cend = crash_base + crash_size; + + while ((dentry = readdir(dir)) != NULL) { + if (!strncmp(dentry->d_name, + "ibm,dynamic-reconfiguration-memory", 35)){ + get_dyn_reconf_crash_memory_ranges(); + continue; + } + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + + ret = is_coherent_device_mem(fname); + if (ret == -1) { + closedir(dir); + goto err; + } else if (ret == 1) { + /* + * Avoid adding this memory region as it is not + * mapped to system RAM. 
+ */ + continue; + } + + if ((dmem = opendir(fname)) == NULL) { + perror(fname); + closedir(dir); + goto err; + } + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + closedir(dmem); + closedir(dir); + goto err; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + fclose(file); + closedir(dmem); + closedir(dir); + goto err; + } + if (memory_ranges >= (max_memory_ranges + 1)) { + /* No space to insert another element. */ + fprintf(stderr, + "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + goto err; + } + + start = be64_to_cpu(((unsigned long long *)buf)[0]); + end = start + + be64_to_cpu(((unsigned long long *)buf)[1]); + if (start == 0 && end >= (BACKUP_SRC_END + 1)) + start = BACKUP_SRC_END + 1; + + exclude_crash_region(start, end); + fclose(file); + } + closedir(dmem); + } + closedir(dir); + + /* + * If RTAS region is overlapped with crashkernel, need to create ELF + * Program header for the overlapped memory. + */ + if (crash_base < rtas_base + rtas_size && + rtas_base < crash_base + crash_size) { + page_size = getpagesize(); + cstart = rtas_base; + cend = rtas_base + rtas_size; + if (cstart < crash_base) + cstart = crash_base; + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + /* + * The rtas section created here is formed by reading rtas-base + * and rtas-size from /proc/device-tree/rtas. Unfortunately + * rtas-size is not required to be a multiple of PAGE_SIZE + * The remainder of the page it ends on is just garbage, and is + * safe to read, its just not accounted in rtas-size. Since + * we're creating an elf section here though, lets round it up + * to the next page size boundary though, so makedumpfile can + * read it safely without going south on us. 
+ */ + cend = _ALIGN(cend, page_size); + + crash_memory_range[memory_ranges].start = cstart; + crash_memory_range[memory_ranges++].end = cend; + } + + /* + * If OPAL region is overlapped with crashkernel, need to create ELF + * Program header for the overlapped memory. + */ + if (crash_base < opal_base + opal_size && + opal_base < crash_base + crash_size) { + page_size = getpagesize(); + cstart = opal_base; + cend = opal_base + opal_size; + if (cstart < crash_base) + cstart = crash_base; + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + /* + * The opal section created here is formed by reading opal-base + * and opal-size from /proc/device-tree/ibm,opal. Unfortunately + * opal-size is not required to be a multiple of PAGE_SIZE + * The remainder of the page it ends on is just garbage, and is + * safe to read, its just not accounted in opal-size. Since + * we're creating an elf section here though, lets round it up + * to the next page size boundary though, so makedumpfile can + * read it safely without going south on us. + */ + cend = _ALIGN(cend, page_size); + + crash_memory_range[memory_ranges].start = cstart; + crash_memory_range[memory_ranges++].end = cend; + } + *range = crash_memory_range; + *ranges = memory_ranges; + + int j; + dbgprintf("CRASH MEMORY RANGES\n"); + for(j = 0; j < *ranges; j++) { + start = crash_memory_range[j].start; + end = crash_memory_range[j].end; + dbgprintf("%016Lx-%016Lx\n", start, end); + } + + return 0; + +err: + if (crash_memory_range) + free(crash_memory_range); + return -1; +} + +static int add_cmdline_param(char *cmdline, uint64_t addr, char *cmdstr, + char *byte) +{ + int cmdline_size, cmdlen, len, align = 1024; + char str[COMMAND_LINE_SIZE], *ptr; + + /* Passing in =xxxK / =xxxM format. 
Saves space required in cmdline.*/ + switch (byte[0]) { + case 'K': + if (addr%align) + return -1; + addr = addr/align; + break; + case 'M': + addr = addr/(align *align); + break; + } + ptr = str; + strcpy(str, cmdstr); + ptr += strlen(str); + ultoa(addr, ptr); + strcat(str, byte); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + cmdline_size = COMMAND_LINE_SIZE; + if (cmdlen > (cmdline_size - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + dbgprintf("Command line after adding elfcorehdr: %s\n", cmdline); + return 0; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for backup region, another segment for storing elf headers + * for crash memory image. + */ +int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, + uint64_t max_addr, unsigned long min_base) +{ + void *tmp; + unsigned long sz; + uint64_t elfcorehdr; + int nr_ranges, align = 1024, i; + unsigned long long end; + struct memory_range *mem_range; + + if (get_crash_memory_ranges(&mem_range, &nr_ranges) < 0) + return -1; + + info->backup_src_start = BACKUP_SRC_START; + info->backup_src_size = BACKUP_SRC_SIZE; + /* Create a backup region segment to store backup data*/ + sz = _ALIGN(BACKUP_SRC_SIZE, align); + tmp = xmalloc(sz); + memset(tmp, 0, sz); + info->backup_start = add_buffer(info, tmp, sz, sz, align, + 0, max_addr, 1); + reserve(info->backup_start, sz); + + /* On ppc64 memory ranges in device-tree is denoted as start + * and size rather than start and end, as is the case with + * other architectures like i386 . Because of this when loading + * the memory ranges in crashdump-elf.c the filesz calculation + * [ end - start + 1 ] goes for a toss. 
+ * + * To be in sync with other archs adjust the end value for + * every crash memory range before calling the generic function + */ + + for (i = 0; i < nr_ranges; i++) { + end = crash_memory_range[i].end - 1; + crash_memory_range[i].end = end; + } + + + /* Create elf header segment and store crash image data. */ + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + if (crash_create_elf64_headers(info, &elf_info64, + crash_memory_range, nr_ranges, + &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) { + free (tmp); + return -1; + } + } + else { + if (crash_create_elf32_headers(info, &elf_info32, + crash_memory_range, nr_ranges, + &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) { + free(tmp); + return -1; + } + } + + elfcorehdr = add_buffer(info, tmp, sz, sz, align, min_base, + max_addr, 1); + reserve(elfcorehdr, sz); + /* modify and store the cmdline in a global array. This is later + * read by flatten_device_tree and modified if required + */ + add_cmdline_param(mod_cmdline, elfcorehdr, " elfcorehdr=", "K"); + return 0; +} + +/* + * Used to save various memory regions needed for the captured kernel. 
+ */ + +void add_usable_mem_rgns(unsigned long long base, unsigned long long size) +{ + unsigned int i; + unsigned long long end = base + size; + unsigned long long ustart, uend; + + base = _ALIGN_DOWN(base, getpagesize()); + end = _ALIGN_UP(end, getpagesize()); + + for (i=0; i < usablemem_rgns.size; i++) { + ustart = usablemem_rgns.ranges[i].start; + uend = usablemem_rgns.ranges[i].end; + if (base < uend && end > ustart) { + if ((base >= ustart) && (end <= uend)) + return; + if (base < ustart && end > uend) { + usablemem_rgns.ranges[i].start = base; + usablemem_rgns.ranges[i].end = end; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new base:%llx new size:%llx\n", + i, base, size); +#endif + return; + } else if (base < ustart) { + usablemem_rgns.ranges[i].start = base; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new base:%llx new size:%llx", + i, base, usablemem_rgns.ranges[i].end - base); +#endif + return; + } else if (end > uend){ + usablemem_rgns.ranges[i].end = end; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new end:%llx, new size:%llx", + i, end, end - usablemem_rgns.ranges[i].start); +#endif + return; + } + } + } + usablemem_rgns.ranges[usablemem_rgns.size].start = base; + usablemem_rgns.ranges[usablemem_rgns.size++].end = end; + + dbgprintf("usable memory rgns size:%u base:%llx size:%llx\n", + usablemem_rgns.size, base, size); +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + unsigned long long value; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_BASE, &value)) + *start = be64_to_cpu(value); + else + return -1; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_SIZE, &value)) + *end = *start + be64_to_cpu(value) - 1; + else + return -1; + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + int fd; + + fd = open(DEVTREE_CRASHKERNEL_BASE, O_RDONLY); + if (fd < 0) + return 0; + close(fd); + return 1; +} + +#if 0 +static int sort_regions(mem_rgns_t *rgn) +{ + int i, j; + unsigned long long tstart, 
tend; + for (i = 0; i < rgn->size; i++) { + for (j = 0; j < rgn->size - i - 1; j++) { + if (rgn->ranges[j].start > rgn->ranges[j+1].start) { + tstart = rgn->ranges[j].start; + tend = rgn->ranges[j].end; + rgn->ranges[j].start = rgn->ranges[j+1].start; + rgn->ranges[j].end = rgn->ranges[j+1].end; + rgn->ranges[j+1].start = tstart; + rgn->ranges[j+1].end = tend; + } + } + } + return 0; + +} +#endif + diff --git a/kexec/arch/ppc64/crashdump-ppc64.h b/kexec/arch/ppc64/crashdump-ppc64.h new file mode 100644 index 0000000..b0cba8a --- /dev/null +++ b/kexec/arch/ppc64/crashdump-ppc64.h @@ -0,0 +1,51 @@ +#ifndef CRASHDUMP_PPC64_H +#define CRASHDUMP_PPC64_H + +#include <stdint.h> +#include <sys/types.h> + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + uint64_t max_addr, unsigned long min_base); +void add_usable_mem_rgns(unsigned long long base, unsigned long long size); + +#define PAGE_OFFSET 0xC000000000000000ULL +#define KERNELBASE PAGE_OFFSET +#define VMALLOCBASE 0xD000000000000000ULL + +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define MAXMEM (-(unsigned long)(KERNELBASE-VMALLOCBASE)) + +#define COMMAND_LINE_SIZE 2048 /* from kernel */ +/* Backup Region, First 64K of System RAM. */ +#define BACKUP_SRC_START 0x0000 +#define BACKUP_SRC_END 0xffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) + +#define KDUMP_BACKUP_LIMIT BACKUP_SRC_SIZE + +#define KERNEL_RUN_AT_ZERO_MAGIC 0x72756e30 /* "run0" */ + +extern uint64_t crash_base; +extern uint64_t crash_size; +extern uint64_t memory_limit; +extern unsigned int rtas_base; +extern unsigned int rtas_size; +extern uint64_t opal_base; +extern uint64_t opal_size; + +/* + * In case of ibm,dynamic-memory-v2 property, this is the number of LMB + * sets where each set represents a group of sequential LMB entries. In + * case of ibm,dynamic-memory property, the number of LMB sets is nothing + * but the total number of LMB entries. 
+ */ +extern unsigned int num_of_lmb_sets; +extern unsigned int is_dyn_mem_v2; +extern uint64_t lmb_size; + +#define LMB_ENTRY_SIZE 24 +#define DRCONF_ADDR (is_dyn_mem_v2 ? 4 : 0) +#define DRCONF_FLAGS 20 + +#endif /* CRASHDUMP_PPC64_H */ diff --git a/kexec/arch/ppc64/fdt.c b/kexec/arch/ppc64/fdt.c new file mode 100644 index 0000000..8bc6d2d --- /dev/null +++ b/kexec/arch/ppc64/fdt.c @@ -0,0 +1,78 @@ +/* + * ppc64 fdt fixups + * + * Copyright 2015 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <arch/fdt.h> +#include <libfdt.h> +#include <stdio.h> +#include <stdlib.h> + +/* + * Let the kernel know it booted from kexec, as some things (e.g. + * secondary CPU release) may work differently. + */ +static int fixup_kexec_prop(void *fdt) +{ + int err, nodeoffset; + + nodeoffset = fdt_subnode_offset(fdt, 0, "chosen"); + if (nodeoffset < 0) + nodeoffset = fdt_add_subnode(fdt, 0, "chosen"); + if (nodeoffset < 0) { + printf("%s: add /chosen %s\n", __func__, + fdt_strerror(nodeoffset)); + return -1; + } + + err = fdt_setprop(fdt, nodeoffset, "linux,booted-from-kexec", + NULL, 0); + if (err < 0) { + printf("%s: couldn't write linux,booted-from-kexec: %s\n", + __func__, fdt_strerror(err)); + return -1; + } + + return 0; +} + + +/* + * For now, assume that the added content fits in the file. + * This should be the case when flattening from /proc/device-tree, + * and when passing in a dtb, dtc can be told to add padding. 
+ */ +int fixup_dt(char **fdt, off_t *size) +{ + int ret; + + *size += 4096; + *fdt = realloc(*fdt, *size); + if (!*fdt) { + fprintf(stderr, "%s: out of memory\n", __func__); + return -1; + } + + ret = fdt_open_into(*fdt, *fdt, *size); + if (ret < 0) { + fprintf(stderr, "%s: fdt_open_into: %s\n", __func__, + fdt_strerror(ret)); + return -1; + } + + ret = fixup_kexec_prop(*fdt); + if (ret < 0) + return ret; + + return 0; +} diff --git a/kexec/arch/ppc64/include/arch/fdt.h b/kexec/arch/ppc64/include/arch/fdt.h new file mode 100644 index 0000000..b19f185 --- /dev/null +++ b/kexec/arch/ppc64/include/arch/fdt.h @@ -0,0 +1,8 @@ +#ifndef KEXEC_ARCH_PPC64_FDT +#define KEXEC_ARCH_PPC64_FDT + +#include <sys/types.h> + +int fixup_dt(char **fdt, off_t *size); + +#endif diff --git a/kexec/arch/ppc64/include/arch/options.h b/kexec/arch/ppc64/include/arch/options.h new file mode 100644 index 0000000..2bca96a --- /dev/null +++ b/kexec/arch/ppc64/include/arch/options.h @@ -0,0 +1,51 @@ +#ifndef KEXEC_ARCH_PPC64_OPTIONS_H +#define KEXEC_ARCH_PPC64_OPTIONS_H + +#define OPT_ELF64_CORE (OPT_MAX+0) +#define OPT_DT_NO_OLD_ROOT (OPT_MAX+1) +#define OPT_ARCH_MAX (OPT_MAX+2) + +/* All 'local' loader options: */ +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_RAMDISK (OPT_ARCH_MAX+1) +#define OPT_DEVICETREEBLOB (OPT_ARCH_MAX+2) +#define OPT_ARGS_IGNORE (OPT_ARCH_MAX+3) +#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+4) + +/* Options relevant to the architecture (excluding loader-specific ones): */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "elf64-core-headers", 0, 0, OPT_ELF64_CORE }, \ + { "dt-no-old-root", 0, 0, OPT_DT_NO_OLD_ROOT }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. 
+ * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "append", 1, NULL, OPT_APPEND }, \ + { "ramdisk", 1, NULL, OPT_RAMDISK }, \ + { "initrd", 1, NULL, OPT_RAMDISK }, \ + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, \ + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, \ + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + +#define KEXEC_ALL_OPT_STR KEXEC_OPT_STR + + +#endif /* KEXEC_ARCH_PPC64_OPTIONS_H */ diff --git a/kexec/arch/ppc64/kexec-elf-ppc64.c b/kexec/arch/ppc64/kexec-elf-ppc64.c new file mode 100644 index 0000000..01d045f --- /dev/null +++ b/kexec/arch/ppc64/kexec-elf-ppc64.c @@ -0,0 +1,496 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <linux/elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" +#include <libfdt.h> +#include <arch/fdt.h> +#include <arch/options.h> + +uint64_t initrd_base, initrd_size; +unsigned char reuse_initrd = 0; +const char *ramdisk; + +int elf_ppc64_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + goto out; + } + + /* Verify the architecuture specific bits */ + if ((ehdr.e_machine != EM_PPC64) && (ehdr.e_machine != EM_PPC)) { + /* for a different architecture */ + result = -1; + goto out; + } + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +void arch_reuse_initrd(void) +{ + reuse_initrd = 1; +} + +static int read_prop(char *name, void *value, size_t len) +{ + int fd; + size_t rlen; + + fd = open(name, O_RDONLY); + if (fd == -1) + return -1; + + rlen = read(fd, value, len); + if (rlen < 0) + fprintf(stderr, "Warning : Can't read %s : %s", + name, strerror(errno)); + else if (rlen != len) + fprintf(stderr, "Warning : short read from %s", name); + + close(fd); + return 0; +} + +static int elf_ppc64_load_file(int argc, char **argv, struct kexec_info *info) +{ + int ret = 0; + char *cmdline, *dtb; + char *append_cmdline = NULL; + char *reuse_cmdline = NULL; + int opt, cmdline_len = 0; + + /* See options.h -- add any more there, too. 
*/ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE}, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* Parse command line arguments */ + cmdline = 0; + dtb = 0; + ramdisk = 0; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append_cmdline = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_DEVICETREEBLOB: + dtb = optarg; + break; + case OPT_ARGS_IGNORE: + break; + case OPT_REUSE_CMDLINE: + reuse_cmdline = get_command_line(); + break; + } + } + + if (dtb) + die("--dtb not supported while using --kexec-file-syscall.\n"); + + if (reuse_initrd) + die("--reuseinitrd not supported with --kexec-file-syscall.\n"); + + cmdline = concat_cmdline(reuse_cmdline, append_cmdline); + if (!reuse_cmdline) + free(reuse_cmdline); + + if (cmdline) { + cmdline_len = strlen(cmdline) + 1; + } else { + cmdline = strdup("\0"); + cmdline_len = 1; + } + + if (ramdisk) { + info->initrd_fd = open(ramdisk, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, "Could not open initrd file %s:%s\n", + ramdisk, strerror(errno)); + ret = -1; + goto out; + } + } + + info->command_line = cmdline; + info->command_line_len = cmdline_len; + return ret; +out: + if (cmdline_len == 1) + free(cmdline); + return ret; +} + +int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *cmdline, *modified_cmdline = NULL; + char *reuse_cmdline = NULL; + char *append_cmdline = 
NULL; + const char *devicetreeblob; + uint64_t max_addr, hole_addr; + char *seg_buf = NULL; + off_t seg_size = 0; + struct mem_phdr *phdr; + size_t size; +#ifdef NEED_RESERVE_DTB + uint64_t *rsvmap_ptr; + struct bootblock *bb_ptr; +#endif + int result, opt; + uint64_t my_kernel, my_dt_offset; + uint64_t my_opal_base = 0, my_opal_entry = 0; + unsigned int my_panic_kernel; + uint64_t my_stack, my_backup_start; + uint64_t toc_addr; + uint32_t my_run_at_load; + unsigned int slave_code[256/sizeof (unsigned int)], master_entry; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE}, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + if (info->file_mode) + return elf_ppc64_load_file(argc, argv, info); + + /* Parse command line arguments */ + initrd_base = 0; + initrd_size = 0; + cmdline = 0; + ramdisk = 0; + devicetreeblob = 0; + max_addr = 0xFFFFFFFFFFFFFFFFULL; + hole_addr = 0; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append_cmdline = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_DEVICETREEBLOB: + devicetreeblob = optarg; + break; + case OPT_ARGS_IGNORE: + break; + case OPT_REUSE_CMDLINE: + reuse_cmdline = get_command_line(); + break; + } + } + + cmdline = concat_cmdline(reuse_cmdline, append_cmdline); + if (!reuse_cmdline) + free(reuse_cmdline); + + if (!cmdline) + fprintf(stdout, "Warning: append= option is not passed. 
Using the first kernel root partition\n"); + + if (ramdisk && reuse_initrd) + die("Can't specify --ramdisk or --initrd with --reuseinitrd\n"); + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (cmdline) { + strncpy(modified_cmdline, cmdline, COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + } + + /* Parse the Elf file */ + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + free_elf_info(&ehdr); + return result; + } + + /* Load the Elf data. Physical load addresses in elf64 header do not + * show up correctly. Use user supplied address for now to patch the + * elf header + */ + + phdr = &ehdr.e_phdr[0]; + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + my_kernel = hole_addr = locate_hole(info, size, 0, 0, max_addr, 1); + ehdr.e_phdr[0].p_paddr = hole_addr; + result = elf_exec_load(&ehdr, info); + if (result < 0) { + free_elf_info(&ehdr); + return result; + } + + /* If panic kernel is being loaded, additional segments need + * to be created. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info, modified_cmdline, + max_addr, 0); + if (result < 0) + return -1; + /* Use new command line. 
*/ + cmdline = modified_cmdline; + } + + /* Add v2wrap to the current image */ + elf_rel_build_load(info, &info->rhdr, purgatory, + purgatory_size, 0, max_addr, 1, 0); + + /* Add a ram-disk to the current image + * Note: Add the ramdisk after elf_rel_build_load + */ + if (ramdisk) { + if (devicetreeblob) { + fprintf(stderr, + "Can't use ramdisk with device tree blob input\n"); + return -1; + } + seg_buf = slurp_file(ramdisk, &seg_size); + hole_addr = add_buffer(info, seg_buf, seg_size, seg_size, + 0, 0, max_addr, 1); + initrd_base = hole_addr; + initrd_size = seg_size; + } /* ramdisk */ + + if (devicetreeblob) { + /* Grab device tree from buffer */ + seg_buf = slurp_file(devicetreeblob, &seg_size); + } else { + /* create from fs2dt */ + create_flatten_tree(&seg_buf, &seg_size, cmdline); + } + + result = fixup_dt(&seg_buf, &seg_size); + if (result < 0) + return result; + + my_dt_offset = add_buffer(info, seg_buf, seg_size, seg_size, + 0, 0, max_addr, -1); + +#ifdef NEED_RESERVE_DTB + /* patch reserve map address for flattened device-tree + * find last entry (both 0) in the reserve mem list. 
Assume DT + * entry is before this one + */ + bb_ptr = (struct bootblock *)(seg_buf); + rsvmap_ptr = (uint64_t *)(seg_buf + be32_to_cpu(bb_ptr->off_mem_rsvmap)); + while (*rsvmap_ptr || *(rsvmap_ptr+1)) + rsvmap_ptr += 2; + rsvmap_ptr -= 2; + *rsvmap_ptr = cpu_to_be64(my_dt_offset); + rsvmap_ptr++; + *rsvmap_ptr = cpu_to_be64((uint64_t)be32_to_cpu(bb_ptr->totalsize)); +#endif + + if (read_prop("/proc/device-tree/ibm,opal/opal-base-address", + &my_opal_base, sizeof(my_opal_base)) == 0) { + my_opal_base = be64_to_cpu(my_opal_base); + elf_rel_set_symbol(&info->rhdr, "opal_base", + &my_opal_base, sizeof(my_opal_base)); + } + + if (read_prop("/proc/device-tree/ibm,opal/opal-entry-address", + &my_opal_entry, sizeof(my_opal_entry)) == 0) { + my_opal_entry = be64_to_cpu(my_opal_entry); + elf_rel_set_symbol(&info->rhdr, "opal_entry", + &my_opal_entry, sizeof(my_opal_entry)); + } + + /* Set kernel */ + elf_rel_set_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel)); + + /* Set dt_offset */ + elf_rel_set_symbol(&info->rhdr, "dt_offset", &my_dt_offset, + sizeof(my_dt_offset)); + + /* get slave code from new kernel, put in purgatory */ + elf_rel_get_symbol(&info->rhdr, "purgatory_start", slave_code, + sizeof(slave_code)); + master_entry = slave_code[0]; + memcpy(slave_code, phdr->p_data, sizeof(slave_code)); + slave_code[0] = master_entry; + elf_rel_set_symbol(&info->rhdr, "purgatory_start", slave_code, + sizeof(slave_code)); + + if (info->kexec_flags & KEXEC_ON_CRASH) { + my_panic_kernel = 1; + /* Set panic flag */ + elf_rel_set_symbol(&info->rhdr, "panic_kernel", + &my_panic_kernel, sizeof(my_panic_kernel)); + + /* Set backup address */ + my_backup_start = info->backup_start; + elf_rel_set_symbol(&info->rhdr, "backup_start", + &my_backup_start, sizeof(my_backup_start)); + + /* Tell relocatable kernel to run at load address + * via word before slave code in purgatory + */ + + elf_rel_get_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + 
if (my_run_at_load == KERNEL_RUN_AT_ZERO_MAGIC) + my_run_at_load = 1; + /* else it should be a fixed offset image */ + elf_rel_set_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + } + + /* Set stack address */ + my_stack = locate_hole(info, 16*1024, 0, 0, max_addr, 1); + my_stack += 16*1024; + elf_rel_set_symbol(&info->rhdr, "stack", &my_stack, sizeof(my_stack)); + + /* Set toc */ + toc_addr = my_r2(&info->rhdr); + elf_rel_set_symbol(&info->rhdr, "my_toc", &toc_addr, sizeof(toc_addr)); + + /* Set debug */ + elf_rel_set_symbol(&info->rhdr, "debug", &my_debug, sizeof(my_debug)); + + my_kernel = 0; + my_dt_offset = 0; + my_panic_kernel = 0; + my_backup_start = 0; + my_stack = 0; + toc_addr = 0; + my_run_at_load = 0; + my_debug = 0; + my_opal_base = 0; + my_opal_entry = 0; + + elf_rel_get_symbol(&info->rhdr, "opal_base", &my_opal_base, + sizeof(my_opal_base)); + elf_rel_get_symbol(&info->rhdr, "opal_entry", &my_opal_entry, + sizeof(my_opal_entry)); + elf_rel_get_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel)); + elf_rel_get_symbol(&info->rhdr, "dt_offset", &my_dt_offset, + sizeof(my_dt_offset)); + elf_rel_get_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + elf_rel_get_symbol(&info->rhdr, "panic_kernel", &my_panic_kernel, + sizeof(my_panic_kernel)); + elf_rel_get_symbol(&info->rhdr, "backup_start", &my_backup_start, + sizeof(my_backup_start)); + elf_rel_get_symbol(&info->rhdr, "stack", &my_stack, sizeof(my_stack)); + elf_rel_get_symbol(&info->rhdr, "my_toc", &toc_addr, + sizeof(toc_addr)); + elf_rel_get_symbol(&info->rhdr, "debug", &my_debug, sizeof(my_debug)); + + dbgprintf("info->entry is %p\n", info->entry); + dbgprintf("kernel is %llx\n", (unsigned long long)my_kernel); + dbgprintf("dt_offset is %llx\n", + (unsigned long long)my_dt_offset); + dbgprintf("run_at_load flag is %x\n", my_run_at_load); + dbgprintf("panic_kernel is %x\n", my_panic_kernel); + dbgprintf("backup_start is %llx\n", + 
(unsigned long long)my_backup_start); + dbgprintf("stack is %llx\n", (unsigned long long)my_stack); + dbgprintf("toc_addr is %llx\n", (unsigned long long)toc_addr); + dbgprintf("purgatory size is %zu\n", purgatory_size); + dbgprintf("debug is %d\n", my_debug); + dbgprintf("opal_base is %llx\n", (unsigned long long) my_opal_base); + dbgprintf("opal_entry is %llx\n", (unsigned long long) my_opal_entry); + + return 0; +} + +void elf_ppc64_usage(void) +{ + fprintf(stderr, " --command-line=<Command line> command line to append.\n"); + fprintf(stderr, " --append=<Command line> same as --command-line.\n"); + fprintf(stderr, " --ramdisk=<filename> Initial RAM disk.\n"); + fprintf(stderr, " --initrd=<filename> same as --ramdisk.\n"); + fprintf(stderr, " --devicetreeblob=<filename> Specify device tree blob file.\n"); + fprintf(stderr, " "); + fprintf(stderr, "Not applicable while using --kexec-file-syscall.\n"); + fprintf(stderr, " --reuse-cmdline Use kernel command line from running system.\n"); + fprintf(stderr, " --dtb=<filename> same as --devicetreeblob.\n"); + + fprintf(stderr, "elf support is still broken\n"); +} diff --git a/kexec/arch/ppc64/kexec-elf-rel-ppc64.c b/kexec/arch/ppc64/kexec-elf-rel-ppc64.c new file mode 100644 index 0000000..51b1354 --- /dev/null +++ b/kexec/arch/ppc64/kexec-elf-rel-ppc64.c @@ -0,0 +1,204 @@ +#include <stdio.h> +#include <elf.h> +#include <string.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-ppc64.h" + +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define STO_PPC64_LOCAL_BIT 5 +#define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT) +#define PPC64_LOCAL_ENTRY_OFFSET(other) \ + (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2) + +static unsigned int local_entry_offset(struct mem_sym *sym) +{ + /* If this symbol has a local entry point, use it. 
*/ + return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); +} +#else +static unsigned int local_entry_offset(struct mem_sym *UNUSED(sym)) +{ + return 0; +} +#endif + +static struct mem_shdr *toc_section(const struct mem_ehdr *ehdr) +{ + struct mem_shdr *shdr, *shdr_end; + unsigned char *strtab; + + strtab = (unsigned char *)ehdr->e_shdr[ehdr->e_shstrndx].sh_data; + shdr_end = &ehdr->e_shdr[ehdr->e_shnum]; + for (shdr = ehdr->e_shdr; shdr != shdr_end; shdr++) { + if (shdr->sh_size && + strcmp((char *)&strtab[shdr->sh_name], ".toc") == 0) { + return shdr; + } + } + + return NULL; +} + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + struct mem_shdr *toc; + + if (ehdr->ei_class != ELFCLASS64) { + return 0; + } + if (ehdr->e_machine != EM_PPC64) { + return 0; + } + + /* Ensure .toc is sufficiently aligned. */ + toc = toc_section(ehdr); + if (toc && toc->sh_addralign < 256) + toc->sh_addralign = 256; + return 1; +} + +/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this + gives the value maximum span in an instruction which uses a signed + offset) */ +unsigned long my_r2(const struct mem_ehdr *ehdr) +{ + struct mem_shdr *shdr; + + shdr = toc_section(ehdr); + if (!shdr) { + die("TOC reloc without a toc section?"); + } + + return shdr->sh_addr + 0x8000; +} + +static void do_relative_toc(unsigned long value, uint16_t *location, + unsigned long mask, int complain_signed) +{ + if (complain_signed && (value + 0x8000 > 0xffff)) { + die("TOC16 relocation overflows (%lu)\n", value); + } + + if ((~mask & 0xffff) & value) { + die("bad TOC16 relocation (%lu)\n", value); + } + + *location = (*location & ~mask) | (value & mask); +} + +void machine_apply_elf_rel(struct mem_ehdr *ehdr, struct mem_sym *sym, + unsigned long r_type, void *location, unsigned long address, + unsigned long value) +{ + switch(r_type) { + case R_PPC64_ADDR32: + /* Simply set it */ + *(uint32_t *)location = value; + break; + + case R_PPC64_ADDR64: + case R_PPC64_REL64: + /* Simply set it */ + 
*(uint64_t *)location = value; + break; + + case R_PPC64_REL32: + *(uint32_t *)location = value - (uint32_t)location; + break; + + case R_PPC64_TOC: + *(uint64_t *)location = my_r2(ehdr); + break; + + case R_PPC64_TOC16: + do_relative_toc(value - my_r2(ehdr), location, 0xffff, 1); + break; + + case R_PPC64_TOC16_DS: + do_relative_toc(value - my_r2(ehdr), location, 0xfffc, 1); + break; + + case R_PPC64_TOC16_LO: + do_relative_toc(value - my_r2(ehdr), location, 0xffff, 0); + break; + + case R_PPC64_TOC16_LO_DS: + do_relative_toc(value - my_r2(ehdr), location, 0xfffc, 0); + break; + + case R_PPC64_TOC16_HI: + do_relative_toc((value - my_r2(ehdr)) >> 16, location, + 0xffff, 0); + break; + + case R_PPC64_TOC16_HA: + do_relative_toc((value - my_r2(ehdr) + 0x8000) >> 16, location, + 0xffff, 0); + break; + + case R_PPC64_REL24: + value += local_entry_offset(sym); + /* Convert value to relative */ + value -= address; + if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0) { + die("REL24 %li out of range!\n", (long int)value); + } + + /* Only replace bits 2 through 26 */ + *(uint32_t *)location = (*(uint32_t *)location & ~0x03fffffc) | + (value & 0x03fffffc); + break; + + case R_PPC64_ADDR16_LO: + *(uint16_t *)location = value & 0xffff; + break; + + case R_PPC64_ADDR16_HI: + *(uint16_t *)location = (value >> 16) & 0xffff; + break; + + case R_PPC64_ADDR16_HA: + *(uint16_t *)location = (((value + 0x8000) >> 16) & 0xffff); + break; + + case R_PPC64_ADDR16_HIGHER: + *(uint16_t *)location = (((uint64_t)value >> 32) & 0xffff); + break; + + case R_PPC64_ADDR16_HIGHEST: + *(uint16_t *)location = (((uint64_t)value >> 48) & 0xffff); + break; + + /* R_PPC64_REL16_HA and R_PPC64_REL16_LO are handled to support + * ABIv2 r2 assignment based on r12 for PIC executable. 
+ * Here address is know so replace + * 0: addis 2,12,.TOC.-0b@ha + * addi 2,2,.TOC.-0b@l + * by + * lis 2,.TOC.@ha + * addi 2,2,.TOC.@l + */ + case R_PPC64_REL16_HA: + /* check that we are dealing with the addis 2,12 instruction */ + if (((*(uint32_t*)location) & 0xffff0000) != 0x3c4c0000) + die("Unexpected instruction for R_PPC64_REL16_HA"); + value += my_r2(ehdr); + /* replacing by lis 2 */ + *(uint32_t *)location = 0x3c400000 + ((value >> 16) & 0xffff); + break; + + case R_PPC64_REL16_LO: + /* check that we are dealing with the addi 2,2 instruction */ + if (((*(uint32_t*)location) & 0xffff0000) != 0x38420000) + die("Unexpected instruction for R_PPC64_REL16_LO"); + + value += my_r2(ehdr) - 4; + *(uint16_t *)location = value & 0xffff; + break; + + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } +} diff --git a/kexec/arch/ppc64/kexec-ppc64.c b/kexec/arch/ppc64/kexec-ppc64.c new file mode 100644 index 0000000..611809f --- /dev/null +++ b/kexec/arch/ppc64/kexec-ppc64.c @@ -0,0 +1,969 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com), IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <dirent.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" +#include <arch/options.h> + +static struct memory_range *exclude_range = NULL; +static struct memory_range *memory_range = NULL; +static struct memory_range *base_memory_range = NULL; +static uint64_t rma_top; +uint64_t memory_max = 0; +uint64_t memory_limit; +static int nr_memory_ranges, nr_exclude_ranges; +uint64_t crash_base, crash_size; +unsigned int rtas_base, rtas_size; +uint64_t opal_base, opal_size; +int max_memory_ranges; + +static void cleanup_memory_ranges(void) +{ + if (memory_range) + free(memory_range); + if (base_memory_range) + free(base_memory_range); + if (exclude_range) + free(exclude_range); + if (usablemem_rgns.ranges) + free(usablemem_rgns.ranges); +} + +/* + * Allocate memory for various data structures used to hold + * values of different memory ranges + */ +static int alloc_memory_ranges(void) +{ + int memory_range_len; + + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = (struct memory_range *) malloc(memory_range_len); + if (!memory_range) + return -1; + + base_memory_range = (struct memory_range *) malloc(memory_range_len); + if (!base_memory_range) + goto err1; + + exclude_range = (struct memory_range *) malloc(memory_range_len); + if (!exclude_range) + goto err1; + + usablemem_rgns.ranges = (struct memory_range *) + malloc(memory_range_len); + if (!(usablemem_rgns.ranges)) + goto err1; + + memset(memory_range, 0, memory_range_len); + memset(base_memory_range, 0, memory_range_len); + memset(exclude_range, 0, memory_range_len); + memset(usablemem_rgns.ranges, 0, memory_range_len); + return 0; + +err1: + fprintf(stderr, "memory range structure 
allocation failure\n"); + cleanup_memory_ranges(); + return -1; + +} + +static int realloc_memory_ranges(void) +{ + size_t memory_range_len; + + max_memory_ranges++; + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = (struct memory_range *) realloc(memory_range, memory_range_len); + if (!memory_range) + goto err; + + base_memory_range = (struct memory_range *) realloc(base_memory_range, memory_range_len); + if (!base_memory_range) + goto err; + + exclude_range = (struct memory_range *) realloc(exclude_range, memory_range_len); + if (!exclude_range) + goto err; + + usablemem_rgns.ranges = (struct memory_range *) + realloc(usablemem_rgns.ranges, memory_range_len); + if (!(usablemem_rgns.ranges)) + goto err; + + return 0; + +err: + fprintf(stderr, "memory range structure re-allocation failure\n"); + return -1; +} + + +static void add_base_memory_range(uint64_t start, uint64_t end) +{ + base_memory_range[nr_memory_ranges].start = start; + base_memory_range[nr_memory_ranges].end = end; + base_memory_range[nr_memory_ranges].type = RANGE_RAM; + nr_memory_ranges++; + if (nr_memory_ranges >= max_memory_ranges) + realloc_memory_ranges(); + + dbgprintf("%016llx-%016llx : %x\n", + base_memory_range[nr_memory_ranges-1].start, + base_memory_range[nr_memory_ranges-1].end, + base_memory_range[nr_memory_ranges-1].type); +} + +static int get_dyn_reconf_base_ranges(void) +{ + uint64_t start, end; + uint64_t size; + char fname[128], buf[32]; + FILE *file; + unsigned int i; + int n; + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, "ibm,dynamic-reconfiguration-memory/ibm,lmb-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + return -1; + } + if (fread(buf, 1, 8, file) != 8) { + perror(fname); + fclose(file); + return -1; + } + /* + * lmb_size, num_of_lmb_sets(global variables) are + * initialized once here. 
+ */ + size = lmb_size = be64_to_cpu(((uint64_t *)buf)[0]); + fclose(file); + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, + "ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory"); + if ((file = fopen(fname, "r")) == NULL) { + strcat(fname, "-v2"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + return -1; + } + + is_dyn_mem_v2 = 1; + } + + /* first 4 bytes tell the number of lmb set entries */ + if (fread(buf, 1, 4, file) != 4) { + perror(fname); + fclose(file); + return -1; + } + num_of_lmb_sets = be32_to_cpu(((unsigned int *)buf)[0]); + + for (i = 0; i < num_of_lmb_sets; i++) { + if ((n = fread(buf, 1, LMB_ENTRY_SIZE, file)) < 0) { + perror(fname); + fclose(file); + return -1; + } + if (nr_memory_ranges >= max_memory_ranges) { + fclose(file); + return -1; + } + + /* + * If the property is ibm,dynamic-memory-v2, the first 4 bytes + * tell the number of sequential LMBs in this entry. + */ + if (is_dyn_mem_v2) + size = be32_to_cpu(((unsigned int *)buf)[0]) * lmb_size; + + start = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR])); + end = start + size; + add_base_memory_range(start, end); + } + fclose(file); + return 0; +} + +/* Sort the base ranges in memory - this is useful for ensuring that our + * ranges are in ascending order, even if device-tree read of memory nodes + * is done differently. 
Also, could be used for other range coalescing later + */ +static int sort_base_ranges(void) +{ + int i, j; + unsigned long long tstart, tend; + + for (i = 0; i < nr_memory_ranges - 1; i++) { + for (j = 0; j < nr_memory_ranges - i - 1; j++) { + if (base_memory_range[j].start > base_memory_range[j+1].start) { + tstart = base_memory_range[j].start; + tend = base_memory_range[j].end; + base_memory_range[j].start = base_memory_range[j+1].start; + base_memory_range[j].end = base_memory_range[j+1].end; + base_memory_range[j+1].start = tstart; + base_memory_range[j+1].end = tend; + } + } + } + return 0; +} + +/* Get base memory ranges */ +static int get_base_ranges(void) +{ + uint64_t start, end; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + char buf[MAXBYTES]; + DIR *dir, *dmem; + FILE *file; + struct dirent *dentry, *mentry; + int n; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + while ((dentry = readdir(dir)) != NULL) { + if (!strncmp(dentry->d_name, + "ibm,dynamic-reconfiguration-memory", 35)) { + get_dyn_reconf_base_ranges(); + continue; + } + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((dmem = opendir(fname)) == NULL) { + perror(fname); + closedir(dir); + return -1; + } + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + closedir(dmem); + closedir(dir); + return -1; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + fclose(file); + closedir(dmem); + closedir(dir); + return -1; + } + if (nr_memory_ranges >= max_memory_ranges) { + if (realloc_memory_ranges() < 0) + break; + } + start = be64_to_cpu(((uint64_t *)buf)[0]); + end = start + be64_to_cpu(((uint64_t *)buf)[1]); + add_base_memory_range(start, end); + fclose(file); + } + 
closedir(dmem); + } + closedir(dir); + sort_base_ranges(); + memory_max = base_memory_range[nr_memory_ranges - 1].end; + dbgprintf("get base memory ranges:%d\n", nr_memory_ranges); + + return 0; +} + +/* Sort the exclude ranges in memory */ +static int sort_ranges(void) +{ + int i, j; + uint64_t tstart, tend; + for (i = 0; i < nr_exclude_ranges - 1; i++) { + for (j = 0; j < nr_exclude_ranges - i - 1; j++) { + if (exclude_range[j].start > exclude_range[j+1].start) { + tstart = exclude_range[j].start; + tend = exclude_range[j].end; + exclude_range[j].start = exclude_range[j+1].start; + exclude_range[j].end = exclude_range[j+1].end; + exclude_range[j+1].start = tstart; + exclude_range[j+1].end = tend; + } + } + } + return 0; +} + +void scan_reserved_ranges(unsigned long kexec_flags, int *range_index) +{ + char fname[256], buf[16]; + FILE *file; + int i = *range_index; + + strcpy(fname, "/proc/device-tree/reserved-ranges"); + + file = fopen(fname, "r"); + if (file == NULL) { + if (errno != ENOENT) { + perror(fname); + return; + } + errno = 0; + /* File not present. Non PowerKVM system. */ + return; + } + + /* + * Each reserved range is an (address,size) pair, 2 cells each, + * totalling 4 cells per range. 
+ */ + while (fread(buf, sizeof(uint64_t) * 2, 1, file) == 1) { + uint64_t base, size; + + base = be64_to_cpu(((uint64_t *)buf)[0]); + size = be64_to_cpu(((uint64_t *)buf)[1]); + + exclude_range[i].start = base; + exclude_range[i].end = base + size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + reserve(base, size); + } + fclose(file); + *range_index = i; +} + +/* Return 0 if fname/value valid, -1 otherwise */ +int get_devtree_value(const char *fname, unsigned long long *value) +{ + FILE *file; + char buf[MAXBYTES]; + int n = -1; + + if ((file = fopen(fname, "r"))) { + n = fread(buf, 1, MAXBYTES, file); + fclose(file); + } + + if (n == sizeof(uint32_t)) + *value = ((uint32_t *)buf)[0]; + else if (n == sizeof(uint64_t)) + *value = ((uint64_t *)buf)[0]; + else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + return -1; + } + + return 0; +} + +/* Get devtree details and create exclude_range array + * Also create usablemem_ranges for KEXEC_ON_CRASH + */ +static int get_devtree_details(unsigned long kexec_flags) +{ + uint64_t rma_base = -1, base; + uint64_t tce_base; + unsigned int tce_size; + uint64_t htab_base, htab_size; + uint64_t kernel_end; + uint64_t initrd_start, initrd_end; + char buf[MAXBYTES]; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + DIR *dir, *cdir; + FILE *file; + struct dirent *dentry; + struct stat fstat; + int n, i = 0; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + + scan_reserved_ranges(kexec_flags, &i); + + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "chosen", 6) && + strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory") && + strncmp(dentry->d_name, "pci@", 4) && + strncmp(dentry->d_name, "rtas", 4) && + strncmp(dentry->d_name, "ibm,opal", 8)) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((cdir = opendir(fname)) == NULL) { + perror(fname); + goto 
error_opendir; + } + + if (strncmp(dentry->d_name, "chosen", 6) == 0) { + strcat(fname, "/linux,kernel-end"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&kernel_end, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + kernel_end = be64_to_cpu(kernel_end); + + /* Add kernel memory to exclude_range */ + exclude_range[i].start = 0x0UL; + exclude_range[i].end = kernel_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + if (kexec_flags & KEXEC_ON_CRASH) { + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,crashkernel-base"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&crash_base, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + crash_base = be64_to_cpu(crash_base); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,crashkernel-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&crash_size, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + crash_size = be64_to_cpu(crash_size); + + if (crash_base > mem_min) + mem_min = crash_base; + if (crash_base + crash_size < mem_max) + mem_max = crash_base + crash_size; + + add_usable_mem_rgns(0, crash_base + crash_size); + reserve(KDUMP_BACKUP_LIMIT, crash_base-KDUMP_BACKUP_LIMIT); + } + /* + * Read the first kernel's memory limit. + * If the first kernel is booted with mem= option then + * it would export "linux,memory-limit" file + * reflecting value for the same. 
+ */ + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,memory-limit"); + if ((file = fopen(fname, "r")) == NULL) { + if (errno != ENOENT) { + perror(fname); + goto error_opencdir; + } + errno = 0; + /* + * File not present. + * fall through. On older kernel this file + * is not present. + */ + } else { + if (fread(&memory_limit, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + memory_limit = be64_to_cpu(memory_limit); + } + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,htab-base"); + if ((file = fopen(fname, "r")) == NULL) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&htab_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + htab_base = be64_to_cpu(htab_base); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,htab-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&htab_size, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + htab_size = be64_to_cpu(htab_size); + + /* Add htab address to exclude_range - NON-LPAR only */ + exclude_range[i].start = htab_base; + exclude_range[i].end = htab_base + htab_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + /* reserve the initrd_start and end locations. 
*/ + if (reuse_initrd) { + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,initrd-start"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + /* check for 4 and 8 byte initrd offset sizes */ + if (stat(fname, &fstat) != 0) { + perror(fname); + goto error_openfile; + } + if (fread(&initrd_start, fstat.st_size, 1, file) != 1) { + perror(fname); + goto error_openfile; + } + initrd_start = be64_to_cpu(initrd_start); + fclose(file); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,initrd-end"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + /* check for 4 and 8 byte initrd offset sizes */ + if (stat(fname, &fstat) != 0) { + perror(fname); + goto error_openfile; + } + if (fread(&initrd_end, fstat.st_size, 1, file) != 1) { + perror(fname); + goto error_openfile; + } + initrd_end = be64_to_cpu(initrd_end); + fclose(file); + + /* Add initrd address to exclude_range */ + exclude_range[i].start = initrd_start; + exclude_range[i].end = initrd_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + } + } /* chosen */ + + if (strncmp(dentry->d_name, "rtas", 4) == 0) { + strcat(fname, "/linux,rtas-base"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_base, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + rtas_base = be32_to_cpu(rtas_base); + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/rtas-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_size, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + closedir(cdir); + rtas_size = be32_to_cpu(rtas_size); + 
/* Add rtas to exclude_range */ + exclude_range[i].start = rtas_base; + exclude_range[i].end = rtas_base + rtas_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(rtas_base, rtas_size); + } /* rtas */ + + if (strncmp(dentry->d_name, "ibm,opal", 8) == 0) { + strcat(fname, "/opal-base-address"); + file = fopen(fname, "r"); + if (file == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&opal_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + opal_base = be64_to_cpu(opal_base); + fclose(file); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/opal-runtime-size"); + file = fopen(fname, "r"); + if (file == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&opal_size, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + closedir(cdir); + opal_size = be64_to_cpu(opal_size); + /* Add OPAL to exclude_range */ + exclude_range[i].start = opal_base; + exclude_range[i].end = opal_base + opal_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(opal_base, opal_size); + } /* ibm,opal */ + + if (!strncmp(dentry->d_name, "memory@", 7) || + !strcmp(dentry->d_name, "memory")) { + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + base = be64_to_cpu(((uint64_t *)buf)[0]); + if (base < rma_base) { + rma_base = base; + rma_top = base + be64_to_cpu(((uint64_t *)buf)[1]); + } + + fclose(file); + closedir(cdir); + } /* memory */ + + if (strncmp(dentry->d_name, "pci@", 4) == 0) { + strcat(fname, "/linux,tce-base"); + if ((file = fopen(fname, "r")) == NULL) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ 
+ errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&tce_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + tce_base = be64_to_cpu(tce_base); + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,tce-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&tce_size, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + tce_size = be32_to_cpu(tce_size); + /* Add tce to exclude_range - NON-LPAR only */ + exclude_range[i].start = tce_base; + exclude_range[i].end = tce_base + tce_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(tce_base, tce_size); + closedir(cdir); + } /* pci */ + } + closedir(dir); + + nr_exclude_ranges = i; + + sort_ranges(); + + int k; + for (k = 0; k < i; k++) + dbgprintf("exclude_range sorted exclude_range[%d] " + "start:%llx, end:%llx\n", k, exclude_range[k].start, + exclude_range[k].end); + + return 0; + +error_openfile: + fclose(file); +error_opencdir: + closedir(cdir); +error_opendir: + closedir(dir); + return -1; +} + +/* Setup a sorted list of memory ranges. */ +int setup_memory_ranges(unsigned long kexec_flags) +{ + int i, j = 0; + + /* Get the base list of memory ranges from /proc/device-tree/memory + * nodes. 
Build list of ranges to be excluded from valid memory + */ + + if (get_base_ranges()) + goto out; + if (get_devtree_details(kexec_flags)) + goto out; + + for (i = 0; i < nr_exclude_ranges; i++) { + /* If first exclude range does not start with 0, include the + * first hole of valid memory from 0 - exclude_range[0].start + */ + if (i == 0) { + if (exclude_range[i].start != 0) { + memory_range[j].start = 0; + memory_range[j].end = exclude_range[i].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + } + } /* i == 0 */ + /* If the last exclude range does not end at memory_max, include + * the last hole of valid memory from exclude_range[last].end - + * memory_max + */ + if (i == nr_exclude_ranges - 1) { + if (exclude_range[i].end < memory_max) { + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = memory_max; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + /* Limit the end to rma_top */ + if (memory_range[j-1].start >= rma_top) { + j--; + break; + } + if ((memory_range[j-1].start < rma_top) && + (memory_range[j-1].end >= rma_top)) { + memory_range[j-1].end = rma_top; + break; + } + continue; + } + } /* i == nr_exclude_ranges - 1 */ + /* contiguous exclude ranges - skip */ + if (exclude_range[i+1].start == exclude_range[i].end + 1) + continue; + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = exclude_range[i+1].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + /* Limit range to rma_top */ + if (memory_range[j-1].start >= rma_top) { + j--; + break; + } + if ((memory_range[j-1].start < rma_top) && + (memory_range[j-1].end >= rma_top)) { + memory_range[j-1].end = rma_top; + break; + } + } + nr_memory_ranges = j; + + int k; + for (k = 0; k < j; k++) + dbgprintf("setup_memory_ranges memory_range[%d] " + "start:%llx, end:%llx\n", k, 
memory_range[k].start, + memory_range[k].end); + return 0; + +out: + cleanup_memory_ranges(); + return -1; +} + +/* Return a list of valid memory ranges */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + /* allocate memory_range dynamically */ + max_memory_ranges = 1; + + if (alloc_memory_ranges()) + return -1; + if (setup_memory_ranges(kexec_flags)) + return -1; + + /* + * copy the memory here, another realloc_memory_ranges might + * corrupt the old memory + */ + *range = calloc(sizeof(struct memory_range), nr_memory_ranges); + if (*range == NULL) + return -1; + memmove(*range, memory_range, + sizeof(struct memory_range) * nr_memory_ranges); + + *ranges = nr_memory_ranges; + dbgprintf("get memory ranges:%d\n", nr_memory_ranges); + return 0; +} + +struct file_type file_type[] = { + { "elf-ppc64", elf_ppc64_probe, elf_ppc64_load, elf_ppc64_usage }, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ + fprintf(stderr, " --elf64-core-headers Prepare core headers in ELF64 format\n"); + fprintf(stderr, " --dt-no-old-root Do not reuse old kernel root= param.\n" \ + " while creating flatten device tree.\n"); +} + +struct arch_options_t arch_options = { + .core_header_type = CORE_TYPE_ELF64, +}; + +int arch_process_options(int argc, char **argv) +{ + /* We look for all options so getopt_long doesn't start reordering + * argv[] before file_type[n].load() gets a look in. 
+ */ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + break; + case OPT_ELF64_CORE: + arch_options.core_header_type = CORE_TYPE_ELF64; + break; + case OPT_DT_NO_OLD_ROOT: + dt_no_old_root = 1; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + /* We are running a 32-bit kexec-tools on 64-bit ppc64. + * So pass KEXEC_ARCH_PPC64 here + */ + { "ppc64", KEXEC_ARCH_PPC64 }, + { "ppc64le", KEXEC_ARCH_PPC64 }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} diff --git a/kexec/arch/ppc64/kexec-ppc64.h b/kexec/arch/ppc64/kexec-ppc64.h new file mode 100644 index 0000000..434b4bf --- /dev/null +++ b/kexec/arch/ppc64/kexec-ppc64.h @@ -0,0 +1,45 @@ +#ifndef KEXEC_PPC64_H +#define KEXEC_PPC64_H + +#define PATH_LEN 256 +#define MAXBYTES 128 +#define MAX_LINE 160 +#define CORE_TYPE_ELF32 1 +#define CORE_TYPE_ELF64 2 + +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 17 +#if (BOOT_BLOCK_VERSION < 16) +# define NEED_STRUCTURE_BLOCK_EXTRA_PAD +#endif +#define HAVE_DYNAMIC_MEMORY +#define NEED_RESERVE_DTB + +extern int get_devtree_value(const char *fname, unsigned long long *pvalue); + +int setup_memory_ranges(unsigned long kexec_flags); + +int elf_ppc64_probe(const char *buf, off_t len); +int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_ppc64_usage(void); + +struct mem_ehdr; +unsigned long my_r2(const struct mem_ehdr *ehdr); + +extern uint64_t initrd_base, initrd_size; +extern int 
max_memory_ranges; +extern unsigned char reuse_initrd; + +struct arch_options_t { + int core_header_type; +}; + +typedef struct mem_rgns { + unsigned int size; + struct memory_range *ranges; +} mem_rgns_t; + +extern mem_rgns_t usablemem_rgns; + +#endif /* KEXEC_PPC64_H */ diff --git a/kexec/arch/ppc64/kexec-zImage-ppc64.c b/kexec/arch/ppc64/kexec-zImage-ppc64.c new file mode 100644 index 0000000..e946205 --- /dev/null +++ b/kexec/arch/ppc64/kexec-zImage-ppc64.c @@ -0,0 +1,184 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <linux/elf.h> +#include "../../kexec.h" + +#define MAX_HEADERS 32 + +int zImage_ppc64_probe(FILE *file) +{ + Elf32_Ehdr elf; + int valid; + + if (fseek(file, 0, SEEK_SET) < 0) { + fprintf(stderr, "seek error: %s\n", + strerror(errno)); + return -1; + } + if (fread(&elf, sizeof(Elf32_Ehdr), 1, file) != 1) { + fprintf(stderr, "read error: %s\n", + strerror(errno)); + return -1; + } + + if (elf.e_machine == EM_PPC64) { + fprintf(stderr, "Elf64 not supported\n"); + return -1; + } + + valid = (elf.e_ident[EI_MAG0] == ELFMAG0 && + elf.e_ident[EI_MAG1] == ELFMAG1 && + elf.e_ident[EI_MAG2] == ELFMAG2 && + elf.e_ident[EI_MAG3] == ELFMAG3 && + elf.e_ident[EI_CLASS] == ELFCLASS32 && + elf.e_ident[EI_DATA] == ELFDATA2MSB && + elf.e_type == ET_EXEC && + elf.e_machine == EM_PPC); + + return valid ? 
0 : -1; +} + +int zImage_ppc64_load(FILE *file, int UNUSED(argc), char **UNUSED(argv), + void **ret_entry, struct kexec_segment **ret_segments, + int *ret_nr_segments) +{ + Elf32_Ehdr elf; + Elf32_Phdr *p, *ph; + struct kexec_segment *segment; + int i; + unsigned long memsize, filesize, offset, load_loc = 0; + + /* Parse command line arguments */ + + /* Read in the Elf32 header */ + if (fseek(file, 0, SEEK_SET) < 0) { + perror("seek error:"); + return -1; + } + if (fread(&elf, sizeof(Elf32_Ehdr), 1, file) != 1) { + perror("read error: "); + return -1; + } + if (elf.e_phnum > MAX_HEADERS) { + fprintf(stderr, + "Only kernels with %i program headers are supported\n", + MAX_HEADERS); + return -1; + } + + /* Read the section header */ + ph = (Elf32_Phdr *)malloc(sizeof(Elf32_Phdr) * elf.e_phnum); + if (ph == 0) { + perror("malloc failed: "); + return -1; + } + if (fseek(file, elf.e_phoff, SEEK_SET) < 0) { + perror("seek failed: "); + free(ph); + return -1; + } + if (fread(ph, sizeof(Elf32_Phdr) * elf.e_phnum, 1, file) != 1) { + perror("read error: "); + free(ph); + return -1; + } + + *ret_segments = malloc(elf.e_phnum * sizeof(struct kexec_segment)); + if (*ret_segments == 0) { + fprintf(stderr, "malloc failed: %s\n", + strerror(errno)); + free(ph); + return -1; + } + segment = ret_segments[0]; + + /* Scan through the program header */ + memsize = filesize = offset = 0; + p = ph; + for (i = 0; i < elf.e_phnum; ++i, ++p) { + if (p->p_type != PT_LOAD || p->p_offset == 0) + continue; + if (memsize == 0) { + offset = p->p_offset; + memsize = p->p_memsz; + filesize = p->p_filesz; + load_loc = p->p_vaddr; + } else { + memsize = p->p_offset + p->p_memsz - offset; + filesize = p->p_offset + p->p_filesz - offset; + } + } + if (memsize == 0) { + fprintf(stderr, "Can't find a loadable segment.\n"); + free(ph); + return -1; + } + + /* Load program segments */ + p = ph; + segment->buf = malloc(filesize); + if (segment->buf == 0) { + perror("malloc failed: "); + free(ph); + return 
-1; + } + for (i = 0; i < elf.e_phnum; ++i, ++p) { + unsigned long mem_offset; + if (p->p_type != PT_LOAD || p->p_offset == 0) + continue; + + /* skip to the actual image */ + if (fseek(file, p->p_offset, SEEK_SET) < 0) { + perror("seek error: "); + free(ph); + return -1; + } + mem_offset = p->p_vaddr - load_loc; + if (fread((void *)segment->buf+mem_offset, p->p_filesz, 1, + file) != 1) { + perror("read error: "); + free(ph); + return -1; + } + } + segment->mem = (void *) load_loc; + segment->memsz = memsize; + segment->bufsz = filesize; + *ret_entry = (void *)(uintptr_t)elf.e_entry; + *ret_nr_segments = i - 1; + free(ph); + return 0; +} + +void zImage_ppc64_usage(void) +{ + fprintf(stderr, "zImage support is still broken\n"); +} diff --git a/kexec/arch/s390/Makefile b/kexec/arch/s390/Makefile new file mode 100644 index 0000000..fab3e68 --- /dev/null +++ b/kexec/arch/s390/Makefile @@ -0,0 +1,11 @@ +# +# kexec s390 (linux booting linux) +# +s390_KEXEC_SRCS = kexec/arch/s390/kexec-s390.c +s390_KEXEC_SRCS += kexec/arch/s390/kexec-image.c +s390_KEXEC_SRCS += kexec/arch/s390/kexec-elf-rel-s390.c +s390_KEXEC_SRCS += kexec/arch/s390/crashdump-s390.c + +dist += kexec/arch/s390/Makefile $(s390_KEXEC_SRCS) \ + kexec/arch/s390/kexec-s390.h \ + kexec/arch/s390/include/arch/options.h diff --git a/kexec/arch/s390/crashdump-s390.c b/kexec/arch/s390/crashdump-s390.c new file mode 100644 index 0000000..3bd9efe --- /dev/null +++ b/kexec/arch/s390/crashdump-s390.c @@ -0,0 +1,90 @@ +/* + * kexec/arch/s390/crashdump-s390.c + * + * Copyright IBM Corp. 
2011 + * + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#ifdef __s390x__ +#define _GNU_SOURCE + +#include <stdio.h> +#include <elf.h> +#include <limits.h> +#include <string.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec/crashdump.h" +#include "kexec-s390.h" + +/* + * Create ELF core header + */ +static int create_elf_header(struct kexec_info *info, unsigned long crash_base, + unsigned long crash_end) +{ +#ifdef WITH_ELF_HEADER + static struct memory_range crash_memory_range[MAX_MEMORY_RANGES]; + unsigned long elfcorehdr, elfcorehdr_size, bufsz; + struct crash_elf_info elf_info; + char str[COMMAND_LINESIZE]; + int ranges; + void *tmp; + + memset(&elf_info, 0, sizeof(elf_info)); + + elf_info.data = ELFDATA2MSB; + elf_info.machine = EM_S390; + elf_info.class = ELFCLASS64; + elf_info.get_note_info = get_crash_notes_per_cpu; + + if (get_memory_ranges_s390(crash_memory_range, &ranges, 0)) + return -1; + + if (crash_create_elf64_headers(info, &elf_info, crash_memory_range, + ranges, &tmp, &bufsz, + ELF_CORE_HEADER_ALIGN)) + return -1; + + elfcorehdr = add_buffer(info, tmp, bufsz, bufsz, 1024, + crash_base, crash_end, -1); + elfcorehdr_size = bufsz; + snprintf(str, sizeof(str), " elfcorehdr=%ld@%ldK\n", + elfcorehdr_size, elfcorehdr / 1024); + if (command_line_add(info, str)) + return -1; +#endif + return 0; +} + +/* + * Load additional segments for kdump kernel + */ +int load_crashdump_segments(struct kexec_info *info, unsigned long crash_base, + unsigned long crash_end) +{ + unsigned long crash_size = crash_size = crash_end - crash_base + 1; + + if (create_elf_header(info, crash_base, crash_end)) + return -1; + elf_rel_build_load(info, &info->rhdr, (const char *) purgatory, + purgatory_size, crash_base + 0x2000, + crash_base + 0x10000, -1, 0); + elf_rel_set_symbol(&info->rhdr, "crash_base", &crash_base, + sizeof(crash_base)); + elf_rel_set_symbol(&info->rhdr, "crash_size", &crash_size, + sizeof(crash_size)); + 
info->entry = (void *) elf_rel_get_addr(&info->rhdr, "purgatory_start"); + return 0; +} +#else +/* + * kdump is not available for s390 + */ +int load_crashdump_segments(struct kexec_info *info, unsigned long crash_base, + unsigned long crash_end) +{ + return -1; +} +#endif diff --git a/kexec/arch/s390/include/arch/options.h b/kexec/arch/s390/include/arch/options.h new file mode 100644 index 0000000..c150244 --- /dev/null +++ b/kexec/arch/s390/include/arch/options.h @@ -0,0 +1,40 @@ +#ifndef KEXEC_ARCH_S390_OPTIONS_H +#define KEXEC_ARCH_S390_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) +#define OPT_APPEND (OPT_MAX+0) +#define OPT_RAMDISK (OPT_MAX+1) +#define OPT_REUSE_CMDLINE (OPT_MAX+2) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. 
+ */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + {"command-line", 1, 0, OPT_APPEND}, \ + {"append", 1, 0, OPT_APPEND}, \ + {"initrd", 1, 0, OPT_RAMDISK}, \ + {"reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_S390_OPTIONS_H */ diff --git a/kexec/arch/s390/kexec-elf-rel-s390.c b/kexec/arch/s390/kexec-elf-rel-s390.c new file mode 100644 index 0000000..91ba86a --- /dev/null +++ b/kexec/arch/s390/kexec-elf-rel-s390.c @@ -0,0 +1,78 @@ +/* + * kexec/arch/s390/kexec-elf-rel-s390.c + * + * Copyright IBM Corp. 2005,2011 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + * + */ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) + return 0; + if (ehdr->ei_class != ELFCLASS64) + return 0; + if (ehdr->e_machine != EM_S390) + return 0; + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), + unsigned long r_type, + void *loc, + unsigned long address, + unsigned long val) +{ + switch (r_type) { + case R_390_8: /* Direct 8 bit. */ + case R_390_12: /* Direct 12 bit. */ + case R_390_16: /* Direct 16 bit. */ + case R_390_20: /* Direct 20 bit. */ + case R_390_32: /* Direct 32 bit. */ + case R_390_64: /* Direct 64 bit. */ + if (r_type == R_390_8) + *(unsigned char *) loc = val; + else if (r_type == R_390_12) + *(unsigned short *) loc = (val & 0xfff) | + (*(unsigned short *) loc & 0xf000); + else if (r_type == R_390_16) + *(unsigned short *) loc = val; + else if (r_type == R_390_20) + *(unsigned int *) loc = + (*(unsigned int *) loc & 0xf00000ff) | + (val & 0xfff) << 16 | (val & 0xff000) >> 4; + else if (r_type == R_390_32) + *(unsigned int *) loc = val; + else if (r_type == R_390_64) + *(unsigned long *) loc = val; + break; + case R_390_PC16: /* PC relative 16 bit. 
*/ + case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ + case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */ + case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */ + case R_390_PC32: /* PC relative 32 bit. */ + case R_390_PC64: /* PC relative 64 bit. */ + val -= address; + if (r_type == R_390_PC16) + *(unsigned short *) loc = val; + else if (r_type == R_390_PC16DBL) + *(unsigned short *) loc = val >> 1; + else if (r_type == R_390_PC32DBL || r_type == R_390_PLT32DBL) + *(unsigned int *) loc = val >> 1; + else if (r_type == R_390_PC32) + *(unsigned int *) loc = val; + else if (r_type == R_390_PC64) + *(unsigned long *) loc = val; + break; + default: + die("Unknown rela relocation: 0x%lx 0x%lx\n", r_type, address); + break; + } +} diff --git a/kexec/arch/s390/kexec-image.c b/kexec/arch/s390/kexec-image.c new file mode 100644 index 0000000..69aaf96 --- /dev/null +++ b/kexec/arch/s390/kexec-image.c @@ -0,0 +1,236 @@ +/* + * kexec/arch/s390/kexec-image.c + * + * (C) Copyright IBM Corp. 
2005 + * + * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * Heiko Carstens <heiko.carstens@de.ibm.com> + * + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec/crashdump.h" +#include "kexec-s390.h" +#include <arch/options.h> +#include <fcntl.h> + +static uint64_t crash_base, crash_end; + +static void add_segment_check(struct kexec_info *info, const void *buf, + size_t bufsz, unsigned long base, size_t memsz) +{ + if (info->kexec_flags & KEXEC_ON_CRASH) + if (base + memsz > crash_end - crash_base) + die("Not enough crashkernel memory to load segments\n"); + add_segment(info, buf, bufsz, crash_base + base, memsz); +} + +int command_line_add(struct kexec_info *info, const char *str) +{ + char *tmp = NULL; + + tmp = concat_cmdline(info->command_line, str); + if (!tmp) { + fprintf(stderr, "out of memory\n"); + return -1; + } + + free(info->command_line); + info->command_line = tmp; + return 0; +} + +int image_s390_load_file(int argc, char **argv, struct kexec_info *info) +{ + const char *ramdisk = NULL; + int opt; + + static const struct option options[] = + { + KEXEC_ALL_OPTIONS + {0, 0, 0, 0}, + }; + static const char short_options[] = KEXEC_OPT_STR ""; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + case OPT_APPEND: + if (command_line_add(info, optarg)) + return -1; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_REUSE_CMDLINE: + free(info->command_line); + info->command_line = get_command_line(); + break; + } + } + + if (ramdisk) { + info->initrd_fd = open(ramdisk, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, "Could not open initrd file %s:%s\n", + ramdisk, strerror(errno)); + free(info->command_line); + info->command_line = NULL; + return -1; + } 
+ } + + if (info->command_line) + info->command_line_len = strlen(info->command_line) + 1; + else + info->command_line_len = 0; + return 0; +} + +int +image_s390_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info) +{ + void *krnl_buffer; + char *rd_buffer; + const char *ramdisk; + off_t ramdisk_len; + unsigned int ramdisk_origin; + int opt, ret = -1; + + if (info->file_mode) + return image_s390_load_file(argc, argv, info); + + static const struct option options[] = + { + KEXEC_ALL_OPTIONS + {0, 0, 0, 0}, + }; + static const char short_options[] = KEXEC_OPT_STR ""; + + ramdisk = NULL; + ramdisk_len = 0; + ramdisk_origin = 0; + + while ((opt = getopt_long(argc,argv,short_options,options,0)) != -1) { + switch(opt) { + case OPT_APPEND: + if (command_line_add(info, optarg)) + return -1; + break; + case OPT_REUSE_CMDLINE: + free(info->command_line); + info->command_line = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + } + } + + if (info->kexec_flags & KEXEC_ON_CRASH) { + if (parse_iomem_single("Crash kernel\n", &crash_base, + &crash_end)) + goto out; + } + + /* Add kernel segment */ + add_segment_check(info, kernel_buf + IMAGE_READ_OFFSET, + kernel_size - IMAGE_READ_OFFSET, IMAGE_READ_OFFSET, + kernel_size - IMAGE_READ_OFFSET); + + /* We do want to change the kernel image */ + krnl_buffer = (void *) kernel_buf + IMAGE_READ_OFFSET; + + /* + * Load ramdisk if present: If image is larger than RAMDISK_ORIGIN_ADDR, + * we load the ramdisk directly behind the image with 1 MiB alignment. 
+ */ + if (ramdisk) { + rd_buffer = slurp_file_mmap(ramdisk, &ramdisk_len); + if (rd_buffer == NULL) { + fprintf(stderr, "Could not read ramdisk.\n"); + goto out; + } + ramdisk_origin = MAX(RAMDISK_ORIGIN_ADDR, kernel_size); + ramdisk_origin = _ALIGN_UP(ramdisk_origin, 0x100000); + add_segment_check(info, rd_buffer, ramdisk_len, + ramdisk_origin, ramdisk_len); + } + if (info->kexec_flags & KEXEC_ON_CRASH) { + if (load_crashdump_segments(info, crash_base, crash_end)) + goto out; + } else { + info->entry = (void *) IMAGE_READ_OFFSET; + } + + /* Register the ramdisk and crashkernel memory in the kernel. */ + { + unsigned long long *tmp; + + tmp = krnl_buffer + INITRD_START_OFFS; + *tmp = (unsigned long long) ramdisk_origin; + + tmp = krnl_buffer + INITRD_SIZE_OFFS; + *tmp = (unsigned long long) ramdisk_len; + + if (info->kexec_flags & KEXEC_ON_CRASH) { + tmp = krnl_buffer + OLDMEM_BASE_OFFS; + *tmp = crash_base; + + tmp = krnl_buffer + OLDMEM_SIZE_OFFS; + *tmp = crash_end - crash_base + 1; + } + } + + if (info->command_line) { + unsigned long maxsize; + char *dest = krnl_buffer + COMMAND_LINE_OFFS; + + maxsize = *(unsigned long *)(krnl_buffer + MAX_COMMAND_LINESIZE_OFFS); + if (!maxsize) + maxsize = LEGACY_COMMAND_LINESIZE; + + if (strlen(info->command_line) > maxsize-1) { + fprintf(stderr, "command line too long, maximum allowed size %ld\n", + maxsize-1); + goto out; + } + strncpy(dest, info->command_line, maxsize-1); + dest[maxsize-1] = '\0'; + } + ret = 0; +out: + free(info->command_line); + info->command_line = NULL; + return ret; +} + +int +image_s390_probe(const char *UNUSED(kernel_buf), off_t UNUSED(kernel_size)) +{ + /* + * Can't reliably tell if an image is valid, + * therefore everything is valid. 
+ */ + return 0; +} + +void +image_s390_usage(void) +{ + printf("--command-line=STRING Set the kernel command line to STRING.\n" + "--append=STRING Set the kernel command line to STRING.\n" + "--initrd=FILENAME Use the file FILENAME as a ramdisk.\n" + "--reuse-cmdline Use kernel command line from running system.\n" + ); +} diff --git a/kexec/arch/s390/kexec-s390.c b/kexec/arch/s390/kexec-s390.c new file mode 100644 index 0000000..33ba6b9 --- /dev/null +++ b/kexec/arch/s390/kexec-s390.c @@ -0,0 +1,269 @@ +/* + * kexec/arch/s390/kexec-s390.c + * + * Copyright IBM Corp. 2005,2011 + * + * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> + * + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <sys/types.h> +#include <dirent.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-s390.h" +#include <arch/options.h> + +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +/* + * Read string from file + */ +static void read_str(char *string, const char *path, size_t len) +{ + size_t rc; + FILE *fh; + + fh = fopen(path, "rb"); + if (fh == NULL) + die("Could not open \"%s\"", path); + rc = fread(string, 1, len - 1, fh); + if (rc == 0 && ferror(fh)) + die("Could not read \"%s\"", path); + fclose(fh); + string[rc] = 0; + if (string[strlen(string) - 1] == '\n') + string[strlen(string) - 1] = 0; +} + +/* + * Return number of memory chunks + */ +static int memory_range_cnt(struct memory_range chunks[]) +{ + int i; + + for (i = 0; i < MAX_MEMORY_RANGES; i++) { + if (chunks[i].end == 0) + break; + } + return i; +} + +/* + * Create memory hole with given address and size + * + * lh = local hole + */ +static void add_mem_hole(struct memory_range chunks[], unsigned long addr, + unsigned long size) +{ + unsigned long lh_start, lh_end, lh_size, chunk_cnt; + int i; + + 
chunk_cnt = memory_range_cnt(chunks); + + for (i = 0; i < chunk_cnt; i++) { + if (addr + size <= chunks[i].start) + break; + if (addr > chunks[i].end) + continue; + lh_start = MAX(addr, chunks[i].start); + lh_end = MIN(addr + size - 1, chunks[i].end); + lh_size = lh_end - lh_start + 1; + if (lh_start == chunks[i].start && lh_end == chunks[i].end) { + /* Remove chunk */ + memmove(&chunks[i], &chunks[i + 1], + sizeof(struct memory_range) * + (MAX_MEMORY_RANGES - (i + 1))); + memset(&chunks[MAX_MEMORY_RANGES - 1], 0, + sizeof(struct memory_range)); + chunk_cnt--; + i--; + } else if (lh_start == chunks[i].start) { + /* Make chunk smaller at start */ + chunks[i].start = chunks[i].start + lh_size; + break; + } else if (lh_end == chunks[i].end) { + /* Make chunk smaller at end */ + chunks[i].end = lh_start - 1; + } else { + /* Split chunk into two */ + if (chunk_cnt >= MAX_MEMORY_RANGES) + die("Unable to create memory hole: %i", i); + memmove(&chunks[i + 1], &chunks[i], + sizeof(struct memory_range) * + (MAX_MEMORY_RANGES - (i + 1))); + chunks[i + 1].start = lh_start + lh_size; + chunks[i].end = lh_start - 1; + break; + } + } +} + +/* + * Remove offline memory from memory chunks + */ +static void remove_offline_memory(struct memory_range memory_range[]) +{ + unsigned long block_size, chunk_nr; + struct dirent *dirent; + char path[PATH_MAX]; + char str[64]; + DIR *dir; + + read_str(str, "/sys/devices/system/memory/block_size_bytes", + sizeof(str)); + sscanf(str, "%lx", &block_size); + + dir = opendir("/sys/devices/system/memory"); + if (!dir) + die("Could not read \"/sys/devices/system/memory\""); + while ((dirent = readdir(dir))) { + if (sscanf(dirent->d_name, "memory%ld\n", &chunk_nr) != 1) + continue; + sprintf(path, "/sys/devices/system/memory/%s/state", + dirent->d_name); + read_str(str, path, sizeof(str)); + if (strncmp(str, "offline", 6) != 0) + continue; + add_mem_hole(memory_range, chunk_nr * block_size, block_size); + } + closedir(dir); +} + +/* + * Get memory 
ranges of type "System RAM" from /proc/iomem. If with_crashk=1 + * then also type "Crash kernel" is added. + */ +int get_memory_ranges_s390(struct memory_range memory_range[], int *ranges, + int with_crashk) +{ + char crash_kernel[] = "Crash kernel\n"; + char sys_ram[] = "System RAM\n"; + const char *iomem = proc_iomem(); + FILE *fp; + char line[80]; + int current_range = 0; + + fp = fopen(iomem,"r"); + if(fp == 0) { + fprintf(stderr,"Unable to open %s: %s\n",iomem,strerror(errno)); + return -1; + } + + /* Setup the compare string properly. */ + while (fgets(line, sizeof(line), fp) != 0) { + unsigned long long start, end; + int cons; + char *str; + + if (current_range == MAX_MEMORY_RANGES) + break; + + sscanf(line,"%llx-%llx : %n", &start, &end, &cons); + str = line+cons; + if ((memcmp(str, sys_ram, strlen(sys_ram)) == 0) || + ((memcmp(str, crash_kernel, strlen(crash_kernel)) == 0) && + with_crashk)) { + memory_range[current_range].start = start; + memory_range[current_range].end = end; + memory_range[current_range].type = RANGE_RAM; + current_range++; + } + else { + continue; + } + } + fclose(fp); + remove_offline_memory(memory_range); + *ranges = memory_range_cnt(memory_range); + return 0; +} + +/* + * get_memory_ranges: + * Return a list of memory ranges by parsing the file returned by + * proc_iomem() + * + * INPUT: + * - Pointer to an array of memory_range structures. + * - Pointer to an integer with holds the number of memory ranges. + * + * RETURN: + * - 0 on normal execution. + * - (-1) if something went wrong. 
+ */ + +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long flags) +{ + uint64_t start, end; + + if (get_memory_ranges_s390(memory_range, ranges, + flags & KEXEC_ON_CRASH)) + return -1; + *range = memory_range; + if ((flags & KEXEC_ON_CRASH) && !(flags & KEXEC_PRESERVE_CONTEXT)) { + if (parse_iomem_single("Crash kernel\n", &start, &end)) + return -1; + if (start > mem_min) + mem_min = start; + if (end < mem_max) + mem_max = end; + } + return 0; +} + +/* Supported file types and callbacks */ +struct file_type file_type[] = { + { "image", image_s390_probe, image_s390_load, image_s390_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + + +void arch_usage(void) +{ +} + +int arch_process_options(int UNUSED(argc), char **UNUSED(argv)) +{ + return 0; +} + +const struct arch_map_entry arches[] = { + { "s390", KEXEC_ARCH_S390 }, + { "s390x", KEXEC_ARCH_S390 }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +int is_crashkernel_mem_reserved(void) +{ + uint64_t start, end; + + return parse_iomem_single("Crash kernel\n", &start, &end) == 0 ? + (start != end) : 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + return parse_iomem_single("Crash kernel\n", start, end); +} diff --git a/kexec/arch/s390/kexec-s390.h b/kexec/arch/s390/kexec-s390.h new file mode 100644 index 0000000..6a99518 --- /dev/null +++ b/kexec/arch/s390/kexec-s390.h @@ -0,0 +1,38 @@ +/* + * kexec/arch/s390/kexec-s390.h + * + * (C) Copyright IBM Corp. 
2005 + * + * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * + */ + +#ifndef KEXEC_S390_H +#define KEXEC_S390_H + +#define IMAGE_READ_OFFSET 0x10000 + +#define RAMDISK_ORIGIN_ADDR 0x800000 +#define INITRD_START_OFFS 0x408 +#define INITRD_SIZE_OFFS 0x410 +#define OLDMEM_BASE_OFFS 0x418 +#define OLDMEM_SIZE_OFFS 0x420 +#define MAX_COMMAND_LINESIZE_OFFS 0x430 +#define COMMAND_LINE_OFFS 0x480 +#define LEGACY_COMMAND_LINESIZE 896 +#define MAX_MEMORY_RANGES 1024 + +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +extern int image_s390_load(int, char **, const char *, off_t, struct kexec_info *); +extern int image_s390_probe(const char *, off_t); +extern void image_s390_usage(void); +extern int load_crashdump_segments(struct kexec_info *info, + unsigned long crash_base, + unsigned long crash_end); +extern int get_memory_ranges_s390(struct memory_range range[], int *ranges, + int with_crashk); +extern int command_line_add(struct kexec_info *info, const char *str); + +#endif /* KEXEC_S390_H */ diff --git a/kexec/arch/sh/Makefile b/kexec/arch/sh/Makefile new file mode 100644 index 0000000..7cf40ae --- /dev/null +++ b/kexec/arch/sh/Makefile @@ -0,0 +1,22 @@ +# +# kexec sh (linux booting linux) +# +sh_KEXEC_SRCS += kexec/arch/sh/kexec-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/kexec-uImage-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/kexec-zImage-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/kexec-netbsd-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/kexec-elf-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/kexec-elf-rel-sh.c +sh_KEXEC_SRCS += kexec/arch/sh/netbsd_booter.S +sh_KEXEC_SRCS += kexec/arch/sh/crashdump-sh.c + +sh_UIMAGE = kexec/kexec-uImage.c + +sh_ADD_BUFFER = +sh_ADD_SEGMENT = +sh_VIRT_TO_PHYS = + +dist += kexec/arch/sh/Makefile $(sh_KEXEC_SRCS) \ + kexec/arch/sh/kexec-sh.h \ + kexec/arch/sh/crashdump-sh.h \ + kexec/arch/sh/include/arch/options.h diff --git a/kexec/arch/sh/crashdump-sh.c b/kexec/arch/sh/crashdump-sh.c new file mode 100644 index 
0000000..36e9aaf --- /dev/null +++ b/kexec/arch/sh/crashdump-sh.c @@ -0,0 +1,166 @@ +/* + * crashdump-sh.c - crashdump for SuperH + * Copyright (C) 2008 Magnus Damm + * + * Based on x86 and ppc64 implementation, written by + * Vivek Goyal (vgoyal@in.ibm.com), R Sharada (sharada@in.ibm.com) + * Copyright (C) IBM Corporation, 2005. All rights reserved + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include "../../kexec-syscall.h" +#include "../../crashdump.h" +#include "kexec-sh.h" +#include "crashdump-sh.h" +#include <arch/options.h> + +#define CRASH_MAX_MEMORY_RANGES 64 +static struct memory_range crash_memory_range[CRASH_MAX_MEMORY_RANGES]; + +static int crash_sh_range_nr; +static int crash_sh_memory_range_callback(void *UNUSED(data), int UNUSED(nr), + char *str, + unsigned long long base, + unsigned long long length) +{ + + struct memory_range *range = crash_memory_range; + struct memory_range *range2 = crash_memory_range; + + range += crash_sh_range_nr; + + if (crash_sh_range_nr >= CRASH_MAX_MEMORY_RANGES) { + return 1; + } + + if (strncmp(str, "System RAM\n", 11) == 0) { + range->start = base; + range->end = base + length - 1; + range->type = RANGE_RAM; + crash_sh_range_nr++; + } + + if (strncmp(str, "Crash kernel\n", 13) == 0) { + if (!crash_sh_range_nr) + die("Unsupported /proc/iomem format\n"); + + range2 = range - 1; + if ((base + length - 1) < range2->end) { + range->start = base + length; + range->end = range2->end; + range->type = RANGE_RAM; + crash_sh_range_nr++; + } + range2->end = base - 1; + } + + return 0; +} + +/* Return a sorted list of available memory ranges. 
*/ +static int crash_get_memory_ranges(struct memory_range **range, int *ranges) +{ + crash_sh_range_nr = 0; + + kexec_iomem_for_each_line(NULL, crash_sh_memory_range_callback, NULL); + *range = crash_memory_range; + *ranges = crash_sh_range_nr; + return 0; +} + +static struct crash_elf_info elf_info32 = +{ + class: ELFCLASS32, + data: ELFDATA2LSB, + machine: EM_SH, + page_offset: PAGE_OFFSET, +}; + +static int add_cmdline_param(char *cmdline, uint64_t addr, char *cmdstr, + char *byte) +{ + int cmdlen, len, align = 1024; + char str[COMMAND_LINE_SIZE], *ptr; + + /* Passing in =xxxK / =xxxM format. Saves space required in cmdline.*/ + switch (byte[0]) { + case 'K': + if (addr%align) + return -1; + addr = addr/align; + break; + case 'M': + addr = addr/(align *align); + break; + } + ptr = str; + strcpy(str, cmdstr); + ptr += strlen(str); + ultoa(addr, ptr); + strcat(str, byte); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + if (cmdlen > (COMMAND_LINE_SIZE - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + + dbgprintf("Command line after adding elfcorehdr: %s\n", cmdline); + + return 0; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for storing elf headers for crash memory image. 
+ */ +int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline) +{ + void *tmp; + unsigned long sz, elfcorehdr; + int nr_ranges; + struct memory_range *mem_range; + + if (crash_get_memory_ranges(&mem_range, &nr_ranges) < 0) + return -1; + + if (crash_create_elf32_headers(info, &elf_info32, + mem_range, nr_ranges, + &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) + return -1; + + elfcorehdr = add_buffer_phys_virt(info, tmp, sz, sz, 1024, + 0, 0xffffffff, -1, 0); + + dbgprintf("Created elf header segment at 0x%lx\n", elfcorehdr); + add_cmdline_param(mod_cmdline, elfcorehdr, " elfcorehdr=", "K"); + add_cmdline_param(mod_cmdline, elfcorehdr - mem_min, " mem=", "K"); + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + uint64_t start, end; + + return parse_iomem_single("Crash kernel\n", &start, &end) == 0 ? + (start != end) : 0; +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + return parse_iomem_single("Crash kernel\n", start, end); +} diff --git a/kexec/arch/sh/crashdump-sh.h b/kexec/arch/sh/crashdump-sh.h new file mode 100644 index 0000000..c5d4102 --- /dev/null +++ b/kexec/arch/sh/crashdump-sh.h @@ -0,0 +1,9 @@ +#ifndef CRASHDUMP_SH_H +#define CRASHDUMP_SH_H + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline); + +#define PAGE_OFFSET 0x80000000 + +#endif /* CRASHDUMP_SH_H */ diff --git a/kexec/arch/sh/include/arch/options.h b/kexec/arch/sh/include/arch/options.h new file mode 100644 index 0000000..f923eb4 --- /dev/null +++ b/kexec/arch/sh/include/arch/options.h @@ -0,0 +1,42 @@ +#ifndef KEXEC_ARCH_SH_OPTIONS_H +#define KEXEC_ARCH_SH_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) +#define OPT_APPEND (OPT_ARCH_MAX+1) +#define OPT_EMPTYZERO (OPT_ARCH_MAX+2) +#define OPT_NBSD_HOWTO (OPT_ARCH_MAX+3) +#define OPT_NBSD_MROOT (OPT_ARCH_MAX+4) + +/* Options relevant to the architecture (excluding loader-specific ones): */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + {"command-line", 1, 0, 
OPT_APPEND}, \ + {"append", 1, 0, OPT_APPEND}, \ + {"empty-zero", 1, 0, OPT_APPEND}, \ + {"howto", 1, 0, OPT_NBSD_HOWTO}, \ + {"miniroot", 1, 0, OPT_NBSD_MROOT}, +/* These options seem to be loader-specific rather than superh-specific, so + * ought to be moved to KEXEC_ALL_OPTIONS below and parsed in the relevant + * loader, e.g. kexec-netbsd-sh.c + */ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS KEXEC_ARCH_OPTIONS +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_SH_OPTIONS_H */ diff --git a/kexec/arch/sh/kexec-elf-rel-sh.c b/kexec/arch/sh/kexec-elf-rel-sh.c new file mode 100644 index 0000000..3993ee8 --- /dev/null +++ b/kexec/arch/sh/kexec-elf-rel-sh.c @@ -0,0 +1,54 @@ +/* + * kexec-elf-rel-sh.c - ELF relocations for SuperH + * Copyright (C) 2008 Paul Mundt + * + * Based on the SHcompact module loader (arch/sh/kernel/module.c) in the + * Linux kernel, which is written by: + * + * Copyright (C) 2003 - 2008 Kaz Kojima & Paul Mundt + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + /* Intentionally don't bother with endianness validation, it's + * configurable */ + + if (ehdr->ei_class != ELFCLASS32) + return 0; + if (ehdr->e_machine != EM_SH) + return 0; + + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *orig_loc, + unsigned long UNUSED(address), unsigned long relocation) +{ + uint32_t *location = orig_loc; + uint32_t value; + + switch (r_type) { + case R_SH_DIR32: + value = get_unaligned(location); + value += relocation; + put_unaligned(value, location); + break; + case R_SH_REL32: + relocation = (relocation - (uint32_t)location); + value = get_unaligned(location); + value += relocation; + put_unaligned(value, location); + break; + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } +} diff --git a/kexec/arch/sh/kexec-elf-sh.c b/kexec/arch/sh/kexec-elf-sh.c new file mode 100644 index 0000000..897552c --- /dev/null +++ b/kexec/arch/sh/kexec-elf-sh.c @@ -0,0 +1,136 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2008 Magnus Damm + * + * Based on x86 implementation, + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include <arch/options.h> +#include "crashdump-sh.h" +#include "kexec-sh.h" + +int elf_sh_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) + goto out; + + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_SH) { + result = -1; + goto out; + } + + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +void elf_sh_usage(void) +{ + printf(" --append=STRING Set the kernel command line to STRING\n" + ); +} + +int elf_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *command_line; + char *modified_cmdline; + struct mem_sym sym; + int opt, rc; + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { 0, 0, 0, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* + * Parse the command line arguments + */ + command_line = modified_cmdline = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + command_line = optarg; + break; + } + } + + /* Need to append some command line parameters internally in case of + * taking crash dumps. 
+ */ + if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (command_line) { + strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + } + + /* Load the ELF executable */ + elf_exec_build_load(info, &ehdr, buf, len, 0); + info->entry = (void *)virt_to_phys(ehdr.e_entry); + + /* If panic kernel is being loaded, additional segments need + * to be created. */ + if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) { + rc = load_crashdump_segments(info, modified_cmdline); + if (rc < 0) + return -1; + /* Use new command line. */ + command_line = modified_cmdline; + } + + /* If we're booting a vmlinux then fill in empty_zero_page */ + if (elf_rel_find_symbol(&ehdr, "empty_zero_page", &sym) == 0) { + char *zp = (void *)ehdr.e_shdr[sym.st_shndx].sh_data; + + kexec_sh_setup_zero_page(zp, 4096, command_line); + } + + return 0; +} diff --git a/kexec/arch/sh/kexec-netbsd-sh.c b/kexec/arch/sh/kexec-netbsd-sh.c new file mode 100644 index 0000000..ed93759 --- /dev/null +++ b/kexec/arch/sh/kexec-netbsd-sh.c @@ -0,0 +1,149 @@ +/* + * kexec-netbsd-sh.c - kexec netbsd loader for the SH + * Copyright (C) 2005 kogiidena@eggplant.ddo.jp + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include <arch/options.h> + +static const int probe_debug = 0; +extern const unsigned char netbsd_booter[]; + +/* + * netbsd_sh_probe - sanity check the elf image + * + * Make sure that the file image has a reasonable chance of working. + */ +int netbsd_sh_probe(const char *buf, off_t UNUSED(len)) +{ + Elf32_Ehdr *ehdr; + + ehdr = (Elf32_Ehdr *)buf; + if(memcmp(buf, ELFMAG, SELFMAG) != 0){ + return -1; + } + if (ehdr->e_machine != EM_SH) { + return -1; + } + return 0; +} + +void netbsd_sh_usage(void) +{ + printf( + " --howto=VALUE NetBSD kernel boot option.\n" + " --miniroot=FILE NetBSD miniroot ramdisk.\n\n"); +} + +int netbsd_sh_load(int argc, char **argv, const char *buf, off_t UNUSED(len), + struct kexec_info *info) +{ + const char *howto, *miniroot; + unsigned long entry, start, size, psz; + char *miniroot_buf; + off_t miniroot_length; + unsigned int howto_value; + unsigned char *param; + unsigned long *paraml; + unsigned char *img; + + int opt; + + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {0, 0, 0, 0}, + }; + + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + howto = miniroot = 0; + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_NBSD_HOWTO: + howto = optarg; + break; + case OPT_NBSD_MROOT: + miniroot = optarg; + break; + } + } + + /* howto */ + howto_value = 0; + if(howto){ + howto_value = strtol(howto, NULL, 0); + } + + psz = getpagesize(); + + /* Parse the Elf file */ + { + Elf32_Ehdr *ehdr; + Elf32_Phdr 
*phdr; + unsigned long bbs; + ehdr = (Elf32_Ehdr *)buf; + /* Only the first program header is examined. */ + phdr = (Elf32_Phdr *)&buf[ehdr->e_phoff]; + + entry = ehdr->e_entry; + img = (unsigned char *)&buf[phdr->p_offset]; + /* Only the low 29 bits of p_paddr are used as the load address. */ + start = (phdr->p_paddr) & 0x1fffffff; + bbs = phdr->p_filesz; + size = phdr->p_memsz; + + if(size < bbs){ + size = bbs; + } + + size = _ALIGN(size, psz); + /* + * Pass only the p_filesz bytes present in the file and let memsz + * cover the page-aligned remainder. The old code zero-filled that + * remainder in place with memset(&img[bbs], 0, size-bbs), writing + * through the const image buffer over whatever follows the segment + * and potentially past the end of the buffer. + * NOTE(review): relies on add_segment() accepting bufsz < memsz + * and the excess being zeroed at load time - confirm. + */ + add_segment(info, img, bbs, start, size); + start += size; + } + + /* miniroot file */ + miniroot_buf = 0; + if (miniroot) { + miniroot_buf = slurp_file(miniroot, &miniroot_length); + /* 0x200 is set whenever a miniroot is supplied (presumably the + * NetBSD RB_MINIROOT boothowto bit - confirm). */ + howto_value |= 0x200; + size = _ALIGN(miniroot_length, psz); + /* bufsz is the length slurp_file() reported; using the aligned + * size here could read past the end of miniroot_buf. */ + add_segment(info, miniroot_buf, miniroot_length, start, size); + start += size; + } + + /* howto & bootinfo */ + param = xmalloc(4096); + memset(param, 0, 4096); + /* paraml[] overlays the page from byte 256 on, right behind the + * 256 bytes of booter code copied in below. */ + paraml = (unsigned long *) &param[256]; + memcpy(param, netbsd_booter, 256); + paraml[0] = entry; + paraml[1] = howto_value; + add_segment(info, param, 4096, start, 4096); + + /* For now we don't have arguments to pass :( */ + info->entry = (void *) (start | 0xa0000000); + return 0; +} diff --git a/kexec/arch/sh/kexec-sh.c b/kexec/arch/sh/kexec-sh.c new file mode 100644 index 0000000..ce341c8 --- /dev/null +++ b/kexec/arch/sh/kexec-sh.c @@ -0,0 +1,259 @@ +/* + * kexec-sh.c - kexec for the SH + * Copyright (C) 2004 kogiidena@eggplant.ddo.jp + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-sh.h" +#include <arch/options.h> + +#define MAX_MEMORY_RANGES 64 +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +static int kexec_sh_memory_range_callback(void *UNUSED(data), int nr, + char *UNUSED(str), + unsigned long long base, + unsigned long long length) +{ + if (nr < MAX_MEMORY_RANGES) { + memory_range[nr].start = base; + memory_range[nr].end = base + length - 1; + memory_range[nr].type = RANGE_RAM; + return 0; + } + + return 1; +} + +/* Return a sorted list of available memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + int nr, ret; + nr = kexec_iomem_for_each_line("System RAM\n", + kexec_sh_memory_range_callback, NULL); + *range = memory_range; + *ranges = nr; + + /* + * Redefine the memory region boundaries if kernel + * exports the limits and if it is panic kernel. + * Override user values only if kernel exported values are + * subset of user defined values. + */ + if (kexec_flags & KEXEC_ON_CRASH) { + unsigned long long start, end; + + ret = parse_iomem_single("Crash kernel\n", &start, &end); + if (ret != 0) { + fprintf(stderr, "parse_iomem_single failed.\n"); + return -1; + } + + if (start > mem_min) + mem_min = start; + if (end < mem_max) + mem_max = end; + } + + return 0; +} + +/* Supported file types and callbacks */ +struct file_type file_type[] = { + /* uImage is probed before zImage because the latter also accepts + uncompressed images. 
*/ + { "uImage-sh", uImage_sh_probe, uImage_sh_load, zImage_sh_usage }, + { "zImage-sh", zImage_sh_probe, zImage_sh_load, zImage_sh_usage }, + { "elf-sh", elf_sh_probe, elf_sh_load, elf_sh_usage }, + { "netbsd-sh", netbsd_sh_probe, netbsd_sh_load, netbsd_sh_usage }, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + + +void arch_usage(void) +{ + + printf( + " none\n\n" + "Default options:\n" + " --append=\"%s\"\n" + " STRING of --append is set from /proc/cmdline as default.\n" + ,get_append()); + +} + +int arch_process_options(int argc, char **argv) +{ + /* The common options amongst loaders (e.g. --append) should be read + * here, and the loader-specific options (e.g. NetBSD stuff) should + * then be re-parsed in the loader. + * (e.g. in kexec-netbsd-sh.c, for example.) + */ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_MAX) { + break; + } + case OPT_APPEND: + case OPT_NBSD_HOWTO: + case OPT_NBSD_MROOT: + ; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + /* For compatibility with older patches + * use KEXEC_ARCH_DEFAULT instead of KEXEC_ARCH_SH here. 
+ */ + { "sh3", KEXEC_ARCH_DEFAULT }, + { "sh4", KEXEC_ARCH_DEFAULT }, + { "sh4a", KEXEC_ARCH_DEFAULT }, + { "sh4al-dsp", KEXEC_ARCH_DEFAULT }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +char append_buf[256]; + +char *get_append(void) +{ + FILE *fp; + int len; + if((fp = fopen("/proc/cmdline", "r")) == NULL){ + die("/proc/cmdline file open error !!\n"); + } + fgets(append_buf, 256, fp); + len = strlen(append_buf); + append_buf[len-1] = 0; + fclose(fp); + return append_buf; +} + +void kexec_sh_setup_zero_page(char *zero_page_buf, size_t zero_page_size, + char *cmd_line) +{ + size_t n = zero_page_size - 0x100; + + memset(zero_page_buf, 0, zero_page_size); + + if (cmd_line) { + if (n > strlen(cmd_line)) + n = strlen(cmd_line); + + memcpy(zero_page_buf + 0x100, cmd_line, n); + zero_page_buf[0x100 + n] = '\0'; + } +} + +static int is_32bit(void) +{ + const char *cpuinfo = "/proc/cpuinfo"; + char line[MAX_LINE]; + FILE *fp; + int status = 0; + + fp = fopen(cpuinfo, "r"); + if (!fp) + die("Cannot open %s\n", cpuinfo); + + while(fgets(line, sizeof(line), fp) != 0) { + const char *key = "address sizes"; + const char *value = " 32 bits physical"; + char *p; + if (strncmp(line, key, strlen(key))) + continue; + p = strchr(line + strlen(key), ':'); + if (!p) + continue; + if (!strncmp(p + 1, value, strlen(value))) + status = 1; + break; + } + + fclose(fp); + + return status; +} + +unsigned long virt_to_phys(unsigned long addr) +{ + unsigned long seg = addr & 0xe0000000; + unsigned long long start = 0; + int have_32bit = is_32bit(); + + if (seg != 0x80000000 && (have_32bit || seg != 0xc0000000)) + die("Virtual address %p is not in P1%s\n", (void *)addr, + have_32bit ? "" : " or P2"); + + /* If 32bit addressing is used then the base of system RAM + * is an offset into physical memory. 
*/ + if (have_32bit) { + unsigned long long end; + int ret; + + /* Assume there is only one "System RAM" region */ + ret = parse_iomem_single("System RAM\n", &start, &end); + if (ret) + die("Could not parse System RAM region " + "in /proc/iomem\n"); + } + + return addr - seg + start; +} + +/* + * add_segment() should convert base to a physical address on superh, + * while the default is just to work with base as is */ +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} + +/* + * add_buffer() should convert base to a physical address on superh, + * while the default is just to work with base as is */ +unsigned long add_buffer(struct kexec_info *info, const void *buf, + unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, + unsigned long buf_max, int buf_end) +{ + return add_buffer_phys_virt(info, buf, bufsz, memsz, buf_align, + buf_min, buf_max, buf_end, 1); +} diff --git a/kexec/arch/sh/kexec-sh.h b/kexec/arch/sh/kexec-sh.h new file mode 100644 index 0000000..7d28ade --- /dev/null +++ b/kexec/arch/sh/kexec-sh.h @@ -0,0 +1,29 @@ +#ifndef KEXEC_SH_H +#define KEXEC_SH_H + +#define COMMAND_LINE_SIZE 2048 + +int uImage_sh_probe(const char *buf, off_t len); +int uImage_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); + +int zImage_sh_probe(const char *buf, off_t len); +int zImage_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void zImage_sh_usage(void); + +int elf_sh_probe(const char *buf, off_t len); +int elf_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_sh_usage(void); + +int netbsd_sh_probe(const char *buf, off_t len); +int netbsd_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void netbsd_sh_usage(void); + +char *get_append(void); +void 
kexec_sh_setup_zero_page(char *zero_page_buf, size_t zero_page_size, + char *cmd_line); + +#endif /* KEXEC_SH_H */ diff --git a/kexec/arch/sh/kexec-uImage-sh.c b/kexec/arch/sh/kexec-uImage-sh.c new file mode 100644 index 0000000..130f12c --- /dev/null +++ b/kexec/arch/sh/kexec-uImage-sh.c @@ -0,0 +1,24 @@ +/* + * uImage support added by Marc Andre Tanner <mat@brain-dump.org> + * + * Cloned from ARM by Paul Mundt, 2009. + */ +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <image.h> +#include <kexec-uImage.h> +#include "../../kexec.h" +#include "kexec-sh.h" + +int uImage_sh_probe(const char *buf, off_t len) +{ + return uImage_probe_kernel(buf, len, IH_ARCH_SH); +} + +int uImage_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + return zImage_sh_load(argc, argv, buf + sizeof(struct image_header), + len - sizeof(struct image_header), info); +} diff --git a/kexec/arch/sh/kexec-zImage-sh.c b/kexec/arch/sh/kexec-zImage-sh.c new file mode 100644 index 0000000..6dc2c13 --- /dev/null +++ b/kexec/arch/sh/kexec-zImage-sh.c @@ -0,0 +1,142 @@ +/* + * kexec-zImage-sh.c - kexec zImage loader for the SH + * Copyright (C) 2005 kogiidena@eggplant.ddo.jp + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include <arch/options.h> +#include "kexec-sh.h" + +static const int probe_debug = 0; + +#define HEAD32_KERNEL_START_ADDR 0 +#define HEAD32_DECOMPRESS_KERNEL_ADDR 1 +#define HEAD32_INIT_STACK_ADDR 2 +#define HEAD32_INIT_SR 3 +#define HEAD32_INIT_SR_VALUE 0x400000F0 + +static unsigned long zImage_head32(const char *buf, int offs) +{ + unsigned long *values = (void *)buf; + int k; + + for (k = (0x200 / 4) - 1; k > 0; k--) + if (values[k] != 0x00090009) /* not nop + nop padding*/ + return values[k - offs]; + + return 0; +} + +/* + * zImage_sh_probe - sanity check the elf image + * + * Make sure that the file image has a reasonable chance of working. + */ +int zImage_sh_probe(const char *buf, off_t UNUSED(len)) +{ + if (memcmp(&buf[0x202], "HdrS", 4) != 0) + return -1; + + if (zImage_head32(buf, HEAD32_INIT_SR) != HEAD32_INIT_SR_VALUE) + return -1; + + return 0; +} + +void zImage_sh_usage(void) +{ + printf( + " --append=STRING Set the kernel command line to STRING.\n" + " --empty-zero=ADDRESS Set the kernel top ADDRESS. 
\n\n"); + +} + +int zImage_sh_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + char *command_line; + int opt; + unsigned long empty_zero, zero_page_base, zero_page_size, k; + unsigned long image_base; + char *param; + + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {0, 0, 0, 0}, + }; + + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + command_line = 0; + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + command_line = optarg; + break; + } + } + + if (!command_line) + command_line = get_append(); + + /* assume the zero page is the page before the vmlinux entry point. + * we don't know the page size though, but 64k seems to be max. + * put several 4k zero page copies before the entry point to cover + * all combinations. + */ + + empty_zero = zImage_head32(buf, HEAD32_KERNEL_START_ADDR); + + zero_page_size = 0x10000; + zero_page_base = virt_to_phys(empty_zero - zero_page_size); + + while (!valid_memory_range(info, zero_page_base, + zero_page_base + zero_page_size - 1)) { + zero_page_base += 0x1000; + zero_page_size -= 0x1000; + if (zero_page_size == 0) + die("Unable to determine zero page size from %p \n", + (void *)empty_zero); + } + + param = xmalloc(zero_page_size); + for (k = 0; k < (zero_page_size / 0x1000); k++) + kexec_sh_setup_zero_page(param + (k * 0x1000), 0x1000, + command_line); + + add_segment(info, param, zero_page_size, + 0x80000000 | zero_page_base, zero_page_size); + + /* load image a bit above the zero page, round up to 64k + * the zImage will relocate itself, but only up seems supported. 
+ */ + + image_base = _ALIGN(empty_zero, 0x10000); + add_segment(info, buf, len, image_base, len); + info->entry = (void *)virt_to_phys(image_base); + return 0; +} diff --git a/kexec/arch/sh/netbsd_booter.S b/kexec/arch/sh/netbsd_booter.S new file mode 100644 index 0000000..d4d16df --- /dev/null +++ b/kexec/arch/sh/netbsd_booter.S @@ -0,0 +1,47 @@ + .globl netbsd_booter +netbsd_booter: + mov.l ccr,r0 + mov #0,r1 + mov.l r1,@r0 + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + mova netbsd_start,r0 + mov.l @r0,r1 + add #4,r0 + mov.l @r0,r4 ! howto + add #4,r0 + mov r0,r5 ! bootinfo + jmp @r1 + nop + nop + nop + .align 4 +ccr: .long 0xff00001c + + .align 8 +netbsd_start: + .long 0x8c001000 + .long 0x200 ! howto + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! bootinfo + .long 0 ! 
bootinfo + diff --git a/kexec/arch/x86_64/Makefile b/kexec/arch/x86_64/Makefile new file mode 100644 index 0000000..275add5 --- /dev/null +++ b/kexec/arch/x86_64/Makefile @@ -0,0 +1,24 @@ +# +# kexec x86_64 (linux booting linux) +# +x86_64_KEXEC_SRCS = kexec/arch/i386/kexec-elf-x86.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-bzImage.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-multiboot-x86.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-mb2-x86.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-beoboot-x86.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-nbi.c +x86_64_KEXEC_SRCS += kexec/arch/i386/x86-linux-setup.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-x86-common.c +x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c + +x86_64_KEXEC_SRCS_native = kexec/arch/x86_64/kexec-x86_64.c +x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-x86_64.c +x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-rel-x86_64.c +x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-bzImage64.c + +x86_64_KEXEC_SRCS += $(x86_64_KEXEC_SRCS_native) + +# Don't add sources in i386/ to dist, as i386/Makefile adds them +dist += kexec/arch/x86_64/Makefile $(x86_64_KEXEC_SRCS_native) \ + kexec/arch/x86_64/kexec-x86_64.h \ + kexec/arch/x86_64/include/arch/options.h diff --git a/kexec/arch/x86_64/include/arch/options.h b/kexec/arch/x86_64/include/arch/options.h new file mode 120000 index 0000000..047b0f9 --- /dev/null +++ b/kexec/arch/x86_64/include/arch/options.h @@ -0,0 +1 @@ +../../../i386/include/arch/options.h
\ No newline at end of file diff --git a/kexec/arch/x86_64/kexec-bzImage64.c b/kexec/arch/x86_64/kexec-bzImage64.c new file mode 100644 index 0000000..aba4e3b --- /dev/null +++ b/kexec/arch/x86_64/kexec-bzImage64.c @@ -0,0 +1,397 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2010 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-x86_64.h" +#include "../i386/x86-linux-setup.h" +#include "../i386/crashdump-x86.h" +#include <arch/options.h> + +static const int probe_debug = 0; + +int bzImage64_probe(const char *buf, off_t len) +{ + const struct x86_linux_header *header; + + if ((uintmax_t)len < (uintmax_t)(2 * 512)) { + if (probe_debug) + fprintf(stderr, "File is too short to be a bzImage!\n"); + return -1; + } + header = (const struct x86_linux_header *)buf; + if (memcmp(header->header_magic, "HdrS", 4) != 0) { + if (probe_debug) + fprintf(stderr, 
"Not a bzImage\n"); + return -1; + } + if (header->boot_sector_magic != 0xAA55) { + if (probe_debug) + fprintf(stderr, "No x86 boot sector present\n"); + /* No x86 boot sector present */ + return -1; + } + if (header->protocol_version < 0x020C) { + if (probe_debug) + fprintf(stderr, "Must be at least protocol version 2.12\n"); + /* Must be at least protocol version 2.12 */ + return -1; + } + if ((header->loadflags & 1) == 0) { + if (probe_debug) + fprintf(stderr, "zImage not a bzImage\n"); + /* Not a bzImage */ + return -1; + } + if ((header->xloadflags & 3) != 3) { + if (probe_debug) + fprintf(stderr, "Not a relocatable bzImage64\n"); + /* Must be KERNEL_64 and CAN_BE_LOADED_ABOVE_4G */ + return -1; + } + +#define XLF_EFI_KEXEC (1 << 4) + if ((header->xloadflags & XLF_EFI_KEXEC) == XLF_EFI_KEXEC) + bzImage_support_efi_boot = 1; + + /* I've got a relocatable bzImage64 */ + if (probe_debug) + fprintf(stderr, "It's a relocatable bzImage64\n"); + return 0; +} + +void bzImage64_usage(void) +{ + printf( " --entry-32bit Use the kernels 32bit entry point.\n" + " --real-mode Use the kernels real mode entry point.\n" + " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --reuse-cmdline Use kernel command line from running system.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + ); +} + +static int do_bzImage64_load(struct kexec_info *info, + const char *kernel, off_t kernel_len, + const char *command_line, off_t command_line_len, + const char *initrd, off_t initrd_len) +{ + struct x86_linux_header setup_header; + struct x86_linux_param_header *real_mode; + int setup_sects; + size_t size; + int kern16_size; + unsigned long setup_base, setup_size, setup_header_size; + struct entry64_regs regs64; + char *modified_cmdline; + unsigned long cmdline_end; + unsigned long align, addr, k_size; + unsigned kern16_size_needed; 
+ + /* + * Find out about the file I am about to load. + */ + if ((uintmax_t)kernel_len < (uintmax_t)(2 * 512)) + return -1; + + memcpy(&setup_header, kernel, sizeof(setup_header)); + setup_sects = setup_header.setup_sects; + if (setup_sects == 0) + setup_sects = 4; + kern16_size = (setup_sects + 1) * 512; + if (kernel_len < kern16_size) { + fprintf(stderr, "BzImage truncated?\n"); + return -1; + } + + if ((uintmax_t)command_line_len > (uintmax_t)setup_header.cmdline_size) { + dbgprintf("Kernel command line too long for kernel!\n"); + return -1; + } + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (command_line) { + strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + + /* If panic kernel is being loaded, additional segments need + * to be created. load_crashdump_segments will take care of + * loading the segments as high in memory as possible, hence + * in turn as away as possible from kernel to avoid being + * stomped by the kernel. 
+ */ + if (load_crashdump_segments(info, modified_cmdline, -1, 0) < 0) + return -1; + + /* Use new command line buffer */ + command_line = modified_cmdline; + command_line_len = strlen(command_line) + 1; + } + + /* x86_64 purgatory could be anywhere */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0x3000, -1, -1, 0); + dbgprintf("Loaded purgatory at addr 0x%lx\n", info->rhdr.rel_addr); + /* The argument/parameter segment */ + kern16_size_needed = kern16_size; + if (kern16_size_needed < 4096) + kern16_size_needed = 4096; + setup_size = kern16_size_needed + command_line_len + + PURGATORY_CMDLINE_SIZE; + real_mode = xmalloc(setup_size); + memset(real_mode, 0, setup_size); + + /* only copy setup_header */ + setup_header_size = kernel[0x201] + 0x202 - 0x1f1; + if (setup_header_size > 0x7f) + setup_header_size = 0x7f; + memcpy((unsigned char *)real_mode + 0x1f1, kernel + 0x1f1, + setup_header_size); + + /* No real mode code will be executing. setup segment can be loaded + * anywhere as we will be just reading command line. 
+ */ + setup_base = add_buffer(info, real_mode, setup_size, setup_size, + 16, 0x3000, -1, -1); + + dbgprintf("Loaded real_mode_data and command line at 0x%lx\n", + setup_base); + + /* The main kernel segment */ + k_size = kernel_len - kern16_size; + /* need to use run-time size for buffer searching */ + dbgprintf("kernel init_size 0x%x\n", real_mode->init_size); + size = _ALIGN(real_mode->init_size, 4096); + align = real_mode->kernel_alignment; + addr = add_buffer(info, kernel + kern16_size, k_size, + size, align, 0x100000, -1, -1); + if (addr == ULONG_MAX) + die("can not load bzImage64"); + dbgprintf("Loaded 64bit kernel at 0x%lx\n", addr); + + /* Tell the kernel what is going on */ + setup_linux_bootloader_parameters_high(info, real_mode, setup_base, + kern16_size_needed, command_line, command_line_len, + initrd, initrd_len, 1); /* put initrd high too */ + + /* Fetch purgatory's entry64_regs block, fill in the boot state and + * write it back. Both calls take the address of the local regs64. */ + elf_rel_get_symbol(&info->rhdr, "entry64_regs", &regs64, + sizeof(regs64)); + regs64.rbx = 0; /* Bootstrap processor */ + regs64.rsi = setup_base; /* Pointer to the parameters */ + regs64.rip = addr + 0x200; /* the entry point for startup_64 */ + regs64.rsp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */ + elf_rel_set_symbol(&info->rhdr, "entry64_regs", &regs64, + sizeof(regs64)); + + cmdline_end = setup_base + kern16_size_needed + command_line_len - 1; + elf_rel_set_symbol(&info->rhdr, "cmdline_end", &cmdline_end, + sizeof(unsigned long)); + + /* Fill in the information BIOS calls would normally provide. */ + setup_linux_system_parameters(info, real_mode); + + return 0; +} + +/* This assumes file is being loaded using file based kexec syscall */ +int bzImage64_load_file(int argc, char **argv, struct kexec_info *info) +{ + int ret = 0; + char *command_line = NULL, *tmp_cmdline = NULL; + const char *ramdisk = NULL, *append = NULL; + int entry_16bit = 0, entry_32bit = 0; + int opt; + int command_line_len; + + /* See options.h -- add any more there, too. 
*/ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_APPEND }, + { "append", 1, 0, OPT_APPEND }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "initrd", 1, 0, OPT_RAMDISK }, + { "ramdisk", 1, 0, OPT_RAMDISK }, + { "real-mode", 0, 0, OPT_REAL_MODE }, + { "entry-32bit", 0, 0, OPT_ENTRY_32BIT }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR "d"; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_REAL_MODE: + entry_16bit = 1; + break; + case OPT_ENTRY_32BIT: + entry_32bit = 1; + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); + if (tmp_cmdline) + free(tmp_cmdline); + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) + 1; + } else { + command_line = strdup("\0"); + command_line_len = 1; + } + + if (entry_16bit || entry_32bit) { + fprintf(stderr, "Kexec2 syscall does not support 16bit" + " or 32bit entry yet\n"); + ret = -1; + goto out; + } + + if (ramdisk) { + info->initrd_fd = open(ramdisk, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, "Could not open initrd file %s:%s\n", + ramdisk, strerror(errno)); + ret = -1; + goto out; + } + } + + info->command_line = command_line; + info->command_line_len = command_line_len; + return ret; +out: + free(command_line); + return ret; +} + +int bzImage64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + char *command_line = NULL, *tmp_cmdline = NULL; + const char *ramdisk = NULL, *append = NULL; + char *ramdisk_buf; + off_t ramdisk_length = 0; + int command_line_len; + int entry_16bit = 0, entry_32bit = 0; + int opt; + int result; + + if 
(info->file_mode) + return bzImage64_load_file(argc, argv, info); + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_APPEND }, + { "append", 1, 0, OPT_APPEND }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "initrd", 1, 0, OPT_RAMDISK }, + { "ramdisk", 1, 0, OPT_RAMDISK }, + { "real-mode", 0, 0, OPT_REAL_MODE }, + { "entry-32bit", 0, 0, OPT_ENTRY_32BIT }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR "d"; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_REAL_MODE: + entry_16bit = 1; + break; + case OPT_ENTRY_32BIT: + entry_32bit = 1; + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); + if (tmp_cmdline) + free(tmp_cmdline); + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) + 1; + } else { + command_line = strdup("\0"); + command_line_len = 1; + } + ramdisk_buf = 0; + if (ramdisk) + ramdisk_buf = slurp_file(ramdisk, &ramdisk_length); + + if (entry_16bit || entry_32bit) + result = do_bzImage_load(info, buf, len, command_line, + command_line_len, ramdisk_buf, + ramdisk_length, 0, 0, entry_16bit); + else + result = do_bzImage64_load(info, buf, len, command_line, + command_line_len, ramdisk_buf, + ramdisk_length); + + free(command_line); + return result; +} diff --git a/kexec/arch/x86_64/kexec-elf-rel-x86_64.c b/kexec/arch/x86_64/kexec-elf-rel-x86_64.c new file mode 100644 index 0000000..0d22f3b --- /dev/null +++ b/kexec/arch/x86_64/kexec-elf-rel-x86_64.c @@ -0,0 +1,94 @@ +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int 
machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2LSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS64 && + ehdr->ei_class != ELFCLASS32) { /* x32 */ + return 0; + } + if (ehdr->e_machine != EM_X86_64) { + return 0; + } + return 1; +} + +static const char *reloc_name(unsigned long r_type) +{ + static const char *r_name[] = { + "R_X86_64_NONE", + "R_X86_64_64", + "R_X86_64_PC32", + "R_X86_64_GOT32", + "R_X86_64_PLT32", + "R_X86_64_COPY", + "R_X86_64_GLOB_DAT", + "R_X86_64_JUMP_SLOT", + "R_X86_64_RELATIVE", + "R_X86_64_GOTPCREL", + "R_X86_64_32", + "R_X86_64_32S", + "R_X86_64_16", + "R_X86_64_PC16", + "R_X86_64_8", + "R_X86_64_PC8", + "R_X86_64_DTPMOD64", + "R_X86_64_DTPOFF64", + "R_X86_64_TPOFF64", + "R_X86_64_TLSGD", + "R_X86_64_TLSLD", + "R_X86_64_DTPOFF32", + "R_X86_64_GOTTPOFF", + "R_X86_64_TPOFF32", + }; + static char buf[100]; + const char *name; + if (r_type < (sizeof(r_name)/sizeof(r_name[0]))){ + name = r_name[r_type]; + } + else { + sprintf(buf, "R_X86_64_%lu", r_type); + name = buf; + } + return name; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + dbgprintf("%s\n", reloc_name(r_type)); + switch(r_type) { + case R_X86_64_NONE: + break; + case R_X86_64_64: + *(uint64_t *)location = value; + break; + case R_X86_64_32: + *(uint32_t *)location = value; + if (value != *(uint32_t *)location) + goto overflow; + break; + case R_X86_64_32S: + *(uint32_t *)location = value; + if ((int64_t)value != *(int32_t *)location) + goto overflow; + break; + case R_X86_64_PC32: + case R_X86_64_PLT32: + *(uint32_t *)location = value - address; + break; + default: + die("Unhandled rela relocation: %s\n", reloc_name(r_type)); + break; + } + return; + overflow: + die("overflow in relocation type %s val %lx\n", + reloc_name(r_type), value); +} diff --git a/kexec/arch/x86_64/kexec-elf-x86_64.c 
b/kexec/arch/x86_64/kexec-elf-x86_64.c new file mode 100644 index 0000000..7f9540a --- /dev/null +++ b/kexec/arch/x86_64/kexec-elf-x86_64.c @@ -0,0 +1,257 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <x86/x86-linux.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "../../kexec-elf.h" +#include "../../kexec-elf-boot.h" +#include "../i386/x86-linux-setup.h" +#include "kexec-x86_64.h" +#include "../i386/kexec-x86.h" +#include "../i386/crashdump-x86.h" +#include <arch/options.h> + +int elf_x86_64_probe(const char *buf, off_t len) +{ + return elf_x86_any_probe(buf, len, CORE_TYPE_ELF64); +} + +void elf_x86_64_usage(void) +{ + printf( " --command-line=STRING Set the kernel command line to STRING\n" + " --append=STRING Set the kernel command line to STRING\n" + " --reuse-cmdline Use kernel command line from running system.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " 
--args-linux Pass linux kernel style options\n" + " --args-elf Pass elf boot notes\n" + " --args-none Jump directly from the kernel\n" + ); +} + +int elf_x86_64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + const char *append = NULL; + char *command_line = NULL, *modified_cmdline; + char *tmp_cmdline = NULL; + int command_line_len; + const char *ramdisk; + unsigned long entry, max_addr; + int arg_style; +#define ARG_STYLE_ELF 0 +#define ARG_STYLE_LINUX 1 +#define ARG_STYLE_NONE 2 + int opt; + int result = 0; + const char *error_msg = NULL; + + /* See options.h and add any new options there too! */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "args-elf", 0, NULL, OPT_ARGS_ELF }, + { "args-linux", 0, NULL, OPT_ARGS_LINUX }, + { "args-none", 0, NULL, OPT_ARGS_NONE }, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_ARCH_OPT_STR ""; + + /* + * Parse the command line arguments + */ + arg_style = ARG_STYLE_ELF; + modified_cmdline = 0; + ramdisk = 0; + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + fprintf(stderr, "Unknown option: opt: %d\n", opt); + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_ARGS_ELF: + arg_style = ARG_STYLE_ELF; + break; + case OPT_ARGS_LINUX: + arg_style = ARG_STYLE_LINUX; + break; + case OPT_ARGS_NONE: +#ifdef __x86_64__ + arg_style = ARG_STYLE_NONE; +#else + die("--args-none only works on arch x86_64\n"); +#endif + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); 
+ if (tmp_cmdline) + free(tmp_cmdline); + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) +1; + } + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (command_line) { + strncpy(modified_cmdline, command_line, + COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + } + + /* Load the ELF executable */ + elf_exec_build_load(info, &ehdr, buf, len, 0); + + entry = ehdr.e_entry; + max_addr = elf_max_addr(&ehdr); + + /* Do we want arguments? */ + if (arg_style != ARG_STYLE_NONE) { + /* Load the setup code */ + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + 0, ULONG_MAX, 1, 0); + } + if (arg_style == ARG_STYLE_NONE) { + info->entry = (void *)entry; + + } + else if (arg_style == ARG_STYLE_ELF) { + unsigned long note_base; + struct entry64_regs regs; + + /* Setup the ELF boot notes */ + note_base = elf_boot_notes(info, max_addr, command_line, command_line_len); + + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry64_regs", ®s, sizeof(regs)); + regs.rdi = note_base; /* The notes (arg1) */ + regs.rip = entry; /* The entry point */ + regs.rsp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */ + elf_rel_set_symbol(&info->rhdr, "entry64_regs", ®s, sizeof(regs)); + + if (ramdisk) { + error_msg = "Ramdisks not supported with generic elf arguments"; + goto out; + } + } + else if (arg_style == ARG_STYLE_LINUX) { + struct x86_linux_faked_param_header *hdr; + unsigned long param_base; + char *ramdisk_buf; + off_t ramdisk_length; + struct entry64_regs regs; + int rc=0; + + /* Get the linux parameter header */ + hdr = xmalloc(sizeof(*hdr)); + param_base = add_buffer(info, hdr, sizeof(*hdr), sizeof(*hdr), + 16, 0, max_addr, 1); + + /* Initialize the parameter header */ + 
memset(hdr, 0, sizeof(*hdr)); + init_linux_parameters(&hdr->hdr); + + /* Add a ramdisk to the current image */ + ramdisk_buf = 0; + ramdisk_length = 0; + if (ramdisk) { + ramdisk_buf = slurp_file(ramdisk, &ramdisk_length); + } + + /* If panic kernel is being loaded, additional segments need + * to be created. */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + rc = load_crashdump_segments(info, modified_cmdline, + max_addr, 0); + if (rc < 0) { + result = -1; + goto out; + } + /* Use new command line. */ + free(command_line); + command_line = modified_cmdline; + command_line_len = strlen(modified_cmdline) + 1; + modified_cmdline = NULL; + } + + /* Tell the kernel what is going on */ + setup_linux_bootloader_parameters(info, &hdr->hdr, param_base, + offsetof(struct x86_linux_faked_param_header, command_line), + command_line, command_line_len, + ramdisk_buf, ramdisk_length); + + /* Fill in the information bios calls would usually provide */ + setup_linux_system_parameters(info, &hdr->hdr); + + /* Initialize the registers */ + elf_rel_get_symbol(&info->rhdr, "entry64_regs", ®s, sizeof(regs)); + regs.rbx = 0; /* Bootstrap processor */ + regs.rsi = param_base; /* Pointer to the parameters */ + regs.rip = entry; /* the entry point */ + regs.rsp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */ + elf_rel_set_symbol(&info->rhdr, "entry64_regs", ®s, sizeof(regs)); + } + else { + error_msg = "Unknown argument style\n"; + } + +out: + free(command_line); + free(modified_cmdline); + if (error_msg) + die("%s", error_msg); + return result; +} diff --git a/kexec/arch/x86_64/kexec-x86_64.c b/kexec/arch/x86_64/kexec-x86_64.c new file mode 100644 index 0000000..ffd84f0 --- /dev/null +++ b/kexec/arch/x86_64/kexec-x86_64.c @@ -0,0 +1,190 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License 
as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <stdlib.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-x86_64.h" +#include "../i386/crashdump-x86.h" +#include <arch/options.h> + +struct file_type file_type[] = { + { "multiboot2-x86", multiboot2_x86_probe, multiboot2_x86_load, + multiboot2_x86_usage }, + { "elf-x86_64", elf_x86_64_probe, elf_x86_64_load, elf_x86_64_usage }, + { "multiboot-x86", multiboot_x86_probe, multiboot_x86_load, + multiboot_x86_usage }, + { "elf-x86", elf_x86_probe, elf_x86_load, elf_x86_usage }, + { "bzImage64", bzImage64_probe, bzImage64_load, bzImage64_usage }, + { "bzImage", bzImage_probe, bzImage_load, bzImage_usage }, + { "beoboot-x86", beoboot_probe, beoboot_load, beoboot_usage }, + { "nbi-x86", nbi_probe, nbi_load, nbi_usage }, +}; +int file_types = sizeof(file_type)/sizeof(file_type[0]); + + +void arch_usage(void) +{ + printf( + " --reset-vga Attempt to reset a standard vga device\n" + " --serial=<port> Specify the serial port for debug output\n" + " --serial-baud=<baud_rate> Specify the serial port baud rate\n" + " --console-vga Enable the vga console\n" + " --console-serial Enable the serial console\n" + " --pass-memmap-cmdline Pass memory map via command line in kexec on panic case\n" + " --noefi Disable efi support\n" + " 
--reuse-video-type Reuse old boot time video type blindly\n" + ); +} + +struct arch_options_t arch_options = { + .reset_vga = 0, + .serial_base = 0x3f8, + .serial_baud = 0, + .console_vga = 0, + .console_serial = 0, + .core_header_type = CORE_TYPE_ELF64, + .pass_memmap_cmdline = 0, + .noefi = 0, + .reuse_video_type = 0, +}; + +int arch_process_options(int argc, char **argv) +{ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + unsigned long value; + char *end; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + break; + case OPT_RESET_VGA: + arch_options.reset_vga = 1; + break; + case OPT_CONSOLE_VGA: + arch_options.console_vga = 1; + break; + case OPT_CONSOLE_SERIAL: + arch_options.console_serial = 1; + break; + case OPT_SERIAL: + value = ULONG_MAX; + if (strcmp(optarg, "ttyS0") == 0) { + value = 0x3f8; + } + else if (strcmp(optarg, "ttyS1") == 0) { + value = 0x2f8; + } + else if (strncmp(optarg, "0x", 2) == 0) { + value = strtoul(optarg +2, &end, 16); + if (*end != '\0') { + value = ULONG_MAX; + } + } + if (value >= 65536) { + fprintf(stderr, "Bad serial port base '%s'\n", + optarg); + usage(); + return -1; + + } + arch_options.serial_base = value; + break; + case OPT_SERIAL_BAUD: + value = strtoul(optarg, &end, 0); + if ((value > 115200) || ((115200 %value) != 0) || + (value < 9600) || (*end)) + { + fprintf(stderr, "Bad serial port baud rate '%s'\n", + optarg); + usage(); + return -1; + + } + arch_options.serial_baud = value; + break; + case OPT_PASS_MEMMAP_CMDLINE: + arch_options.pass_memmap_cmdline = 1; + break; + case OPT_NOEFI: + arch_options.noefi = 1; + break; + case OPT_REUSE_VIDEO_TYPE: + arch_options.reuse_video_type = 1; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + 
optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + { "x86_64", KEXEC_ARCH_X86_64 }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *info) +{ + uint8_t panic_kernel = 0; + + elf_rel_set_symbol(&info->rhdr, "reset_vga", + &arch_options.reset_vga, sizeof(arch_options.reset_vga)); + elf_rel_set_symbol(&info->rhdr, "serial_base", + &arch_options.serial_base, sizeof(arch_options.serial_base)); + elf_rel_set_symbol(&info->rhdr, "serial_baud", + &arch_options.serial_baud, sizeof(arch_options.serial_baud)); + elf_rel_set_symbol(&info->rhdr, "console_vga", + &arch_options.console_vga, sizeof(arch_options.console_vga)); + elf_rel_set_symbol(&info->rhdr, "console_serial", + &arch_options.console_serial, sizeof(arch_options.console_serial)); + elf_rel_set_symbol(&info->rhdr, "backup_src_start", + &info->backup_src_start, sizeof(info->backup_src_start)); + elf_rel_set_symbol(&info->rhdr, "backup_src_size", + &info->backup_src_size, sizeof(info->backup_src_size)); + + if (info->kexec_flags & KEXEC_ON_CRASH) { + panic_kernel = 1; + elf_rel_set_symbol(&info->rhdr, "backup_start", + &info->backup_start, sizeof(info->backup_start)); + } + elf_rel_set_symbol(&info->rhdr, "panic_kernel", + &panic_kernel, sizeof(panic_kernel)); +} diff --git a/kexec/arch/x86_64/kexec-x86_64.h b/kexec/arch/x86_64/kexec-x86_64.h new file mode 100644 index 0000000..21c3a73 --- /dev/null +++ b/kexec/arch/x86_64/kexec-x86_64.h @@ -0,0 +1,41 @@ +#ifndef KEXEC_X86_64_H +#define KEXEC_X86_64_H + +#include "../i386/kexec-x86.h" + +struct entry64_regs { + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t rbp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rip; +}; + +int elf_x86_64_probe(const char *buf, off_t len); +int 
elf_x86_64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_x86_64_usage(void); + +int bzImage64_probe(const char *buf, off_t len); +int bzImage64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void bzImage64_usage(void); + +int multiboot2_x86_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void multiboot2_x86_usage(void); +int multiboot2_x86_probe(const char *buf, off_t buf_len); + +#endif /* KEXEC_X86_64_H */ diff --git a/kexec/arch_reuse_initrd.c b/kexec/arch_reuse_initrd.c new file mode 100644 index 0000000..b92b8d8 --- /dev/null +++ b/kexec/arch_reuse_initrd.c @@ -0,0 +1,8 @@ +#include "kexec.h" + +unsigned char reuse_initrd = 0; + +void arch_reuse_initrd(void) +{ + die("--reuseinitrd not implemented on this architecture\n"); +} diff --git a/kexec/crashdump-elf.c b/kexec/crashdump-elf.c new file mode 100644 index 0000000..b8bb686 --- /dev/null +++ b/kexec/crashdump-elf.c @@ -0,0 +1,241 @@ + +#if !defined(FUNC) || !defined(EHDR) || !defined(PHDR) +#error FUNC, EHDR and PHDR must be defined +#endif + +#if (ELF_WIDTH == 64) +#define dbgprintf_phdr(prefix, phdr) \ +do { \ + dbgprintf("%s: p_type = %u, p_offset = 0x%llx p_paddr = 0x%llx " \ + "p_vaddr = 0x%llx p_filesz = 0x%llx p_memsz = 0x%llx\n", \ + (prefix), (phdr)->p_type, \ + (unsigned long long)((phdr)->p_offset), \ + (unsigned long long)((phdr)->p_paddr), \ + (unsigned long long)((phdr)->p_vaddr), \ + (unsigned long long)((phdr)->p_filesz), \ + (unsigned long long)((phdr)->p_memsz)); \ +} while(0) +#else +#define dbgprintf_phdr(prefix, phdr) \ +do { \ + dbgprintf("%s: p_type = %u, p_offset = 0x%x " "p_paddr = 0x%x " \ + "p_vaddr = 0x%x p_filesz = 0x%x p_memsz = 0x%x\n", \ + (prefix), (phdr)->p_type, (phdr)->p_offset, (phdr)->p_paddr, \ + (phdr)->p_vaddr, (phdr)->p_filesz, (phdr)->p_memsz); \ +} while(0) +#endif + +/* Prepares the crash memory headers and stores in supplied buffer. 
*/ +int FUNC(struct kexec_info *info, + struct crash_elf_info *elf_info, + struct memory_range *range, int ranges, + void **buf, unsigned long *size, unsigned long align) +{ + EHDR *elf; + PHDR *phdr; + int i; + unsigned long sz; + char *bufp; + long int nr_cpus = 0; + uint64_t notes_addr, notes_len; + uint64_t vmcoreinfo_addr, vmcoreinfo_len; + int has_vmcoreinfo = 0; + int (*get_note_info)(int cpu, uint64_t *addr, uint64_t *len); + long int count_cpu; + + if (xen_present()) + nr_cpus = xen_get_nr_phys_cpus(); + else + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (nr_cpus < 0) { + return -1; + } + + if (xen_present()) { + if (!get_xen_vmcoreinfo(&vmcoreinfo_addr, &vmcoreinfo_len)) + has_vmcoreinfo = 1; + } else + if (!get_kernel_vmcoreinfo(&vmcoreinfo_addr, &vmcoreinfo_len)) + has_vmcoreinfo = 1; + + sz = sizeof(EHDR) + (nr_cpus + has_vmcoreinfo) * sizeof(PHDR) + + ranges * sizeof(PHDR); + + /* + * Certain architectures such as x86_64 and ia64 require a separate + * PT_LOAD program header for the kernel. This is controlled through + * elf_info->kern_size. + * + * The separate PT_LOAD program header is required either because the + * kernel is mapped at a different location than the rest of the + * physical memory or because we need to support relocatable kernels. + * Or both as on x86_64. + * + * In the relocatable kernel case this PT_LOAD segment is used to tell + * where the kernel was actually loaded which may be different from + * the load address present in the vmlinux file. + * + * The extra kernel PT_LOAD program header results in a vmcore file + * which is larger than the size of the physical memory. This is + * because the memory for the kernel is present both in the kernel + * PT_LOAD program header and in the physical RAM program headers. + */ + + if (elf_info->kern_size && !xen_present()) { + sz += sizeof(PHDR); + } + + /* + * Make sure the ELF core header is aligned to at least 1024. 
+ * We do this because the secondary kernel gets the ELF core + * header address on the kernel command line through the memmap= + * option, and this option requires 1k granularity. + */ + + if (align % ELF_CORE_HEADER_ALIGN) { + return -1; + } + + sz = _ALIGN(sz, align); + + bufp = xmalloc(sz); + memset(bufp, 0, sz); + + *buf = bufp; + *size = sz; + + /* Setup ELF Header*/ + elf = (EHDR *) bufp; + bufp += sizeof(EHDR); + memcpy(elf->e_ident, ELFMAG, SELFMAG); + elf->e_ident[EI_CLASS] = elf_info->class; + elf->e_ident[EI_DATA] = elf_info->data; + elf->e_ident[EI_VERSION]= EV_CURRENT; + elf->e_ident[EI_OSABI] = ELFOSABI_NONE; + memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); + elf->e_type = ET_CORE; + elf->e_machine = crash_architecture(elf_info); + elf->e_version = EV_CURRENT; + elf->e_entry = 0; + elf->e_phoff = sizeof(EHDR); + elf->e_shoff = 0; + elf->e_flags = 0; + elf->e_ehsize = sizeof(EHDR); + elf->e_phentsize= sizeof(PHDR); + elf->e_phnum = 0; + elf->e_shentsize= 0; + elf->e_shnum = 0; + elf->e_shstrndx = 0; + + /* Default way to get crash notes is by get_crash_notes_per_cpu() */ + + get_note_info = elf_info->get_note_info; + if (!get_note_info) + get_note_info = get_crash_notes_per_cpu; + + if (xen_present()) + get_note_info = xen_get_note; + + /* PT_NOTE program headers. One per cpu */ + + count_cpu = nr_cpus; + for (i = 0; count_cpu > 0; i++) { + int ret; + + ret = get_note_info(i, ¬es_addr, ¬es_len); + count_cpu--; + if (ret < 0) /* This cpu is not present. Skip it. */ + continue; + + phdr = (PHDR *) bufp; + bufp += sizeof(PHDR); + phdr->p_type = PT_NOTE; + phdr->p_flags = 0; + phdr->p_offset = phdr->p_paddr = notes_addr; + phdr->p_vaddr = 0; + phdr->p_filesz = phdr->p_memsz = notes_len; + /* Do we need any alignment of segments? */ + phdr->p_align = 0; + + /* Increment number of program headers. 
*/ + (elf->e_phnum)++; + dbgprintf_phdr("Elf header", phdr); + } + + if (has_vmcoreinfo && !(info->kexec_flags & KEXEC_PRESERVE_CONTEXT)) { + phdr = (PHDR *) bufp; + bufp += sizeof(PHDR); + phdr->p_type = PT_NOTE; + phdr->p_flags = 0; + phdr->p_offset = phdr->p_paddr = vmcoreinfo_addr; + phdr->p_vaddr = 0; + phdr->p_filesz = phdr->p_memsz = vmcoreinfo_len; + /* Do we need any alignment of segments? */ + phdr->p_align = 0; + + (elf->e_phnum)++; + dbgprintf_phdr("vmcoreinfo header", phdr); + } + + /* Setup an PT_LOAD type program header for the region where + * Kernel is mapped if elf_info->kern_size is non-zero. + */ + + if (elf_info->kern_size && !xen_present()) { + phdr = (PHDR *) bufp; + bufp += sizeof(PHDR); + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_offset = phdr->p_paddr = elf_info->kern_paddr_start; + phdr->p_vaddr = elf_info->kern_vaddr_start; + phdr->p_filesz = phdr->p_memsz = elf_info->kern_size; + phdr->p_align = 0; + (elf->e_phnum)++; + dbgprintf_phdr("Kernel text Elf header", phdr); + } + + /* Setup PT_LOAD type program header for every system RAM chunk. + * A seprate program header for Backup Region*/ + for (i = 0; i < ranges; i++, range++) { + unsigned long long mstart, mend; + if (range->type != RANGE_RAM) + continue; + mstart = range->start; + mend = range->end; + if (!mstart && !mend) + continue; + phdr = (PHDR *) bufp; + bufp += sizeof(PHDR); + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_offset = mstart; + + if (mstart == info->backup_src_start + && (mend - mstart + 1) == info->backup_src_size) + phdr->p_offset = info->backup_start; + + /* We already prepared the header for kernel text. Map + * rest of the memory segments to kernel linearly mapped + * memory region. + */ + phdr->p_paddr = mstart; + phdr->p_vaddr = phys_to_virt(elf_info, mstart); + phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; + /* Do we need any alignment of segments? 
*/ + phdr->p_align = 0; + + /* HIGMEM has a virtual address of -1 */ + + if (elf_info->lowmem_limit + && (mend > (elf_info->lowmem_limit - 1))) + phdr->p_vaddr = -1; + + /* Increment number of program headers. */ + (elf->e_phnum)++; + dbgprintf_phdr("Elf header", phdr); + } + return 0; +} + +#undef dbgprintf_phdr diff --git a/kexec/crashdump-xen.c b/kexec/crashdump-xen.c new file mode 100644 index 0000000..3f59a0d --- /dev/null +++ b/kexec/crashdump-xen.c @@ -0,0 +1,256 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> +#include <elf.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <setjmp.h> +#include <signal.h> +#include "kexec.h" +#include "crashdump.h" +#include "kexec-syscall.h" +#include "config.h" +#include "kexec-xen.h" + +struct crash_note_info { + unsigned long base; + unsigned long length; +}; + +static int xen_phys_cpus; +static struct crash_note_info *xen_phys_notes; + +/* based on code from xen-detect.c */ +static int is_dom0; +#if defined(__i386__) || defined(__x86_64__) +static jmp_buf xen_sigill_jmp; +void xen_sigill_handler(int sig) +{ + longjmp(xen_sigill_jmp, 1); +} + +static void xen_cpuid(uint32_t idx, uint32_t *regs, int pv_context) +{ +#ifdef __i386__ + /* Use the stack to avoid reg constraint failures with some gcc flags */ + asm volatile ( + "push %%eax; push %%ebx; push %%ecx; push %%edx\n\t" + "test %1,%1 ; jz 1f ; ud2a ; .ascii \"xen\" ; 1: cpuid\n\t" + "mov %%eax,(%2); mov %%ebx,4(%2)\n\t" + "mov %%ecx,8(%2); mov %%edx,12(%2)\n\t" + "pop %%edx; pop %%ecx; pop %%ebx; pop %%eax\n\t" + : : "a" (idx), "c" (pv_context), "S" (regs) : "memory" ); +#else + asm volatile ( + "test %5,%5 ; jz 1f ; ud2a ; .ascii \"xen\" ; 1: cpuid\n\t" + : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3]) + : "0" (idx), "1" (pv_context), "2" (0) ); +#endif +} + +static int check_for_xen(int pv_context) 
+{ + uint32_t regs[4]; + char signature[13]; + uint32_t base; + + for (base = 0x40000000; base < 0x40010000; base += 0x100) + { + xen_cpuid(base, regs, pv_context); + + *(uint32_t *)(signature + 0) = regs[1]; + *(uint32_t *)(signature + 4) = regs[2]; + *(uint32_t *)(signature + 8) = regs[3]; + signature[12] = '\0'; + + if (strcmp("XenVMMXenVMM", signature) == 0 && regs[0] >= (base + 2)) + goto found; + } + + return 0; + +found: + xen_cpuid(base + 1, regs, pv_context); + return regs[0]; +} + +static int xen_detect_pv_guest(void) +{ + struct sigaction act, oldact; + int is_pv = -1; + + if (setjmp(xen_sigill_jmp)) + return is_pv; + + memset(&act, 0, sizeof(act)); + act.sa_handler = xen_sigill_handler; + sigemptyset (&act.sa_mask); + if (sigaction(SIGILL, &act, &oldact)) + return is_pv; + if (check_for_xen(1)) + is_pv = 1; + sigaction(SIGILL, &oldact, NULL); + return is_pv; +} +#else +static int xen_detect_pv_guest(void) +{ + return 1; +} +#endif + +/* + * Return 1 if its a PV guest. + * This includes dom0, which is the only PV guest where kexec/kdump works. + * HVM guests have to be handled as native hardware. 
+ */ +int xen_present(void) +{ + if (!is_dom0) { + if (access("/proc/xen", F_OK) == 0) + is_dom0 = xen_detect_pv_guest(); + else + is_dom0 = -1; + } + return is_dom0 > 0; +} + +unsigned long xen_architecture(struct crash_elf_info *elf_info) +{ + unsigned long machine = elf_info->machine; +#ifdef HAVE_LIBXENCTRL + int rc; + xen_capabilities_info_t capabilities; + xc_interface *xc; + + if (!xen_present()) + goto out; + + memset(capabilities, '0', XEN_CAPABILITIES_INFO_LEN); + + xc = xc_interface_open(NULL, NULL, 0); + if ( !xc ) { + fprintf(stderr, "failed to open xen control interface.\n"); + goto out; + } + + rc = xc_version(xc, XENVER_capabilities, &capabilities[0]); + if ( rc == -1 ) { + fprintf(stderr, "failed to make Xen version hypercall.\n"); + goto out_close; + } + + if (strstr(capabilities, "xen-3.0-x86_64")) + machine = EM_X86_64; + else if (strstr(capabilities, "xen-3.0-x86_32")) + machine = EM_386; + + out_close: + xc_interface_close(xc); + + out: +#endif + return machine; +} + +#ifdef HAVE_LIBXENCTRL +int get_xen_vmcoreinfo(uint64_t *addr, uint64_t *len) +{ + uint64_t end; + int ret = 0; + + ret = xen_get_kexec_range(KEXEC_RANGE_MA_VMCOREINFO, addr, &end); + if (ret < 0) + return -1; + + *len = end - *addr + 1; + + return 0; +} + +int xen_get_nr_phys_cpus(void) +{ + xc_interface *xc; + int max_cpus; + int cpu = -1; + + if (xen_phys_cpus) + return xen_phys_cpus; + + xc = xc_interface_open(NULL, NULL, 0); + if (!xc) { + fprintf(stderr, "failed to open xen control interface.\n"); + return -1; + } + + max_cpus = xc_get_max_cpus(xc); + if (max_cpus <= 0) + goto out; + + xen_phys_notes = calloc(max_cpus, sizeof(*xen_phys_notes)); + if (xen_phys_notes == NULL) + goto out; + + for (cpu = 0; cpu < max_cpus; cpu++) { + uint64_t size, start; + int ret; + + ret = xc_kexec_get_range(xc, KEXEC_RANGE_MA_CPU, cpu, &size, &start); + if (ret < 0) + break; + + xen_phys_notes[cpu].base = start; + xen_phys_notes[cpu].length = size; + } + + xen_phys_cpus = cpu; + +out: + 
xc_interface_close(xc); + return cpu; +} +#else +int get_xen_vmcoreinfo(uint64_t *addr, uint64_t *len) +{ + return -1; +} + +int xen_get_nr_phys_cpus(void) +{ + return -1; +} +#endif + + +int xen_get_note(int cpu, uint64_t *addr, uint64_t *len) +{ + struct crash_note_info *note; + + if (xen_phys_cpus <= 0) + return -1; + + note = xen_phys_notes + cpu; + + *addr = note->base; + *len = note->length; + + return 0; +} + +#ifdef HAVE_LIBXENCTRL +int xen_get_crashkernel_region(uint64_t *start, uint64_t *end) +{ + return xen_get_kexec_range(KEXEC_RANGE_MA_CRASH, start, end); +} +#else +int xen_get_crashkernel_region(uint64_t *start, uint64_t *end) +{ + return -1; +} +#endif diff --git a/kexec/crashdump.c b/kexec/crashdump.c new file mode 100644 index 0000000..0b363c5 --- /dev/null +++ b/kexec/crashdump.c @@ -0,0 +1,159 @@ +/* + * crashdump.c: Architecture independent code for crashdump support. + * + * Created by: Vivek Goyal (vgoyal@in.ibm.com) + * Copyright (C) IBM Corporation, 2005. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <limits.h> +#include <linux/limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <elf.h> +#include "kexec.h" +#include "crashdump.h" +#include "kexec-syscall.h" + +/* include "crashdump-elf.c" twice to create two functions from one */ + +#define ELF_WIDTH 64 +#define FUNC crash_create_elf64_headers +#define EHDR Elf64_Ehdr +#define PHDR Elf64_Phdr +#include "crashdump-elf.c" +#undef ELF_WIDTH +#undef PHDR +#undef EHDR +#undef FUNC + +#define ELF_WIDTH 32 +#define FUNC crash_create_elf32_headers +#define EHDR Elf32_Ehdr +#define PHDR Elf32_Phdr +#include "crashdump-elf.c" +#undef ELF_WIDTH +#undef PHDR +#undef EHDR +#undef FUNC + +unsigned long crash_architecture(struct crash_elf_info *elf_info) +{ + if (xen_present()) + return xen_architecture(elf_info); + else + return elf_info->machine; +} + +/* Returns the physical address of start of crash notes buffer for a cpu. */ +int get_crash_notes_per_cpu(int cpu, uint64_t *addr, uint64_t *len) +{ + char crash_notes[PATH_MAX]; + char crash_notes_size[PATH_MAX]; + char line[MAX_LINE]; + FILE *fp; + struct stat cpu_stat; + int count; + unsigned long long temp; + int fopen_errno; + int stat_errno; + + *addr = 0; + *len = 0; + + sprintf(crash_notes, "/sys/devices/system/cpu/cpu%d/crash_notes", cpu); + fp = fopen(crash_notes, "r"); + if (!fp) { + fopen_errno = errno; + if (fopen_errno != ENOENT) + die("Could not open \"%s\": %s\n", crash_notes, + strerror(fopen_errno)); + if (stat("/sys/devices", &cpu_stat)) { + stat_errno = errno; + if (stat_errno == ENOENT) + die("\"/sys/devices\" does not exist. " + "Sysfs does not seem to be mounted. 
" + "Try mounting sysfs.\n"); + die("Could not open \"/sys/devices\": %s\n", + strerror(stat_errno)); + } + /* CPU is not physically present.*/ + return -1; + } + if (!fgets(line, sizeof(line), fp)) + die("Cannot parse %s: %s\n", crash_notes, strerror(errno)); + count = sscanf(line, "%llx", &temp); + if (count != 1) + die("Cannot parse %s: %s\n", crash_notes, strerror(errno)); + *addr = (uint64_t) temp; + fclose(fp); + + *len = MAX_NOTE_BYTES; + sprintf(crash_notes_size, + "/sys/devices/system/cpu/cpu%d/crash_notes_size", cpu); + fp = fopen(crash_notes_size, "r"); + if (fp) { + if (!fgets(line, sizeof(line), fp)) + die("Cannot parse %s: %s\n", + crash_notes_size, strerror(errno)); + count = sscanf(line, "%llu", &temp); + if (count != 1) + die("Cannot parse %s: %s\n", + crash_notes_size, strerror(errno)); + *len = (uint64_t) temp; + fclose(fp); + } + + dbgprintf("%s: crash_notes addr = %llx, size = %llu\n", __FUNCTION__, + (unsigned long long)*addr, (unsigned long long)*len); + + return 0; +} + +static int get_vmcoreinfo(const char *kdump_info, uint64_t *addr, uint64_t *len) +{ + char line[MAX_LINE]; + int count; + FILE *fp; + unsigned long long temp, temp2; + + *addr = 0; + *len = 0; + + if (!(fp = fopen(kdump_info, "r"))) + return -1; + + if (!fgets(line, sizeof(line), fp)) + die("Cannot parse %s: %s\n", kdump_info, strerror(errno)); + count = sscanf(line, "%llx %llx", &temp, &temp2); + if (count != 2) + die("Cannot parse %s: %s\n", kdump_info, strerror(errno)); + + *addr = (uint64_t) temp; + *len = (uint64_t) temp2; + + fclose(fp); + return 0; +} + +/* Returns the physical address of start of crash notes buffer for a kernel. 
*/ +int get_kernel_vmcoreinfo(uint64_t *addr, uint64_t *len) +{ + return get_vmcoreinfo("/sys/kernel/vmcoreinfo", addr, len); +} diff --git a/kexec/crashdump.h b/kexec/crashdump.h new file mode 100644 index 0000000..18bd691 --- /dev/null +++ b/kexec/crashdump.h @@ -0,0 +1,64 @@ +#ifndef CRASHDUMP_H +#define CRASHDUMP_H + +extern int get_crash_notes_per_cpu(int cpu, uint64_t *addr, uint64_t *len); +extern int get_kernel_vmcoreinfo(uint64_t *addr, uint64_t *len); +extern int get_xen_vmcoreinfo(uint64_t *addr, uint64_t *len); + +/* Need to find a better way to determine per cpu notes section size. */ +#define MAX_NOTE_BYTES 1024 +/* Expecting ELF headers to fit in 64K. Increase it if you need more. */ +#define KCORE_ELF_HEADERS_SIZE 65536 +/* The address of the ELF header is passed to the secondary kernel + * using the kernel command line option memmap=nnn. + * The smallest unit the kernel accepts is in kilobytes, + * so we need to make sure the ELF header is aligned to 1024. + */ +#define ELF_CORE_HEADER_ALIGN 1024 + +/* structure passed to crash_create_elf32/64_headers() */ + +struct crash_elf_info { + unsigned long class; + unsigned long data; + unsigned long machine; + + unsigned long long page_offset; + unsigned long long kern_vaddr_start; + unsigned long long kern_paddr_start; + unsigned long kern_size; + unsigned long lowmem_limit; + + int (*get_note_info)(int cpu, uint64_t *addr, uint64_t *len); +}; + +typedef int(*crash_create_elf_headers_func)(struct kexec_info *info, + struct crash_elf_info *elf_info, + struct memory_range *range, + int ranges, + void **buf, unsigned long *size, + unsigned long align); + +int crash_create_elf32_headers(struct kexec_info *info, + struct crash_elf_info *elf_info, + struct memory_range *range, int ranges, + void **buf, unsigned long *size, + unsigned long align); + +int crash_create_elf64_headers(struct kexec_info *info, + struct crash_elf_info *elf_info, + struct memory_range *range, int ranges, + void **buf, unsigned long 
*size, + unsigned long align); + +unsigned long crash_architecture(struct crash_elf_info *elf_info); + +unsigned long phys_to_virt(struct crash_elf_info *elf_info, + unsigned long long paddr); + +unsigned long xen_architecture(struct crash_elf_info *elf_info); +int xen_get_nr_phys_cpus(void); +int xen_get_note(int cpu, uint64_t *addr, uint64_t *len); +int xen_get_crashkernel_region(uint64_t *start, uint64_t *end); + +#endif /* CRASHDUMP_H */ diff --git a/kexec/dt-ops.c b/kexec/dt-ops.c new file mode 100644 index 0000000..0a96b75 --- /dev/null +++ b/kexec/dt-ops.c @@ -0,0 +1,176 @@ +#include <assert.h> +#include <errno.h> +#include <inttypes.h> +#include <libfdt.h> +#include <stdio.h> +#include <stdlib.h> + +#include "kexec.h" +#include "dt-ops.h" + +static const char n_chosen[] = "chosen"; + +static const char p_bootargs[] = "bootargs"; +static const char p_initrd_start[] = "linux,initrd-start"; +static const char p_initrd_end[] = "linux,initrd-end"; + +int dtb_set_initrd(char **dtb, off_t *dtb_size, off_t start, off_t end) +{ + int result; + uint64_t value; + + dbgprintf("%s: start %jd, end %jd, size %jd (%jd KiB)\n", + __func__, (intmax_t)start, (intmax_t)end, + (intmax_t)(end - start), + (intmax_t)(end - start) / 1024); + + value = cpu_to_fdt64(start); + + result = dtb_set_property(dtb, dtb_size, n_chosen, p_initrd_start, + &value, sizeof(value)); + + if (result) + return result; + + value = cpu_to_fdt64(end); + + result = dtb_set_property(dtb, dtb_size, n_chosen, p_initrd_end, + &value, sizeof(value)); + + if (result) { + dtb_delete_property(*dtb, n_chosen, p_initrd_start); + return result; + } + + return 0; +} + +void dtb_clear_initrd(char **dtb, off_t *dtb_size) +{ + dtb_delete_property(*dtb, n_chosen, p_initrd_start); + dtb_delete_property(*dtb, n_chosen, p_initrd_end); +} + +int dtb_set_bootargs(char **dtb, off_t *dtb_size, const char *command_line) +{ + return dtb_set_property(dtb, dtb_size, n_chosen, p_bootargs, + command_line, strlen(command_line) + 1); 
+} + +int dtb_set_property(char **dtb, off_t *dtb_size, const char *node, + const char *prop, const void *value, int value_len) +{ + int result; + int nodeoffset; + void *new_dtb; + int new_size; + char *new_node = NULL; + + value_len = FDT_TAGALIGN(value_len); + + new_size = FDT_TAGALIGN(*dtb_size + fdt_node_len(node) + + fdt_prop_len(prop, value_len)); + + new_dtb = malloc(new_size); + + if (!new_dtb) { + dbgprintf("%s: malloc failed\n", __func__); + return -ENOMEM; + } + + result = fdt_open_into(*dtb, new_dtb, new_size); + + if (result) { + dbgprintf("%s: fdt_open_into failed: %s\n", __func__, + fdt_strerror(result)); + goto on_error; + } + + new_node = malloc(strlen("/") + strlen(node) + 1); + if (!new_node) { + dbgprintf("%s: malloc failed\n", __func__); + result = -ENOMEM; + goto on_error; + } + + strcpy(new_node, "/"); + strcat(new_node, node); + + nodeoffset = fdt_path_offset(new_dtb, new_node); + + if (nodeoffset == -FDT_ERR_NOTFOUND) { + result = fdt_add_subnode(new_dtb, 0, node); + + if (result < 0) { + dbgprintf("%s: fdt_add_subnode failed: %s\n", __func__, + fdt_strerror(result)); + goto on_error; + } + nodeoffset = result; + } else if (nodeoffset < 0) { + dbgprintf("%s: fdt_path_offset failed: %s\n", __func__, + fdt_strerror(nodeoffset)); + goto on_error; + } + + result = fdt_setprop(new_dtb, nodeoffset, prop, value, value_len); + + if (result) { + dbgprintf("%s: fdt_setprop failed: %s\n", __func__, + fdt_strerror(result)); + goto on_error; + } + + /* + * Can't call free on dtb since dtb may have been mmaped by + * slurp_file(). 
+ */ + + result = fdt_pack(new_dtb); + + if (result) + dbgprintf("%s: Unable to pack device tree: %s\n", __func__, + fdt_strerror(result)); + + *dtb = new_dtb; + *dtb_size = fdt_totalsize(*dtb); + + return 0; + +on_error: + free(new_dtb); + free(new_node); + return result; +} + +int dtb_delete_property(char *dtb, const char *node, const char *prop) +{ + int result, nodeoffset; + char *new_node = NULL; + + new_node = malloc(strlen("/") + strlen(node) + 1); + if (!new_node) { + dbgprintf("%s: malloc failed\n", __func__); + return -ENOMEM; + } + + strcpy(new_node, "/"); + strcat(new_node, node); + + nodeoffset = fdt_path_offset(dtb, new_node); + if (nodeoffset < 0) { + dbgprintf("%s: fdt_path_offset failed: %s\n", __func__, + fdt_strerror(nodeoffset)); + free(new_node); + return nodeoffset; + } + + result = fdt_delprop(dtb, nodeoffset, prop); + + if (result) + dbgprintf("%s: fdt_delprop failed: %s\n", __func__, + fdt_strerror(nodeoffset)); + + free(new_node); + return result; +} diff --git a/kexec/dt-ops.h b/kexec/dt-ops.h new file mode 100644 index 0000000..03659ce --- /dev/null +++ b/kexec/dt-ops.h @@ -0,0 +1,14 @@ +#if !defined(KEXEC_DT_OPS_H) +#define KEXEC_DT_OPS_H + +#include <sys/types.h> + +int dtb_set_initrd(char **dtb, off_t *dtb_size, off_t start, off_t end); +void dtb_clear_initrd(char **dtb, off_t *dtb_size); +int dtb_set_bootargs(char **dtb, off_t *dtb_size, const char *command_line); +int dtb_set_property(char **dtb, off_t *dtb_size, const char *node, + const char *prop, const void *value, int value_len); + +int dtb_delete_property(char *dtb, const char *node, const char *prop); + +#endif diff --git a/kexec/firmware_memmap.c b/kexec/firmware_memmap.c new file mode 100644 index 0000000..457c3dc --- /dev/null +++ b/kexec/firmware_memmap.c @@ -0,0 +1,302 @@ +/* + * firmware_memmap.c: Read /sys/firmware/memmap + * + * Created by: Bernhard Walle (bernhard.walle@gmx.de) + * Copyright (C) SUSE LINUX Products GmbH, 2008. 
All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#define _GNU_SOURCE /* for ULLONG_MAX without C99 */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <dirent.h> +#include <unistd.h> +#include <limits.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "firmware_memmap.h" +#include "kexec.h" + +/* + * If the system is too old for ULLONG_MAX or LLONG_MAX, define it here. + */ +#ifndef ULLONG_MAX +# define ULLONG_MAX (~0ULL) +#endif /* ULLONG_MAX */ + +#ifndef LLONG_MAX +# define LLONG_MAX (~0ULL >> 1) +#endif /* LLONG_MAX */ + + +/** + * The full path to the sysfs interface that provides the memory map. + */ +#define FIRMWARE_MEMMAP_DIR "/sys/firmware/memmap" + +/** + * Parses a file that only contains one number. Typical for sysfs files. + * + * @param[in] filename the name of the file that should be parsed + * @return the value that has been read or ULLONG_MAX on error. 
+ */ +static unsigned long long parse_numeric_sysfs(const char *filename) +{ + FILE *fp; + char linebuffer[BUFSIZ]; + unsigned long long retval = ULLONG_MAX; + + fp = fopen(filename, "r"); + if (!fp) { + fprintf(stderr, "Opening \"%s\" failed: %s\n", + filename, strerror(errno)); + return ULLONG_MAX; + } + + if (!fgets(linebuffer, BUFSIZ, fp)) + goto err; + + linebuffer[BUFSIZ-1] = 0; + + /* let strtoll() detect the base */ + retval = strtoll(linebuffer, NULL, 0); + +err: + fclose(fp); + + return retval; +} + +/** + * Reads the contents of a one-line sysfs file to buffer. (This function is + * not threadsafe.) + * + * @param[in] filename the name of the file that should be read + * + * @return NULL on failure, a pointer to a static buffer (that should be copied + * with strdup() if the caller plans to use it after next function call) + */ +static char *parse_string_sysfs(const char *filename) +{ + FILE *fp; + static char linebuffer[BUFSIZ]; + char *end; + + fp = fopen(filename, "r"); + if (!fp) { + fprintf(stderr, "Opening \"%s\" failed: %s\n", + filename, strerror(errno)); + return NULL; + } + + if (!fgets(linebuffer, BUFSIZ, fp)) { + fclose(fp); + return NULL; + } + + linebuffer[BUFSIZ-1] = 0; + + /* truncate trailing newline(s) */ + end = linebuffer + strlen(linebuffer) - 1; + while (*end == '\n') + *end-- = 0; + + fclose(fp); + + return linebuffer; + +} + +static int parse_memmap_entry(const char *entry, struct memory_range *range) +{ + char filename[PATH_MAX]; + char *type; + int ret; + + /* + * entry/start + */ + ret = snprintf(filename, PATH_MAX, "%s/%s", entry, "start"); + if (ret < 0 || ret >= PATH_MAX) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + return -1; + } + + filename[PATH_MAX-1] = 0; + + range->start = parse_numeric_sysfs(filename); + if (range->start == ULLONG_MAX) + return -1; + + /* + * entry/end + */ + ret = snprintf(filename, PATH_MAX, "%s/%s", entry, "end"); + if (ret < 0 || ret >= PATH_MAX) { + fprintf(stderr, "snprintf 
failed: %s\n", strerror(errno)); + return -1; + } + + filename[PATH_MAX-1] = 0; + + range->end = parse_numeric_sysfs(filename); + if (range->end == ULLONG_MAX) + return -1; + + /* + * entry/type + */ + ret = snprintf(filename, PATH_MAX, "%s/%s", entry, "type"); + if (ret < 0 || ret >= PATH_MAX) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + return -1; + } + + filename[PATH_MAX-1] = 0; + + type = parse_string_sysfs(filename); + if (!type) + return -1; + + if (strcmp(type, "System RAM") == 0) + range->type = RANGE_RAM; + else if (strcmp(type, "ACPI Tables") == 0) + range->type = RANGE_ACPI; + else if (strcmp(type, "Unusable memory") == 0) + range->type = RANGE_RESERVED; + else if (strcmp(type, "reserved") == 0) + range->type = RANGE_RESERVED; + else if (strcmp(type, "Reserved") == 0) + range->type = RANGE_RESERVED; + else if (strcmp(type, "Unknown E820 type") == 0) + range->type = RANGE_RESERVED; + else if (strcmp(type, "ACPI Non-volatile Storage") == 0) + range->type = RANGE_ACPI_NVS; + else if (strcmp(type, "Uncached RAM") == 0) + range->type = RANGE_UNCACHED; + else if (strcmp(type, "Persistent Memory (legacy)") == 0) + range->type = RANGE_PRAM; + else if (strcmp(type, "Persistent Memory") == 0) + range->type = RANGE_PMEM; + else { + fprintf(stderr, "Unknown type (%s) while parsing %s. Please " + "report this as bug. 
Using RANGE_RESERVED now.\n", + type, filename); + range->type = RANGE_RESERVED; + } + + return 0; +} + +/* documentation: firmware_memmap.h */ +int compare_ranges(const void *first, const void *second) +{ + const struct memory_range *first_range = first; + const struct memory_range *second_range = second; + + /* + * don't use the "first_range->start - second_range->start" + * notation because unsigned long long might overflow + */ + if (first_range->start > second_range->start) + return 1; + else if (first_range->start < second_range->start) + return -1; + else /* first_range->start == second_range->start */ + return 0; +} + +/* documentation: firmware_memmap.h */ +int have_sys_firmware_memmap(void) +{ + int ret; + struct stat mystat; + + ret = stat(FIRMWARE_MEMMAP_DIR, &mystat); + if (ret != 0) + return 0; + + return S_ISDIR(mystat.st_mode); +} + +/* documentation: firmware_memmap.h */ +int get_firmware_memmap_ranges(struct memory_range *range, size_t *ranges) +{ + DIR *firmware_memmap_dir = NULL; + struct dirent *dirent; + int i = 0; + + /* argument checking */ + if (!range || !ranges) { + fprintf(stderr, "%s: Invalid arguments.\n", __FUNCTION__); + return -1; + } + + /* open the directory */ + firmware_memmap_dir = opendir(FIRMWARE_MEMMAP_DIR); + if (!firmware_memmap_dir) { + perror("Could not open \"" FIRMWARE_MEMMAP_DIR "\""); + goto error; + } + + /* parse the entries */ + while ((dirent = readdir(firmware_memmap_dir)) != NULL) { + int ret; + char full_path[PATH_MAX]; + + /* array overflow check */ + if ((size_t)i >= *ranges) { + fprintf(stderr, "The firmware provides more entries " + "allowed (%zd). Please report that as bug.\n", + *ranges); + goto error; + } + + /* exclude '.' and '..' 
*/ + if (dirent->d_name[0] && dirent->d_name[0] == '.') { + continue; + } + + snprintf(full_path, PATH_MAX, "%s/%s", FIRMWARE_MEMMAP_DIR, + dirent->d_name); + full_path[PATH_MAX-1] = 0; + ret = parse_memmap_entry(full_path, &range[i]); + if (ret < 0) { + goto error; + } + + i++; + } + + /* close the dir as we don't need it any more */ + closedir(firmware_memmap_dir); + + /* update the number of ranges for the caller */ + *ranges = i; + + /* and finally sort the entries with qsort */ + qsort(range, *ranges, sizeof(struct memory_range), compare_ranges); + + return 0; + +error: + if (firmware_memmap_dir) { + closedir(firmware_memmap_dir); + } + return -1; +} + diff --git a/kexec/firmware_memmap.h b/kexec/firmware_memmap.h new file mode 100644 index 0000000..eac0ddb --- /dev/null +++ b/kexec/firmware_memmap.h @@ -0,0 +1,76 @@ +/* + * firmware_memmap.c: Read /sys/firmware/memmap + * + * Created by: Bernhard Walle (bernhard.walle@gmx.de) + * Copyright (C) SUSE LINUX Products GmbH, 2008. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef FIRMWARE_MEMMAP_H +#define FIRMWARE_MEMMAP_H + +#include "kexec.h" + +/** + * Reads the /sys/firmware/memmap interface, documented in + * Documentation/ABI/testing/sysfs-firmware-memmap (kernel tree). 
+ * + * The difference between /proc/iomem and /sys/firmware/memmap is that + * /sys/firmware/memmap provides the raw memory map, provided by the + * firmware of the system. That memory map should be passed to a kexec'd + * kernel because the behaviour should be the same as a normal booted kernel, + * so any limitation (e.g. by the user providing the mem command line option) + * should not be passed to the kexec'd kernel. + * + * The parsing of the code is independent of the architecture. However, the + * actual architecture-specific code might postprocess the code a bit, like + * x86 does. + */ + +/** + * Compares two memory ranges according to their start address. This function + * can be used with qsort() as @c compar function. + * + * @param[in] first a pointer to the first memory range + * @param[in] second a pointer to the second memory range + * @return 0 if @p first and @p second have the same start address, + * a value less than 0 if the start address of @p first is less than + * the start address of @p second, and a value greater than 0 if + * the opposite is the case. + */ +int compare_ranges(const void *first, const void *second); + +/** + * Checks if the kernel provides the /sys/firmware/memmap interface. + * It makes sense to use that function in advance before calling + * get_firmware_memmap_ranges() because the latter function prints an error + * if it cannot open the directory. If have_sys_firmware_memmap() returns + * false, then one can use the old /proc/iomem interface (for older kernels). + */ +int have_sys_firmware_memmap(void); + +/** + * Parses the /sys/firmware/memmap memory map. + * + * @param[out] range a pointer to an array of type struct memory_range with + * at least *ranges entries + * @param[in,out] ranges a pointer to an integer that holds the number of + * entries which range contains (at least). After successful + * return, the number of actual entries will be written. + * @return 0 on success, -1 on failure. 
+ */ +int get_firmware_memmap_ranges(struct memory_range *range, size_t *ranges); + + +#endif /* FIRMWARE_MEMMAP_H */ diff --git a/kexec/fs2dt.c b/kexec/fs2dt.c new file mode 100644 index 0000000..993b73b --- /dev/null +++ b/kexec/fs2dt.c @@ -0,0 +1,852 @@ +/* + * fs2dt: creates a flattened device-tree + * + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com), IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define _GNU_SOURCE + +#include <sys/types.h> +#include <sys/stat.h> + +#include <fcntl.h> +#include <dirent.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include "kexec.h" +#include "fs2dt.h" +#include "libfdt/fdt.h" + +#define MAXPATH 1024 /* max path name length */ +#define NAMESPACE 16384 /* max bytes for property names */ +#define INIT_TREE_WORDS 65536 /* Initial num words for prop values */ +#define MEMRESERVE 256 /* max number of reserved memory blocks */ +#define MEM_RANGE_CHUNK_SZ 2048 /* Initial num dwords for mem ranges */ + +static char pathname[MAXPATH]; +static char propnames[NAMESPACE] = { 0 }; +static unsigned *dt_base, *dt; +static unsigned int dt_cur_size; +static unsigned long long mem_rsrv[2*MEMRESERVE] = { 0ULL, 0ULL }; + +static int crash_param = 0; +static char local_cmdline[COMMAND_LINE_SIZE] = { "" }; + +extern unsigned char reuse_initrd; + +/* Used for enabling printing message from purgatory code + * Only has implemented for PPC64 */ +int my_debug; +int dt_no_old_root; + +/* This provides the behaviour of hte existing ppc64 implementation */ +static void pad_structure_block(size_t len) { +#ifdef NEED_STRUCTURE_BLOCK_EXTRA_PAD + if ((len >= 8) && ((unsigned long)dt & 0x4)) + dt++; +#endif +} + +/* Before we add something to the dt, reserve N words using this. + * If there isn't enough room, it's realloced -- and you don't overflow and + * splat bits of your heap. 
+ */ +static void dt_reserve(unsigned **dt_ptr, unsigned words) +{ + unsigned int sz = INIT_TREE_WORDS; + + if (sz < words) + sz = words; + + if (((*dt_ptr - dt_base) + words) >= dt_cur_size) { + int offset; + unsigned int new_size = dt_cur_size + sz; + unsigned *new_dt = realloc(dt_base, new_size*4); + + if (!new_dt) + die("unrecoverable error: Can't realloc %d bytes for " + "device tree\n", new_size*4); + offset = *dt_ptr - dt_base; + dt_base = new_dt; + dt_cur_size = new_size; + *dt_ptr = dt_base + offset; + memset(*dt_ptr, 0, (new_size - offset)*4); + } +} + +void reserve(unsigned long long where, unsigned long long length) +{ + size_t offset; + + for (offset = 0; be64_to_cpu(mem_rsrv[offset + 1]); offset += 2) + ; + + if (offset + 4 >= 2 * MEMRESERVE) + die("unrecoverable error: exhasuted reservation meta data\n"); + + mem_rsrv[offset] = cpu_to_be64(where); + mem_rsrv[offset + 1] = cpu_to_be64(length); + mem_rsrv[offset + 2] = mem_rsrv[offset + 3] = cpu_to_be64(0); +} + +/* look for properties we need to reserve memory space for */ +static void checkprop(char *name, unsigned *data, int len) +{ + static unsigned long long base, size, end; + + if ((data == NULL) && (base || size || end)) + die("unrecoverable error: no property data"); + else if (!strcmp(name, "linux,rtas-base")) + base = be32_to_cpu(*data); + else if (!strcmp(name, "opal-base-address")) + base = be64_to_cpu(*(unsigned long long *)data); + else if (!strcmp(name, "opal-runtime-size")) + size = be64_to_cpu(*(unsigned long long *)data); + else if (!strcmp(name, "linux,tce-base")) + base = be64_to_cpu(*(unsigned long long *) data); + else if (!strcmp(name, "rtas-size") || + !strcmp(name, "linux,tce-size")) + size = be32_to_cpu(*data); + else if (reuse_initrd && !strcmp(name, "linux,initrd-start")) { + if (len == 8) + base = be64_to_cpu(*(unsigned long long *) data); + else + base = be32_to_cpu(*data); + } else if (reuse_initrd && !strcmp(name, "linux,initrd-end")) { + if (len == 8) + end = 
be64_to_cpu(*(unsigned long long *) data); + else + end = be32_to_cpu(*data); + } + + if (size && end) + die("unrecoverable error: size and end set at same time\n"); + if (base && size) { + reserve(base, size); + base = size = 0; + } + if (base && end) { + reserve(base, end-base); + base = end = 0; + } +} + +/* + * return the property index for a property name, creating a new one + * if needed. + */ +static unsigned propnum(const char *name) +{ + unsigned offset = 0; + + while(propnames[offset]) + if (strcmp(name, propnames+offset)) + offset += strlen(propnames+offset)+1; + else + return offset; + + if (NAMESPACE - offset < strlen(name) + 1) + die("unrecoverable error: propnames overrun\n"); + + strcpy(propnames+offset, name); + + return offset; +} + +/* + * Add ranges by comparing 'base' and 'end' addresses with usable + * memory ranges. Returns the number of ranges added. Each range added + * increments 'idx' by 2. + */ +static uint64_t add_ranges(uint64_t **ranges, int *ranges_size, int idx, + uint64_t base, uint64_t end) +{ + uint64_t loc_base, loc_end, rngs_cnt = 0; + size_t range; + int add = 0; + + for (range = 0; range < usablemem_rgns.size; range++) { + loc_base = usablemem_rgns.ranges[range].start; + loc_end = usablemem_rgns.ranges[range].end; + if (loc_base >= base && loc_end <= end) { + add = 1; + } else if (base < loc_end && end > loc_base) { + if (loc_base < base) + loc_base = base; + if (loc_end > end) + loc_end = end; + add = 1; + } + + if (add) { + if (idx >= ((*ranges_size) - 2)) { + (*ranges_size) += MEM_RANGE_CHUNK_SZ; + *ranges = realloc(*ranges, (*ranges_size)*8); + if (!(*ranges)) + die("unrecoverable error: can't realloc" + "%d bytes for ranges.\n", + (*ranges_size)*8); + } + (*ranges)[idx++] = cpu_to_be64(loc_base); + (*ranges)[idx++] = cpu_to_be64(loc_end - loc_base); + + rngs_cnt++; + } + } + + return rngs_cnt; +} + +#ifdef HAVE_DYNAMIC_MEMORY +static void add_dyn_reconf_usable_mem_property__(int fd) +{ + char fname[MAXPATH], *bname; + 
char buf[32]; + uint32_t lmbs_in_set = 1; + uint64_t *ranges; + int ranges_size = MEM_RANGE_CHUNK_SZ; + uint64_t base, end, rngs_cnt; + size_t i, j; + int rlen = 0; + int tmp_indx; + + strcpy(fname, pathname); + bname = strrchr(fname, '/'); + bname[0] = '\0'; + bname = strrchr(fname, '/'); + if (strncmp(bname, "/ibm,dynamic-reconfiguration-memory", 36)) + return; + + if (lseek(fd, 4, SEEK_SET) < 0) + die("unrecoverable error: error seeking in \"%s\": %s\n", + pathname, strerror(errno)); + + ranges = malloc(ranges_size*8); + if (!ranges) + die("unrecoverable error: can't alloc %d bytes for ranges.\n", + ranges_size*8); + + rlen = 0; + for (i = 0; i < num_of_lmb_sets; i++) { + if (read(fd, buf, LMB_ENTRY_SIZE) < 0) + die("unrecoverable error: error reading \"%s\": %s\n", + pathname, strerror(errno)); + + /* + * If the property is ibm,dynamic-memory-v2, the first 4 bytes + * tell the number of sequential LMBs in this entry. Else, if + * the property is ibm,dynamic-memory, each entry represents + * one LMB. Make sure to add an entry for each LMB as kernel + * looks for a counter for every LMB. + */ + if (is_dyn_mem_v2) + lmbs_in_set = be32_to_cpu(((unsigned int *)buf)[0]); + + base = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR])); + for (j = 0; j < lmbs_in_set; j++) { + end = base + lmb_size; + if (~0ULL - base < end) { + die("unrecoverable error: mem property" + " overflow\n"); + } + + tmp_indx = rlen++; + + rngs_cnt = add_ranges(&ranges, &ranges_size, rlen, + base, end); + if (rngs_cnt == 0) { + /* We still need to add a counter for every LMB + * because the kernel parsing code is dumb. We + * just have a zero in this case, with no + * following base/len. + */ + ranges[tmp_indx] = 0; + + /* rlen is already just tmp_indx+1 as we didn't + * write anything. 
Check array size here, as we + * will probably go on writing zeros for a while + */ + if (rlen >= (ranges_size-1)) { + ranges_size += MEM_RANGE_CHUNK_SZ; + ranges = realloc(ranges, ranges_size*8); + if (!ranges) + die("unrecoverable error: can't" + " realloc %d bytes for" + " ranges.\n", + ranges_size*8); + } + } else { + /* Store the count of (base, size) duple */ + ranges[tmp_indx] = cpu_to_be64(rngs_cnt); + rlen += rngs_cnt * 2; + } + + base = end; + } + } + + rlen = rlen * sizeof(uint64_t); + /* + * Add linux,drconf-usable-memory property. + */ + dt_reserve(&dt, 4+((rlen + 3)/4)); + *dt++ = cpu_to_be32(FDT_PROP); + *dt++ = cpu_to_be32(rlen); + *dt++ = cpu_to_be32(propnum("linux,drconf-usable-memory")); + pad_structure_block(rlen); + memcpy(dt, ranges, rlen); + free(ranges); + dt += (rlen + 3)/4; +} + +static void add_dyn_reconf_usable_mem_property(struct dirent *dp, int fd) +{ + if ((!strcmp(dp->d_name, "ibm,dynamic-memory-v2") || + !strcmp(dp->d_name, "ibm,dynamic-memory")) && usablemem_rgns.size) + add_dyn_reconf_usable_mem_property__(fd); +} +#else +static void add_dyn_reconf_usable_mem_property(struct dirent *dp, int fd) {} +#endif + +static void add_usable_mem_property(int fd, size_t len) +{ + char fname[MAXPATH], *bname; + uint64_t buf[2]; + uint64_t *ranges; + int ranges_size = MEM_RANGE_CHUNK_SZ; + uint64_t base, end, rngs_cnt; + int rlen = 0; + + strcpy(fname, pathname); + bname = strrchr(fname,'/'); + bname[0] = '\0'; + bname = strrchr(fname,'/'); + if (strncmp(bname, "/memory@", 8) && strcmp(bname, "/memory")) + return; + + if (len < sizeof(buf)) + die("unrecoverable error: not enough data for mem property\n"); + + if (lseek(fd, 0, SEEK_SET) < 0) + die("unrecoverable error: error seeking in \"%s\": %s\n", + pathname, strerror(errno)); + if (read(fd, buf, sizeof(buf)) != sizeof(buf)) + die("unrecoverable error: error reading \"%s\": %s\n", + pathname, strerror(errno)); + + base = be64_to_cpu(buf[0]); + end = be64_to_cpu(buf[1]); + if (~0ULL - base < 
end) + die("unrecoverable error: mem property overflow\n"); + end += base; + + ranges = malloc(ranges_size * sizeof(*ranges)); + if (!ranges) + die("unrecoverable error: can't alloc %zu bytes for ranges.\n", + ranges_size * sizeof(*ranges)); + + rngs_cnt = add_ranges(&ranges, &ranges_size, rlen, base, end); + rlen += rngs_cnt * 2; + + if (!rlen) { + /* + * User did not pass any ranges for thsi region. Hence, write + * (0,0) duple in linux,usable-memory property such that + * this region will be ignored. + */ + ranges[rlen++] = 0; + ranges[rlen++] = 0; + } + + rlen = rlen * sizeof(*ranges); + /* + * No add linux,usable-memory property. + */ + dt_reserve(&dt, 4+((rlen + 3)/4)); + *dt++ = cpu_to_be32(FDT_PROP); + *dt++ = cpu_to_be32(rlen); + *dt++ = cpu_to_be32(propnum("linux,usable-memory")); + pad_structure_block(rlen); + memcpy(dt, ranges, rlen); + free(ranges); + dt += (rlen + 3)/4; +} + +/* put all properties (files) in the property structure */ +static void putprops(char *fn, struct dirent **nlist, int numlist) +{ + struct dirent *dp; + int i = 0, fd; + off_t len; + off_t slen; + struct stat statbuf; + + for (i = 0; i < numlist; i++) { + dp = nlist[i]; + strcpy(fn, dp->d_name); + + if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) + continue; + + /* Empirically, this seems to need to be ecluded. + * Observed on ARM with 3.6-rc2 kernel + */ + if (!strcmp(dp->d_name, "name")) + continue; + + if (lstat(pathname, &statbuf)) + die("unrecoverable error: could not stat \"%s\": %s\n", + pathname, strerror(errno)); + + if (!crash_param && !strcmp(fn,"linux,crashkernel-base")) + continue; + + if (!crash_param && !strcmp(fn,"linux,crashkernel-size")) + continue; + + /* + * This property will be created for each node during kexec + * boot. So, ignore it. 
+ */ + if (!strcmp(dp->d_name, "linux,pci-domain") || + !strcmp(dp->d_name, "linux,htab-base") || + !strcmp(dp->d_name, "linux,htab-size") || + !strcmp(dp->d_name, "linux,kernel-end")) + continue; + + /* This property will be created/modified later in putnode() + * So ignore it, unless we are reusing the initrd. + */ + if ((!strcmp(dp->d_name, "linux,initrd-start") || + !strcmp(dp->d_name, "linux,initrd-end")) && + !reuse_initrd) + continue; + + /* This property will be created later in putnode() So + * ignore it now. + */ + if (!strcmp(dp->d_name, "bootargs")) + continue; + + if (! S_ISREG(statbuf.st_mode)) + continue; + + len = statbuf.st_size; + + dt_reserve(&dt, 4+((len + 3)/4)); + *dt++ = cpu_to_be32(FDT_PROP); + *dt++ = cpu_to_be32(len); + *dt++ = cpu_to_be32(propnum(fn)); + pad_structure_block(len); + + if (len) { + char *buf; + + buf = slurp_file_len(pathname, len, &slen); + if (slen != len) + die("unrecoverable error: short read from\"%s\"\n", + pathname); + + memcpy(dt, buf, slen); + free(buf); + } + + checkprop(fn, dt, len); + + dt += (len + 3)/4; + + fd = open(pathname, O_RDONLY); + if (fd == -1) + die("unrecoverable error: could not open \"%s\": %s\n", + pathname, strerror(errno)); + + if (!strcmp(dp->d_name, "reg") && usablemem_rgns.size) + add_usable_mem_property(fd, len); + add_dyn_reconf_usable_mem_property(dp, fd); + close(fd); + } + + fn[0] = '\0'; + checkprop(pathname, NULL, 0); +} + +/* + * Compare function used to sort the device-tree directories + * This function will be passed to scandir. + */ +static int comparefunc(const struct dirent **dentry1, + const struct dirent **dentry2) +{ + char *str1 = (*(struct dirent **)dentry1)->d_name; + char *str2 = (*(struct dirent **)dentry2)->d_name; + char *sep1 = strchr(str1, '@'); + char *sep2 = strchr(str2, '@'); + + /* + * strcmp scans from left to right and fails to idetify for some + * strings such as memory@10000000 and memory@f000000. 
+ * Therefore, we get the wrong sorted order like memory@10000000 and + * memory@f000000. + */ + if (sep1 && sep2) { + int baselen1 = sep1 - str1; + int baselen2 = sep2 - str2; + int len1 = strlen(str1); + int len2 = strlen(str2); + + /* + * Check the base name matches, and the properties are + * different lengths. + */ + if ((baselen1 == baselen2) && (len1 != len2) && + !strncmp(str1, str2, baselen2)) + return (len1 > len2) - (len1 < len2); + } + + return strcmp(str1, str2); +} + +/* grab root= from the old command line */ +static void dt_copy_old_root_param(void) +{ + FILE *fp; + char filename[MAXPATH]; + char *last_cmdline = NULL; + char *p, *old_param; + size_t len = 0; + + strcpy(filename, pathname); + strcat(filename, "bootargs"); + fp = fopen(filename, "r"); + if (!fp) + return; + + if (getline(&last_cmdline, &len, fp) == -1) + die("unable to read %s\n", filename); + + p = strstr(last_cmdline, "root="); + if (p) { + old_param = strtok(p, " "); + len = strlen(local_cmdline); + if (len != 0) + strcat(local_cmdline, " "); + strcat(local_cmdline, old_param); + } + + if (last_cmdline) + free(last_cmdline); + + fclose(fp); +} + +/* + * put a node (directory) in the property structure. first properties + * then children. + */ +static void putnode(void) +{ + char *dn; + struct dirent *dp; + char *basename; + struct dirent **namelist; + int numlist, i; + struct stat statbuf; + int plen, ret; + + numlist = scandir(pathname, &namelist, 0, comparefunc); + if (numlist < 0) + die("unrecoverable error: could not scan \"%s\": %s\n", + pathname, strerror(errno)); + if (numlist == 0) + die("unrecoverable error: no directory entries in \"%s\"", + pathname); + + basename = strrchr(pathname,'/') + 1; + + plen = *basename ? strlen(basename) : 0; + /* Reserve space for string packed to words; e.g. string length 10 + * occupies 3 words, length 12 occupies 4 (for terminating \0s). 
+ * So round up & include the \0: + */ + dt_reserve(&dt, 1+((plen + 4)/4)); + *dt++ = cpu_to_be32(FDT_BEGIN_NODE); + strcpy((void *)dt, *basename ? basename : ""); + dt += ((plen + 4)/4); + + if (*basename) + strcat(pathname, "/"); + dn = pathname + strlen(pathname); + + putprops(dn, namelist, numlist); + + /* Add initrd entries to the second kernel */ + if (initrd_base && initrd_size && !strcmp(basename,"chosen/")) { + int len = 8; + uint64_t bevalue; + + dt_reserve(&dt, 12); /* both props, of 6 words ea. */ + *dt++ = cpu_to_be32(FDT_PROP); + *dt++ = cpu_to_be32(len); + *dt++ = cpu_to_be32(propnum("linux,initrd-start")); + pad_structure_block(len); + + bevalue = cpu_to_be64(initrd_base); + memcpy(dt, &bevalue, len); + dt += (len + 3)/4; + + len = 8; + *dt++ = cpu_to_be32(FDT_PROP); + *dt++ = cpu_to_be32(len); + *dt++ = cpu_to_be32(propnum("linux,initrd-end")); + + bevalue = cpu_to_be64(initrd_base + initrd_size); + pad_structure_block(len); + + memcpy(dt, &bevalue, len); + dt += (len + 3)/4; + + reserve(initrd_base, initrd_size); + } + + /* + * Add cmdline to the second kernel. Use the old root= cmdline if there + * is no root= in the new command line and there's no --dt-no-old-root + * option being used. + */ + if (!strcmp(basename,"chosen/")) { + size_t result; + size_t cmd_len = 0; + char *param = NULL; + char filename[MAXPATH]; + char *buff; + int fd; + + cmd_len = strlen(local_cmdline); + if (cmd_len != 0) { + param = strstr(local_cmdline, "crashkernel="); + if (param) + crash_param = 1; + /* does the new cmdline have a root= ? ... 
*/ + param = strstr(local_cmdline, "root="); + } + + if (!param && !dt_no_old_root) + dt_copy_old_root_param(); + + strcat(local_cmdline, " "); + cmd_len = strlen(local_cmdline); + cmd_len = cmd_len + 1; + + /* add new bootargs */ + dt_reserve(&dt, 4+((cmd_len+3)/4)); + *dt++ = cpu_to_be32(FDT_PROP); + *dt++ = cpu_to_be32(cmd_len); + *dt++ = cpu_to_be32(propnum("bootargs")); + pad_structure_block(cmd_len); + memcpy(dt, local_cmdline,cmd_len); + dt += (cmd_len + 3)/4; + + fprintf(stderr, "Modified cmdline:%s\n", local_cmdline); + + /* + * Determine the platform type/stdout type, so that purgatory + * code can print 'I'm in purgatory' message. Currently only + * pseries/hvcterminal is supported. + */ + ret = snprintf(filename, MAXPATH, "%sstdout-path", pathname); + if (ret < 0 || ret >= MAXPATH) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + goto no_debug; + } + + fd = open(filename, O_RDONLY); + if (fd == -1) { + ret = snprintf(filename, MAXPATH, "%slinux,stdout-path", pathname); + if (ret < 0 || ret >= MAXPATH) { + fprintf(stderr, "snprintf failed: %s\n", strerror(errno)); + goto no_debug; + } + + fd = open(filename, O_RDONLY); + if (fd == -1) { + printf("Unable to find %s[linux,]stdout-path, printing from purgatory is disabled\n", + pathname); + goto no_debug; + } + } + if (fstat(fd, &statbuf)) { + printf("Unable to stat %s, printing from purgatory is disabled\n", + filename); + close(fd); + goto no_debug; + + } + + buff = malloc(statbuf.st_size); + if (!buff) { + printf("Can not allocate memory for buff\n"); + close(fd); + goto no_debug; + } + result = read(fd, buff, statbuf.st_size); + close(fd); + if (result <= 0) { + printf("Unable to read %s, printing from purgatory is disabled\n", + filename); + goto no_debug; + } + snprintf(filename, MAXPATH, "/proc/device-tree/%s/compatible", buff); + fd = open(filename, O_RDONLY); + if (fd == -1) { + printf("Unable to find %s printing from purgatory is disabled\n", + filename); + goto no_debug; + } + if 
(fstat(fd, &statbuf)) { + printf("Unable to stat %s printing from purgatory is disabled\n", + filename); + close(fd); + goto no_debug; + } + buff = realloc(buff, statbuf.st_size); + if (!buff) { + printf("Can not allocate memory for buff\n"); + close(fd); + goto no_debug; + } + result = read(fd, buff, statbuf.st_size); + if (result && (!strcmp(buff, "hvterm1") + || !strcmp(buff, "hvterm-protocol"))) + my_debug = 1; + close(fd); + free(buff); + } + +no_debug: + for (i=0; i < numlist; i++) { + dp = namelist[i]; + strcpy(dn, dp->d_name); + free(namelist[i]); + + if (!strcmp(dn, ".") || !strcmp(dn, "..")) + continue; + + if (lstat(pathname, &statbuf)) + die("unrecoverable error: could not stat \"%s\": %s\n", + pathname, strerror(errno)); + + if (S_ISDIR(statbuf.st_mode)) + putnode(); + } + + dt_reserve(&dt, 1); + *dt++ = cpu_to_be32(FDT_END_NODE); + dn[-1] = '\0'; + free(namelist); +} + +struct bootblock bb[1]; + +static void add_boot_block(char **bufp, off_t *sizep) +{ + unsigned long len; + unsigned long tlen, toff; + char *buf; + + len = _ALIGN(sizeof(bb[0]), 8); + + bb->off_mem_rsvmap = cpu_to_be32(len); + + for (len = 1; be64_to_cpu(mem_rsrv[len]); len += 2) + ; + len++; +#ifdef NEED_RESERVE_DTB + len+= 3; /* Leave space for totalsize reservation */ +#endif + len *= sizeof(mem_rsrv[0]); + + bb->off_dt_struct = cpu_to_be32(be32_to_cpu(bb->off_mem_rsvmap) + len); + + len = dt - dt_base; + len *= sizeof(unsigned); +#if (BOOT_BLOCK_VERSION >= 17) + bb->dt_struct_size = cpu_to_be32(len); +#endif + bb->off_dt_strings = cpu_to_be32(be32_to_cpu(bb->off_dt_struct) + len); + + len = propnum(""); + bb->dt_strings_size = cpu_to_be32(len); + len = _ALIGN(len, 4); + bb->totalsize = cpu_to_be32(be32_to_cpu(bb->off_dt_strings) + len); + + bb->magic = cpu_to_be32(FDT_MAGIC); + bb->version = cpu_to_be32(BOOT_BLOCK_VERSION); + bb->last_comp_version = cpu_to_be32(BOOT_BLOCK_LAST_COMP_VERSION); + +#ifdef NEED_RESERVE_DTB + reserve(0, be32_to_cpu(bb->totalsize)); /* patched later in 
kexec_load */ +#endif + + buf = malloc(be32_to_cpu(bb->totalsize)); + *bufp = buf; + + tlen = be32_to_cpu(bb->off_mem_rsvmap); + memcpy(buf, bb, tlen); + + toff = be32_to_cpu(bb->off_mem_rsvmap); + tlen = be32_to_cpu(bb->off_dt_struct) - be32_to_cpu(bb->off_mem_rsvmap); + memcpy(buf + toff, mem_rsrv, tlen); + + toff += be32_to_cpu(bb->off_dt_struct) - be32_to_cpu(bb->off_mem_rsvmap); + tlen = be32_to_cpu(bb->off_dt_strings) - be32_to_cpu(bb->off_dt_struct); + memcpy(buf + toff, dt_base, tlen); + + toff += be32_to_cpu(bb->off_dt_strings) - be32_to_cpu(bb->off_dt_struct); + tlen = be32_to_cpu(bb->totalsize) - be32_to_cpu(bb->off_dt_strings); + memcpy(buf + toff, propnames, tlen); + + *sizep = toff + be32_to_cpu(bb->totalsize) - + be32_to_cpu(bb->off_dt_strings); +} + +void create_flatten_tree(char **bufp, off_t *sizep, const char *cmdline) +{ + strcpy(pathname, "/proc/device-tree/"); + + dt_cur_size = INIT_TREE_WORDS; + dt_base = malloc(dt_cur_size*4); + if (!dt_base) { + die("Can't malloc %d bytes for dt struct!\n", dt_cur_size*4); + } + memset(dt_base, 0, dt_cur_size*4); + + dt = dt_base; + + if (cmdline) + strcpy(local_cmdline, cmdline); + + putnode(); + dt_reserve(&dt, 1); + *dt++ = cpu_to_be32(FDT_END); + + add_boot_block(bufp, sizep); + free(dt_base); +} diff --git a/kexec/fs2dt.h b/kexec/fs2dt.h new file mode 100644 index 0000000..fe24931 --- /dev/null +++ b/kexec/fs2dt.h @@ -0,0 +1,39 @@ +#ifndef FS2DT_H +#define FS2DT_H + +#if (BOOT_BLOCK_VERSION != 2 && BOOT_BLOCK_VERSION != 17) +#error Please add or correct definition of BOOT_BLOCK_VERSION +#endif + +/* boot block as defined by the linux kernel */ +struct bootblock { + unsigned magic; + unsigned totalsize; + unsigned off_dt_struct; + unsigned off_dt_strings; + unsigned off_mem_rsvmap; + unsigned version; + unsigned last_comp_version; +#if (BOOT_BLOCK_VERSION >= 2) + /* version 2 fields below */ + unsigned boot_physid; + /* version 3 fields below */ + unsigned dt_strings_size; +#if (BOOT_BLOCK_VERSION >= 
17) + /* version 17 fields below */ + unsigned dt_struct_size; +#endif +#endif +}; + +extern struct bootblock bb[1]; + +/* Used to enable printing messages from purgatory code + * Only implemented for PPC64 */ +extern int my_debug; +extern int dt_no_old_root; + +void reserve(unsigned long long where, unsigned long long length); +void create_flatten_tree(char **, off_t *, const char *); + +#endif /* FS2DT_H */ diff --git a/kexec/ifdown.c b/kexec/ifdown.c new file mode 100644 index 0000000..3ac19c1 --- /dev/null +++ b/kexec/ifdown.c @@ -0,0 +1,79 @@ +/* + * ifdown.c Find all network interfaces on the system and + * shut them down. + * + */ +char *v_ifdown = "@(#)ifdown.c 1.11 02-Jun-1998 miquels@cistron.nl"; + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <string.h> +#include <errno.h> + +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/time.h> + +#include <net/if.h> +#include <netinet/in.h> + +/* + * First, we find all shaper devices and down them. Then we + * down all real interfaces. This is because the comment in the + * shaper driver says "if you down the shaper device before the + * attached interface your computer will follow".
+ */ +int ifdown(void) +{ + struct if_nameindex *ifa, *ifp; + struct ifreq ifr; + int fd, shaper; + + if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { + fprintf(stderr, "ifdown: "); + perror("socket"); + goto error; + } + + if ((ifa = if_nameindex()) == NULL) { + fprintf(stderr, "ifdown: "); + perror("if_nameindex"); + goto error; + } + + for (shaper = 1; shaper >= 0; shaper--) { + for (ifp = ifa; ifp->if_index; ifp++) { + + if ((strncmp(ifp->if_name, "shaper", 6) == 0) + != shaper) continue; + if (strcmp(ifp->if_name, "lo") == 0) + continue; + if (strchr(ifp->if_name, ':') != NULL) + continue; + + strncpy(ifr.ifr_name, ifp->if_name, IFNAMSIZ-1); + ifr.ifr_name[IFNAMSIZ-1] = 0; + if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0) { + fprintf(stderr, "ifdown: shutdown "); + perror(ifp->if_name); + goto error; + } + ifr.ifr_flags &= ~(IFF_UP); + if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0) { + fprintf(stderr, "ifdown: shutdown "); + perror(ifp->if_name); + goto error; + } + + } + } + + close(fd); + return 0; + +error: + close(fd); + return -1; +} diff --git a/kexec/kexec-elf-boot.c b/kexec/kexec-elf-boot.c new file mode 100644 index 0000000..38f9056 --- /dev/null +++ b/kexec/kexec-elf-boot.c @@ -0,0 +1,89 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include <x86/x86-linux.h> +#include "kexec.h" +#include "kexec-elf.h" +#include "kexec-elf-boot.h" + + +#define UPSZ(X) _ALIGN_UP(sizeof(X), 4) + +static struct boot_notes { + Elf_Bhdr hdr; + Elf_Nhdr bl_hdr; + unsigned char bl_desc[UPSZ(BOOTLOADER)]; + Elf_Nhdr blv_hdr; + unsigned char blv_desc[UPSZ(BOOTLOADER_VERSION)]; + Elf_Nhdr cmd_hdr; + unsigned char command_line[0]; +} boot_notes = { + .hdr = { + .b_signature = ELF_BOOT_MAGIC, + .b_size = sizeof(boot_notes), + .b_checksum = 0, + .b_records = 3, + }, + .bl_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER), + .n_type = EBN_BOOTLOADER_NAME, + }, + .bl_desc = BOOTLOADER, + .blv_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER_VERSION), + .n_type = EBN_BOOTLOADER_VERSION, + }, + .blv_desc = BOOTLOADER_VERSION, + .cmd_hdr = { + .n_namesz = 0, + .n_descsz = 0, + .n_type = EBN_COMMAND_LINE, + }, +}; + +unsigned long elf_boot_notes( + struct kexec_info *info, unsigned long max_addr, + const char *cmdline, int cmdline_len) +{ + unsigned long note_bytes; + unsigned long note_base; + struct boot_notes *notes; + note_bytes = sizeof(*notes) + _ALIGN(cmdline_len, 4); + notes = xmalloc(note_bytes); + memcpy(notes, &boot_notes, sizeof(boot_notes)); + memcpy(notes->command_line, cmdline, cmdline_len); + notes->hdr.b_size = note_bytes; + notes->cmd_hdr.n_descsz = cmdline_len; + notes->hdr.b_checksum = compute_ip_checksum(notes, note_bytes); + + note_base = add_buffer(info, notes, note_bytes, note_bytes, + 4, 0, max_addr, 1); + + return note_base; +} diff --git a/kexec/kexec-elf-boot.h b/kexec/kexec-elf-boot.h new file mode 100644 index 0000000..cff5c61 --- /dev/null +++ b/kexec/kexec-elf-boot.h @@ -0,0 +1,8 @@ +#ifndef KEXEC_ELF_BOOT_H +#define KEXEC_ELF_BOOT_H + +unsigned long elf_boot_notes( + struct kexec_info *info, unsigned 
long max_addr, + const char *cmdline, int cmdline_len); + +#endif /* KEXEC_ELF_BOOT_H */ diff --git a/kexec/kexec-elf-core.c b/kexec/kexec-elf-core.c new file mode 100644 index 0000000..a341fdb --- /dev/null +++ b/kexec/kexec-elf-core.c @@ -0,0 +1,29 @@ +#include <stdio.h> +#include <stdint.h> +#include <errno.h> +#include <stdlib.h> +#include "elf.h" +#include "kexec-elf.h" + + +int build_elf_core_info(const char *buf, off_t len, struct mem_ehdr *ehdr, + uint32_t flags) +{ + int result; + result = build_elf_info(buf, len, ehdr, flags); + if (result < 0) { + return result; + } + if ((ehdr->e_type != ET_CORE)) { + /* not an ELF Core */ + fprintf(stderr, "Not ELF type ET_CORE\n"); + return -1; + } + if (!ehdr->e_phdr) { + /* No program header */ + fprintf(stderr, "No ELF program header\n"); + return -1; + } + + return 0; +} diff --git a/kexec/kexec-elf-exec.c b/kexec/kexec-elf-exec.c new file mode 100644 index 0000000..bea7b3e --- /dev/null +++ b/kexec/kexec-elf-exec.c @@ -0,0 +1,228 @@ +#include <limits.h> +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include "elf.h" +#include <boot/elf_boot.h> +#include "kexec.h" +#include "kexec-elf.h" + +static const int probe_debug = 0; + +static void load_elf_segments(struct mem_ehdr *ehdr, struct kexec_info *info, unsigned long base) +{ + size_t i; + + /* Read in the PT_LOAD segments */ + for(i = 0; i < ehdr->e_phnum; i++) { + struct mem_phdr *phdr; + size_t size; + phdr = &ehdr->e_phdr[i]; + if (phdr->p_type != PT_LOAD) { + continue; + } + size = phdr->p_filesz; + if (size > phdr->p_memsz) { + size = phdr->p_memsz; + } + add_segment(info, phdr->p_data, size, + phdr->p_paddr + base, phdr->p_memsz); + } +} + +static int get_elf_exec_load_base(struct mem_ehdr *ehdr, struct kexec_info *info, + unsigned long min, unsigned long max, + unsigned long align, unsigned long *base) +{ + unsigned long first, last; + size_t i; + + /* Note on arm64: + * arm64's vmlinux has virtual 
address in physical address + * field of PT_LOAD segments. So the following validity check + * and relocation makes no sense on arm64. + */ + if (ehdr->e_machine == EM_AARCH64) + return 0; + + first = ULONG_MAX; + last = 0; + for(i = 0; i < ehdr->e_phnum; i++) { + unsigned long start, stop; + struct mem_phdr *phdr; + phdr = &ehdr->e_phdr[i]; + if ((phdr->p_type != PT_LOAD) || + (phdr->p_memsz == 0)) + { + continue; + } + start = phdr->p_paddr; + stop = start + phdr->p_memsz; + if (first > start) { + first = start; + } + if (last < stop) { + last = stop; + } + if (align < phdr->p_align) { + align = phdr->p_align; + } + } + + if ((max - min) < (last - first)) + return -1; + + if (!valid_memory_range(info, min > first ? min : first, max < last ? max : last)) { + unsigned long hole; + hole = locate_hole(info, last - first + 1, align, min, max, 1); + if (hole == ULONG_MAX) + return -1; + + /* Base is the value that when added + * to any virtual address in the file + * yields it's load virtual address. + */ + *base = hole - first; + } + return 0; +} + +int build_elf_exec_info(const char *buf, off_t len, struct mem_ehdr *ehdr, + uint32_t flags) +{ + struct mem_phdr *phdr, *end_phdr; + int result; + result = build_elf_info(buf, len, ehdr, flags); + if (result < 0) { + return result; + } + if ((ehdr->e_type != ET_EXEC) && (ehdr->e_type != ET_DYN) && + (ehdr->e_type != ET_CORE)) { + /* not an ELF executable */ + if (probe_debug) { + fprintf(stderr, "Not ELF type ET_EXEC or ET_DYN\n"); + } + return -1; + } + if (!ehdr->e_phdr) { + /* No program header */ + fprintf(stderr, "No ELF program header\n"); + return -1; + } + end_phdr = &ehdr->e_phdr[ehdr->e_phnum]; + for(phdr = ehdr->e_phdr; phdr != end_phdr; phdr++) { + /* Kexec does not support loading interpreters. + * In addition this check keeps us from attempting + * to kexec ordinay executables. 
+		 */
+		if (phdr->p_type == PT_INTERP) {
+			fprintf(stderr, "Requires an ELF interpreter\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ * Register the PT_LOAD segments of `ehdr` with `info`.
+ *
+ * For ET_DYN images a load base is first obtained from
+ * get_elf_exec_load_base() over the range [0, elf_max_addr(ehdr)];
+ * all other types are loaded with base 0.  e_entry is advanced by the
+ * chosen base.
+ *
+ * Returns 0 on success, -1 when there are no program headers, or the
+ * negative value from get_elf_exec_load_base().
+ */
+int elf_exec_load(struct mem_ehdr *ehdr, struct kexec_info *info)
+{
+	unsigned long base;
+	int result;
+
+	if (!ehdr->e_phdr) {
+		fprintf(stderr, "No program header?\n");
+		result = -1;
+		goto out;
+	}
+
+	/* If I have a dynamic executable find its size
+	 * and then find a location for it in memory.
+	 */
+	base = 0;
+	if (ehdr->e_type == ET_DYN) {
+		result = get_elf_exec_load_base(ehdr, info, 0, elf_max_addr(ehdr), 0 /* align */, &base);
+		if (result < 0)
+			goto out;
+	}
+
+	load_elf_segments(ehdr, info, base);
+
+	/* Update entry point to reflect new load address*/
+	ehdr->e_entry += base;
+
+	result = 0;
+ out:
+	return result;
+}
+
+/*
+ * Like elf_exec_load(), but the load base is always requested from
+ * get_elf_exec_load_base(), using the caller's range and alignment
+ * regardless of e_type.
+ *
+ * reloc_min, reloc_max - range passed to get_elf_exec_load_base();
+ *                        reloc_min must not exceed reloc_max
+ * align                - alignment passed to get_elf_exec_load_base()
+ *
+ * Returns 0 on success, -1 on a bad range or missing program headers,
+ * or the negative value from get_elf_exec_load_base().
+ */
+int elf_exec_load_relocatable(struct mem_ehdr *ehdr, struct kexec_info *info,
+				unsigned long reloc_min, unsigned long reloc_max,
+				unsigned long align)
+{
+	unsigned long base;
+	int result;
+
+	if (reloc_min > reloc_max) {
+		/* %lx, not %lux: the latter prints decimal plus a literal 'x'. */
+		fprintf(stderr, "Bad relocation range, start=%lx > end=%lx.\n", reloc_min, reloc_max);
+		result = -1;
+		goto out;
+	}
+	if (!ehdr->e_phdr) {
+		fprintf(stderr, "No program header?\n");
+		result = -1;
+		goto out;
+	}
+
+	base = 0;
+	result = get_elf_exec_load_base(ehdr, info, reloc_min, reloc_max, align, &base);
+	if (result < 0)
+		goto out;
+
+	load_elf_segments(ehdr, info, base);
+
+	/* Update entry point to reflect new load address*/
+	ehdr->e_entry += base;
+
+	result = 0;
+ out:
+	return result;
+}
+
+/*
+ * Parse `buf` with build_elf_exec_info() and load it with
+ * elf_exec_load(); calls die() if either step fails.
+ */
+void elf_exec_build_load(struct kexec_info *info, struct mem_ehdr *ehdr,
+				const char *buf, off_t len, uint32_t flags)
+{
+	int result;
+	/* Parse the Elf file */
+	result = build_elf_exec_info(buf, len, ehdr, flags);
+	if (result < 0) {
+		die("ELF exec parse failed\n");
+	}
+
+	/* Load the Elf data */
+	result = elf_exec_load(ehdr, info);
+	if (result < 0) {
+		die("ELF exec load failed\n");
+	}
+}
+
+void elf_exec_build_load_relocatable(struct kexec_info *info, struct
mem_ehdr *ehdr, + const char *buf, off_t len, uint32_t flags, + unsigned long reloc_min, unsigned long reloc_max, + unsigned long align) +{ + int result; + /* Parse the Elf file */ + result = build_elf_exec_info(buf, len, ehdr, flags); + if (result < 0) { + die("%s: ELF exec parse failed\n", __func__); + } + + /* Load the Elf data */ + result = elf_exec_load_relocatable(ehdr, info, reloc_min, reloc_max, align); + if (result < 0) { + die("%s: ELF exec load failed\n", __func__); + } +}
\ No newline at end of file diff --git a/kexec/kexec-elf-rel.c b/kexec/kexec-elf-rel.c new file mode 100644 index 0000000..0a8b4d2 --- /dev/null +++ b/kexec/kexec-elf-rel.c @@ -0,0 +1,553 @@ +#include <limits.h> +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include "elf.h" +#include <boot/elf_boot.h> +#include "kexec.h" +#include "kexec-elf.h" + +static const int probe_debug = 0; + +static size_t elf_sym_size(struct mem_ehdr *ehdr) +{ + size_t sym_size = 0; + if (ehdr->ei_class == ELFCLASS32) { + sym_size = sizeof(Elf32_Sym); + } + else if (ehdr->ei_class == ELFCLASS64) { + sym_size = sizeof(Elf64_Sym); + } + else { + die("Bad elf class"); + } + return sym_size; +} + +static size_t elf_rel_size(struct mem_ehdr *ehdr) +{ + size_t rel_size = 0; + if (ehdr->ei_class == ELFCLASS32) { + rel_size = sizeof(Elf32_Rel); + } + else if (ehdr->ei_class == ELFCLASS64) { + rel_size = sizeof(Elf64_Rel); + } + else { + die("Bad elf class"); + } + return rel_size; +} + +static size_t elf_rela_size(struct mem_ehdr *ehdr) +{ + size_t rel_size = 0; + if (ehdr->ei_class == ELFCLASS32) { + rel_size = sizeof(Elf32_Rela); + } + else if (ehdr->ei_class == ELFCLASS64) { + rel_size = sizeof(Elf64_Rela); + } + else { + die("Bad elf class"); + } + return rel_size; +} + +static struct mem_sym elf_sym(struct mem_ehdr *ehdr, const unsigned char *ptr) +{ + struct mem_sym sym = { 0, 0, 0, 0, 0, 0 }; + if (ehdr->ei_class == ELFCLASS32) { + Elf32_Sym lsym; + memcpy(&lsym, ptr, sizeof(lsym)); + sym.st_name = elf32_to_cpu(ehdr, lsym.st_name); + sym.st_value = elf32_to_cpu(ehdr, lsym.st_value); + sym.st_size = elf32_to_cpu(ehdr, lsym.st_size); + sym.st_info = lsym.st_info; + sym.st_other = lsym.st_other; + sym.st_shndx = elf16_to_cpu(ehdr, lsym.st_shndx); + } + else if (ehdr->ei_class == ELFCLASS64) { + Elf64_Sym lsym; + memcpy(&lsym, ptr, sizeof(lsym)); + sym.st_name = elf32_to_cpu(ehdr, lsym.st_name); + sym.st_value = elf64_to_cpu(ehdr, 
lsym.st_value); + sym.st_size = elf64_to_cpu(ehdr, lsym.st_size); + sym.st_info = lsym.st_info; + sym.st_other = lsym.st_other; + sym.st_shndx = elf16_to_cpu(ehdr, lsym.st_shndx); + } + else { + die("Bad elf class"); + } + return sym; +} + +static struct mem_rela elf_rel(struct mem_ehdr *ehdr, const unsigned char *ptr) +{ + struct mem_rela rela = { 0, 0, 0, 0 }; + if (ehdr->ei_class == ELFCLASS32) { + Elf32_Rel lrel; + memcpy(&lrel, ptr, sizeof(lrel)); + rela.r_offset = elf32_to_cpu(ehdr, lrel.r_offset); + rela.r_sym = ELF32_R_SYM(elf32_to_cpu(ehdr, lrel.r_info)); + rela.r_type = ELF32_R_TYPE(elf32_to_cpu(ehdr, lrel.r_info)); + rela.r_addend = 0; + } + else if (ehdr->ei_class == ELFCLASS64) { + Elf64_Rel lrel; + memcpy(&lrel, ptr, sizeof(lrel)); + rela.r_offset = elf64_to_cpu(ehdr, lrel.r_offset); + rela.r_sym = ELF64_R_SYM(elf64_to_cpu(ehdr, lrel.r_info)); + rela.r_type = ELF64_R_TYPE(elf64_to_cpu(ehdr, lrel.r_info)); + rela.r_addend = 0; + } + else { + die("Bad elf class"); + } + return rela; +} + +static struct mem_rela elf_rela(struct mem_ehdr *ehdr, const unsigned char *ptr) +{ + struct mem_rela rela = { 0, 0, 0, 0 }; + if (ehdr->ei_class == ELFCLASS32) { + Elf32_Rela lrela; + memcpy(&lrela, ptr, sizeof(lrela)); + rela.r_offset = elf32_to_cpu(ehdr, lrela.r_offset); + rela.r_sym = ELF32_R_SYM(elf32_to_cpu(ehdr, lrela.r_info)); + rela.r_type = ELF32_R_TYPE(elf32_to_cpu(ehdr, lrela.r_info)); + rela.r_addend = elf32_to_cpu(ehdr, lrela.r_addend); + } + else if (ehdr->ei_class == ELFCLASS64) { + Elf64_Rela lrela; + memcpy(&lrela, ptr, sizeof(lrela)); + rela.r_offset = elf64_to_cpu(ehdr, lrela.r_offset); + rela.r_sym = ELF64_R_SYM(elf64_to_cpu(ehdr, lrela.r_info)); + rela.r_type = ELF64_R_TYPE(elf64_to_cpu(ehdr, lrela.r_info)); + rela.r_addend = elf64_to_cpu(ehdr, lrela.r_addend); + } + else { + die("Bad elf class"); + } + return rela; +} + +int build_elf_rel_info(const char *buf, off_t len, struct mem_ehdr *ehdr, + uint32_t flags) +{ + int result; + result = 
build_elf_info(buf, len, ehdr, flags); + if (result < 0) { + return result; + } + if (ehdr->e_type != ET_REL) { + /* not an ELF relocate object */ + if (probe_debug) { + fprintf(stderr, "Not ELF type ET_REL\n"); + fprintf(stderr, "ELF Type: %x\n", ehdr->e_type); + } + return -1; + } + if (!ehdr->e_shdr) { + /* No section headers */ + if (probe_debug) { + fprintf(stderr, "No ELF section headers\n"); + } + return -1; + } + if (!machine_verify_elf_rel(ehdr)) { + /* It does not meant the native architecture constraints */ + if (probe_debug) { + fprintf(stderr, "ELF architecture constraint failure\n"); + } + return -1; + } + return 0; +} + +static unsigned long get_section_addralign(struct mem_shdr *shdr) +{ + return (shdr->sh_addralign == 0) ? 1 : shdr->sh_addralign; +} + +int elf_rel_load(struct mem_ehdr *ehdr, struct kexec_info *info, + unsigned long min, unsigned long max, int end) +{ + struct mem_shdr *shdr, *shdr_end, *entry_shdr; + unsigned long entry; + int result; + unsigned char *buf; + unsigned long buf_align, bufsz, bss_align, bsssz, bss_pad; + unsigned long buf_addr, data_addr, bss_addr; + + if (max > elf_max_addr(ehdr)) { + max = elf_max_addr(ehdr); + } + if (!ehdr->e_shdr) { + fprintf(stderr, "No section header?\n"); + result = -1; + goto out; + } + shdr_end = &ehdr->e_shdr[ehdr->e_shnum]; + + /* Find which section entry is in */ + entry_shdr = NULL; + entry = ehdr->e_entry; + for(shdr = ehdr->e_shdr; shdr != shdr_end; shdr++) { + if (!(shdr->sh_flags & SHF_ALLOC)) { + continue; + } + if (!(shdr->sh_flags & SHF_EXECINSTR)) { + continue; + } + /* Make entry section relative */ + if ((shdr->sh_addr <= ehdr->e_entry) && + ((shdr->sh_addr + shdr->sh_size) > ehdr->e_entry)) { + entry_shdr = shdr; + entry -= shdr->sh_addr; + break; + } + } + + /* Find the memory footprint of the relocatable object */ + buf_align = 1; + bss_align = 1; + bufsz = 0; + bsssz = 0; + for(shdr = ehdr->e_shdr; shdr != shdr_end; shdr++) { + if (!(shdr->sh_flags & SHF_ALLOC)) { + 
continue; + } + if (shdr->sh_type != SHT_NOBITS) { + unsigned long align; + align = get_section_addralign(shdr); + /* See if I need more alignment */ + if (buf_align < align) { + buf_align = align; + } + /* Now align bufsz */ + bufsz = _ALIGN(bufsz, align); + /* And now add our buffer */ + bufsz += shdr->sh_size; + } + else { + unsigned long align; + align = get_section_addralign(shdr); + /* See if I need more alignment */ + if (bss_align < align) { + bss_align = align; + } + /* Now align bsssz */ + bsssz = _ALIGN(bsssz, align); + /* And now add our buffer */ + bsssz += shdr->sh_size; + } + } + if (buf_align < bss_align) { + buf_align = bss_align; + } + bss_pad = 0; + if (bufsz & (bss_align - 1)) { + bss_pad = bss_align - (bufsz & (bss_align - 1)); + } + + /* Allocate where we will put the relocated object */ + buf = xmalloc(bufsz); + buf_addr = add_buffer(info, buf, bufsz, bufsz + bss_pad + bsssz, + buf_align, min, max, end); + ehdr->rel_addr = buf_addr; + ehdr->rel_size = bufsz + bss_pad + bsssz; + + /* Walk through and find an address for each SHF_ALLOC section */ + data_addr = buf_addr; + bss_addr = buf_addr + bufsz + bss_pad; + for(shdr = ehdr->e_shdr; shdr != shdr_end; shdr++) { + unsigned long align; + if (!(shdr->sh_flags & SHF_ALLOC)) { + continue; + } + align = get_section_addralign(shdr); + if (shdr->sh_type != SHT_NOBITS) { + unsigned long off; + /* Adjust the address */ + data_addr = _ALIGN(data_addr, align); + + /* Update the section */ + off = data_addr - buf_addr; + memcpy(buf + off, shdr->sh_data, shdr->sh_size); + shdr->sh_addr = data_addr; + shdr->sh_data = buf + off; + + /* Advance to the next address */ + data_addr += shdr->sh_size; + } else { + /* Adjust the address */ + bss_addr = _ALIGN(bss_addr, align); + + /* Update the section */ + shdr->sh_addr = bss_addr; + + /* Advance to the next address */ + bss_addr += shdr->sh_size; + } + } + /* Compute the relocated value for entry, and load it */ + if (entry_shdr) { + entry += 
entry_shdr->sh_addr; + ehdr->e_entry = entry; + } + info->entry = (void *)entry; + + /* Now that the load address is known apply relocations */ + for(shdr = ehdr->e_shdr; shdr != shdr_end; shdr++) { + struct mem_shdr *section, *symtab; + const unsigned char *strtab; + size_t rel_size; + const unsigned char *ptr, *rel_end; + if ((shdr->sh_type != SHT_RELA) && (shdr->sh_type != SHT_REL)) { + continue; + } + if ((shdr->sh_info > ehdr->e_shnum) || + (shdr->sh_link > ehdr->e_shnum)) + { + die("Invalid section number\n"); + } + section = &ehdr->e_shdr[shdr->sh_info]; + symtab = &ehdr->e_shdr[shdr->sh_link]; + + if (!(section->sh_flags & SHF_ALLOC)) { + continue; + } + + if (symtab->sh_link > ehdr->e_shnum) { + /* Invalid section number? */ + continue; + } + strtab = ehdr->e_shdr[symtab->sh_link].sh_data; + + rel_size = 0; + if (shdr->sh_type == SHT_REL) { + rel_size = elf_rel_size(ehdr); + } + else if (shdr->sh_type == SHT_RELA) { + rel_size = elf_rela_size(ehdr); + } + else { + die("Cannot find elf rel size\n"); + } + rel_end = shdr->sh_data + shdr->sh_size; + for(ptr = shdr->sh_data; ptr < rel_end; ptr += rel_size) { + struct mem_rela rel = {0}; + struct mem_sym sym; + const void *location; + const unsigned char *name; + unsigned long address, value, sec_base; + if (shdr->sh_type == SHT_REL) { + rel = elf_rel(ehdr, ptr); + } + else if (shdr->sh_type == SHT_RELA) { + rel = elf_rela(ehdr, ptr); + } + /* the location to change */ + location = section->sh_data + rel.r_offset; + + /* The final address of that location */ + address = section->sh_addr + rel.r_offset; + + /* The relevant symbol */ + sym = elf_sym(ehdr, symtab->sh_data + (rel.r_sym * elf_sym_size(ehdr))); + + if (sym.st_name) { + name = strtab + sym.st_name; + } + else { + name = ehdr->e_shdr[ehdr->e_shstrndx].sh_data; + name += ehdr->e_shdr[sym.st_shndx].sh_name; + } + + dbgprintf("sym: %10s info: %02x other: %02x shndx: %x value: %llx size: %llx\n", + name, + sym.st_info, + sym.st_other, + sym.st_shndx, + 
sym.st_value, + sym.st_size); + + if (sym.st_shndx == STN_UNDEF) { + /* + * NOTE: ppc64 elf .ro shows up a UNDEF section. + * From Elf 1.2 Spec: + * Relocation Entries: If the index is STN_UNDEF, + * the undefined symbol index, the relocation uses 0 + * as the "symbol value". + * TOC symbols appear as undefined but should be + * resolved as well. Their type is STT_NOTYPE so allow + * such symbols to be processed. + */ + if (ELF32_ST_TYPE(sym.st_info) != STT_NOTYPE) + die("Undefined symbol: %s\n", name); + } + sec_base = 0; + if (sym.st_shndx == SHN_COMMON) { + die("symbol: '%s' in common section\n", + name); + } + else if (sym.st_shndx == SHN_ABS) { + sec_base = 0; + } + else if (sym.st_shndx > ehdr->e_shnum) { + die("Invalid section: %d for symbol %s\n", + sym.st_shndx, name); + } + else { + sec_base = ehdr->e_shdr[sym.st_shndx].sh_addr; + } + value = sym.st_value; + value += sec_base; + value += rel.r_addend; + + dbgprintf("sym: %s value: %lx addr: %lx\n", + name, value, address); + + machine_apply_elf_rel(ehdr, &sym, rel.r_type, + (void *)location, address, value); + } + } + result = 0; + out: + return result; +} + +void elf_rel_build_load(struct kexec_info *info, struct mem_ehdr *ehdr, + const char *buf, off_t len, unsigned long min, unsigned long max, + int end, uint32_t flags) +{ + int result; + + /* Parse the Elf file */ + result = build_elf_rel_info(buf, len, ehdr, flags); + if (result < 0) { + die("ELF rel parse failed\n"); + } + /* Load the Elf data */ + result = elf_rel_load(ehdr, info, min, max, end); + if (result < 0) { + die("ELF rel load failed\n"); + } +} + +int elf_rel_find_symbol(struct mem_ehdr *ehdr, + const char *name, struct mem_sym *ret_sym) +{ + struct mem_shdr *shdr, *shdr_end; + + if (!ehdr->e_shdr) { + /* "No section header? 
*/ + return -1; + } + /* Walk through the sections and find the symbol table */ + shdr_end = &ehdr->e_shdr[ehdr->e_shnum]; + for (shdr = ehdr->e_shdr; shdr != shdr_end; shdr++) { + const char *strtab; + size_t sym_size; + const unsigned char *ptr, *sym_end; + if (shdr->sh_type != SHT_SYMTAB) { + continue; + } + if (shdr->sh_link > ehdr->e_shnum) { + /* Invalid strtab section number? */ + continue; + } + strtab = (char *)ehdr->e_shdr[shdr->sh_link].sh_data; + /* Walk through the symbol table and find the symbol */ + sym_size = elf_sym_size(ehdr); + sym_end = shdr->sh_data + shdr->sh_size; + for(ptr = shdr->sh_data; ptr < sym_end; ptr += sym_size) { + struct mem_sym sym; + sym = elf_sym(ehdr, ptr); + if (ELF32_ST_BIND(sym.st_info) != STB_GLOBAL) { + continue; + } + if (strcmp(strtab + sym.st_name, name) != 0) { + continue; + } + if ((sym.st_shndx == STN_UNDEF) || + (sym.st_shndx > ehdr->e_shnum)) + { + die("Symbol: %s has Bad section index %d\n", + name, sym.st_shndx); + } + *ret_sym = sym; + return 0; + } + } + /* I did not find it :( */ + return -1; + +} + +unsigned long elf_rel_get_addr(struct mem_ehdr *ehdr, const char *name) +{ + struct mem_shdr *shdr; + struct mem_sym sym; + int result; + result = elf_rel_find_symbol(ehdr, name, &sym); + if (result < 0) { + die("Symbol: %s not found cannot retrive it's address\n", + name); + } + shdr = &ehdr->e_shdr[sym.st_shndx]; + return shdr->sh_addr + sym.st_value; +} + +void elf_rel_set_symbol(struct mem_ehdr *ehdr, + const char *name, const void *buf, size_t size) +{ + unsigned char *sym_buf; + struct mem_shdr *shdr; + struct mem_sym sym; + int result; + + result = elf_rel_find_symbol(ehdr, name, &sym); + if (result < 0) { + die("Symbol: %s not found cannot set\n", + name); + } + if (sym.st_size != size) { + die("Symbol: %s has size: %lld not %zd\n", + name, sym.st_size, size); + } + shdr = &ehdr->e_shdr[sym.st_shndx]; + if (shdr->sh_type == SHT_NOBITS) { + die("Symbol: %s is in a bss section cannot set\n", name); + } + 
sym_buf = (unsigned char *)(shdr->sh_data + sym.st_value); + memcpy(sym_buf, buf, size); +} + +void elf_rel_get_symbol(struct mem_ehdr *ehdr, + const char *name, void *buf, size_t size) +{ + const unsigned char *sym_buf; + struct mem_shdr *shdr; + struct mem_sym sym; + int result; + + result = elf_rel_find_symbol(ehdr, name, &sym); + if (result < 0) { + die("Symbol: %s not found cannot get\n", name); + } + if (sym.st_size != size) { + die("Symbol: %s has size: %lld not %zd\n", + name, sym.st_size, size); + } + shdr = &ehdr->e_shdr[sym.st_shndx]; + if (shdr->sh_type == SHT_NOBITS) { + die("Symbol: %s is in a bss section cannot set\n", name); + } + sym_buf = shdr->sh_data + sym.st_value; + memcpy(buf, sym_buf,size); +} diff --git a/kexec/kexec-elf.c b/kexec/kexec-elf.c new file mode 100644 index 0000000..be60bbd --- /dev/null +++ b/kexec/kexec-elf.c @@ -0,0 +1,787 @@ +#include <limits.h> +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include "elf.h" +#include <boot/elf_boot.h> +#include "kexec.h" +#include "kexec-elf.h" +#include "crashdump.h" + +static const int probe_debug = 0; + +uint16_t elf16_to_cpu(const struct mem_ehdr *ehdr, uint16_t value) +{ + if (ehdr->ei_data == ELFDATA2LSB) { + value = le16_to_cpu(value); + } + else if (ehdr->ei_data == ELFDATA2MSB) { + value = be16_to_cpu(value); + } + return value; +} + +uint32_t elf32_to_cpu(const struct mem_ehdr *ehdr, uint32_t value) +{ + if (ehdr->ei_data == ELFDATA2LSB) { + value = le32_to_cpu(value); + } + else if (ehdr->ei_data == ELFDATA2MSB) { + value = be32_to_cpu(value); + } + return value; +} + +uint64_t elf64_to_cpu(const struct mem_ehdr *ehdr, uint64_t value) +{ + if (ehdr->ei_data == ELFDATA2LSB) { + value = le64_to_cpu(value); + } + else if (ehdr->ei_data == ELFDATA2MSB) { + value = be64_to_cpu(value); + } + return value; +} + +uint16_t cpu_to_elf16(const struct mem_ehdr *ehdr, uint16_t value) +{ + if (ehdr->ei_data == ELFDATA2LSB) { + value = 
cpu_to_le16(value); + } + else if (ehdr->ei_data == ELFDATA2MSB) { + value = cpu_to_be16(value); + } + return value; +} + +uint32_t cpu_to_elf32(const struct mem_ehdr *ehdr, uint32_t value) +{ + if (ehdr->ei_data == ELFDATA2LSB) { + value = cpu_to_le32(value); + } + else if (ehdr->ei_data == ELFDATA2MSB) { + value = cpu_to_be32(value); + } + return value; +} + +uint64_t cpu_to_elf64(const struct mem_ehdr *ehdr, uint64_t value) +{ + if (ehdr->ei_data == ELFDATA2LSB) { + value = cpu_to_le64(value); + } + else if (ehdr->ei_data == ELFDATA2MSB) { + value = cpu_to_be64(value); + } + return value; +} + +#define ELF32_MAX 0xffffffff +#define ELF64_MAX 0xffffffffffffffff +#if ELF64_MAX > ULONG_MAX +#undef ELF64_MAX +#define ELF64_MAX ULONG_MAX +#endif + +unsigned long elf_max_addr(const struct mem_ehdr *ehdr) +{ + unsigned long max_addr = 0; + if (ehdr->ei_class == ELFCLASS32) { + max_addr = ELF32_MAX; + } + else if (ehdr->ei_class == ELFCLASS64) { + max_addr = ELF64_MAX; + } + return max_addr; +} +static int build_mem_elf32_ehdr(const char *buf, off_t len, struct mem_ehdr *ehdr) +{ + Elf32_Ehdr lehdr; + if ((uintmax_t)len < (uintmax_t)sizeof(lehdr)) { + /* Buffer is to small to be an elf executable */ + if (probe_debug) { + fprintf(stderr, "Buffer is to small to hold ELF header\n"); + } + return -1; + } + memcpy(&lehdr, buf, sizeof(lehdr)); + if (elf16_to_cpu(ehdr, lehdr.e_ehsize) != sizeof(Elf32_Ehdr)) { + /* Invalid Elf header size */ + if (probe_debug) { + fprintf(stderr, "Bad ELF header size\n"); + } + return -1; + } + if (elf32_to_cpu(ehdr, lehdr.e_entry) > UINT32_MAX) { + /* entry is to large */ + if (probe_debug) { + fprintf(stderr, "ELF e_entry to large\n"); + } + return -1; + } + if (elf32_to_cpu(ehdr, lehdr.e_phoff) > UINT32_MAX) { + /* phoff is to large */ + if (probe_debug) { + fprintf(stderr, "ELF e_phoff to large\n"); + } + return -1; + } + if (elf32_to_cpu(ehdr, lehdr.e_shoff) > UINT32_MAX) { + /* shoff is to large */ + if (probe_debug) { + fprintf(stderr, 
"ELF e_shoff to large\n"); + } + return -1; + } + ehdr->e_type = elf16_to_cpu(ehdr, lehdr.e_type); + ehdr->e_machine = elf16_to_cpu(ehdr, lehdr.e_machine); + ehdr->e_version = elf32_to_cpu(ehdr, lehdr.e_version); + ehdr->e_entry = elf32_to_cpu(ehdr, lehdr.e_entry); + ehdr->e_phoff = elf32_to_cpu(ehdr, lehdr.e_phoff); + ehdr->e_shoff = elf32_to_cpu(ehdr, lehdr.e_shoff); + ehdr->e_flags = elf32_to_cpu(ehdr, lehdr.e_flags); + ehdr->e_phnum = elf16_to_cpu(ehdr, lehdr.e_phnum); + ehdr->e_shnum = elf16_to_cpu(ehdr, lehdr.e_shnum); + ehdr->e_shstrndx = elf16_to_cpu(ehdr, lehdr.e_shstrndx); + + if ((ehdr->e_phnum > 0) && + (elf16_to_cpu(ehdr, lehdr.e_phentsize) != sizeof(Elf32_Phdr))) + { + /* Invalid program header size */ + if (probe_debug) { + fprintf(stderr, "ELF bad program header size\n"); + } + return -1; + } + if ((ehdr->e_shnum > 0) && + (elf16_to_cpu(ehdr, lehdr.e_shentsize) != sizeof(Elf32_Shdr))) + { + /* Invalid section header size */ + if (probe_debug) { + fprintf(stderr, "ELF bad section header size\n"); + } + return -1; + } + + return 0; +} + +static int build_mem_elf64_ehdr(const char *buf, off_t len, struct mem_ehdr *ehdr) +{ + Elf64_Ehdr lehdr; + if ((uintmax_t)len < (uintmax_t)sizeof(lehdr)) { + /* Buffer is to small to be an elf executable */ + if (probe_debug) { + fprintf(stderr, "Buffer is to small to hold ELF header\n"); + } + return -1; + } + memcpy(&lehdr, buf, sizeof(lehdr)); + if (elf16_to_cpu(ehdr, lehdr.e_ehsize) != sizeof(Elf64_Ehdr)) { + /* Invalid Elf header size */ + if (probe_debug) { + fprintf(stderr, "Bad ELF header size\n"); + } + return -1; + } + if (elf32_to_cpu(ehdr, lehdr.e_entry) > UINT32_MAX) { + /* entry is to large */ + if (probe_debug) { + fprintf(stderr, "ELF e_entry to large\n"); + } + return -1; + } + if (elf32_to_cpu(ehdr, lehdr.e_phoff) > UINT32_MAX) { + /* phoff is to large */ + if (probe_debug) { + fprintf(stderr, "ELF e_phoff to large\n"); + } + return -1; + } + if (elf32_to_cpu(ehdr, lehdr.e_shoff) > UINT32_MAX) { + 
/* shoff is to large */ + if (probe_debug) { + fprintf(stderr, "ELF e_shoff to large\n"); + } + return -1; + } + ehdr->e_type = elf16_to_cpu(ehdr, lehdr.e_type); + ehdr->e_machine = elf16_to_cpu(ehdr, lehdr.e_machine); + ehdr->e_version = elf32_to_cpu(ehdr, lehdr.e_version); + ehdr->e_entry = elf64_to_cpu(ehdr, lehdr.e_entry); + ehdr->e_phoff = elf64_to_cpu(ehdr, lehdr.e_phoff); + ehdr->e_shoff = elf64_to_cpu(ehdr, lehdr.e_shoff); + ehdr->e_flags = elf32_to_cpu(ehdr, lehdr.e_flags); + ehdr->e_phnum = elf16_to_cpu(ehdr, lehdr.e_phnum); + ehdr->e_shnum = elf16_to_cpu(ehdr, lehdr.e_shnum); + ehdr->e_shstrndx = elf16_to_cpu(ehdr, lehdr.e_shstrndx); + + if ((ehdr->e_phnum > 0) && + (elf16_to_cpu(ehdr, lehdr.e_phentsize) != sizeof(Elf64_Phdr))) + { + /* Invalid program header size */ + if (probe_debug) { + fprintf(stderr, "ELF bad program header size\n"); + } + return -1; + } + if ((ehdr->e_shnum > 0) && + (elf16_to_cpu(ehdr, lehdr.e_shentsize) != sizeof(Elf64_Shdr))) + { + /* Invalid section header size */ + if (probe_debug) { + fprintf(stderr, "ELF bad section header size\n"); + } + return -1; + } + + return 0; +} + +static int build_mem_ehdr(const char *buf, off_t len, struct mem_ehdr *ehdr) +{ + unsigned char e_ident[EI_NIDENT]; + int result; + memset(ehdr, 0, sizeof(*ehdr)); + if ((uintmax_t)len < (uintmax_t)sizeof(e_ident)) { + /* Buffer is to small to be an elf executable */ + if (probe_debug) { + fprintf(stderr, "Buffer is to small to hold ELF e_ident\n"); + } + return -1; + } + memcpy(e_ident, buf, sizeof(e_ident)); + if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { + /* No ELF header magic */ + if (probe_debug) { + fprintf(stderr, "NO ELF header magic\n"); + } + return -1; + } + ehdr->ei_class = e_ident[EI_CLASS]; + ehdr->ei_data = e_ident[EI_DATA]; + if ( (ehdr->ei_class != ELFCLASS32) && + (ehdr->ei_class != ELFCLASS64)) + { + /* Not a supported elf class */ + if (probe_debug) { + fprintf(stderr, "Not a supported ELF class\n"); + } + return -1; + } + if ( 
(ehdr->ei_data != ELFDATA2LSB) && + (ehdr->ei_data != ELFDATA2MSB)) + { + /* Not a supported elf data type */ + if (probe_debug) { + fprintf(stderr, "Not a supported ELF data format\n"); + } + return -1; + } + + result = -1; + if (ehdr->ei_class == ELFCLASS32) { + result = build_mem_elf32_ehdr(buf, len, ehdr); + } + else if (ehdr->ei_class == ELFCLASS64) { + result = build_mem_elf64_ehdr(buf, len, ehdr); + } + if (result < 0) { + return result; + } + if ((e_ident[EI_VERSION] != EV_CURRENT) || + (ehdr->e_version != EV_CURRENT)) + { + if (probe_debug) { + fprintf(stderr, "Unknown ELF version\n"); + } + /* Unknwon elf version */ + return -1; + } + return 0; +} + +static int build_mem_elf32_phdr(const char *buf, struct mem_ehdr *ehdr, int idx) +{ + struct mem_phdr *phdr; + const char *pbuf; + Elf32_Phdr lphdr; + pbuf = buf + ehdr->e_phoff + (idx * sizeof(lphdr)); + phdr = &ehdr->e_phdr[idx]; + memcpy(&lphdr, pbuf, sizeof(lphdr)); + + if ( (elf32_to_cpu(ehdr, lphdr.p_filesz) > UINT32_MAX) || + (elf32_to_cpu(ehdr, lphdr.p_memsz) > UINT32_MAX) || + (elf32_to_cpu(ehdr, lphdr.p_offset) > UINT32_MAX) || + (elf32_to_cpu(ehdr, lphdr.p_paddr) > UINT32_MAX) || + (elf32_to_cpu(ehdr, lphdr.p_vaddr) > UINT32_MAX) || + (elf32_to_cpu(ehdr, lphdr.p_align) > UINT32_MAX)) + { + fprintf(stderr, "Program segment size out of range\n"); + return -1; + } + + phdr->p_type = elf32_to_cpu(ehdr, lphdr.p_type); + phdr->p_paddr = elf32_to_cpu(ehdr, lphdr.p_paddr); + phdr->p_vaddr = elf32_to_cpu(ehdr, lphdr.p_vaddr); + phdr->p_filesz = elf32_to_cpu(ehdr, lphdr.p_filesz); + phdr->p_memsz = elf32_to_cpu(ehdr, lphdr.p_memsz); + phdr->p_offset = elf32_to_cpu(ehdr, lphdr.p_offset); + phdr->p_flags = elf32_to_cpu(ehdr, lphdr.p_flags); + phdr->p_align = elf32_to_cpu(ehdr, lphdr.p_align); + + return 0; +} + +static int build_mem_elf64_phdr(const char *buf, struct mem_ehdr *ehdr, int idx) +{ + struct mem_phdr *phdr; + const char *pbuf; + Elf64_Phdr lphdr; + pbuf = buf + ehdr->e_phoff + (idx * 
sizeof(lphdr)); + phdr = &ehdr->e_phdr[idx]; + memcpy(&lphdr, pbuf, sizeof(lphdr)); + + if ( (elf64_to_cpu(ehdr, lphdr.p_filesz) > UINT64_MAX) || + (elf64_to_cpu(ehdr, lphdr.p_memsz) > UINT64_MAX) || + (elf64_to_cpu(ehdr, lphdr.p_offset) > UINT64_MAX) || + (elf64_to_cpu(ehdr, lphdr.p_paddr) > UINT64_MAX) || + (elf64_to_cpu(ehdr, lphdr.p_vaddr) > UINT64_MAX) || + (elf64_to_cpu(ehdr, lphdr.p_align) > UINT64_MAX)) + { + fprintf(stderr, "Program segment size out of range\n"); + return -1; + } + + phdr->p_type = elf32_to_cpu(ehdr, lphdr.p_type); + phdr->p_paddr = elf64_to_cpu(ehdr, lphdr.p_paddr); + phdr->p_vaddr = elf64_to_cpu(ehdr, lphdr.p_vaddr); + phdr->p_filesz = elf64_to_cpu(ehdr, lphdr.p_filesz); + phdr->p_memsz = elf64_to_cpu(ehdr, lphdr.p_memsz); + phdr->p_offset = elf64_to_cpu(ehdr, lphdr.p_offset); + phdr->p_flags = elf32_to_cpu(ehdr, lphdr.p_flags); + phdr->p_align = elf64_to_cpu(ehdr, lphdr.p_align); + + return 0; +} + +static int build_mem_phdrs(const char *buf, off_t len, struct mem_ehdr *ehdr, + uint32_t flags) +{ + size_t phdr_size, mem_phdr_size, i; + + /* e_phnum is at most 65535 so calculating + * the size of the program header cannot overflow. + */ + /* Is the program header in the file buffer? 
*/ + phdr_size = 0; + if (ehdr->ei_class == ELFCLASS32) { + phdr_size = sizeof(Elf32_Phdr); + } + else if (ehdr->ei_class == ELFCLASS64) { + phdr_size = sizeof(Elf64_Phdr); + } + else { + fprintf(stderr, "Invalid ei_class?\n"); + return -1; + } + phdr_size *= ehdr->e_phnum; + /* e_phoff is an unsigned long long taken straight from the file, + * so e_phoff + phdr_size can wrap around and slip past a sum-based + * test. Compare against the room left after e_phoff instead. + */ + if (((uintmax_t)ehdr->e_phoff > (uintmax_t)len) || + ((uintmax_t)phdr_size > + ((uintmax_t)len - (uintmax_t)ehdr->e_phoff))) { + /* The program header did not fit in the file buffer */ + if (probe_debug || (flags & ELF_SKIP_FILESZ_CHECK)) { + fprintf(stderr, "ELF program headers truncated" + " have %ju bytes need %ju bytes\n", + (uintmax_t)len, + (uintmax_t)(ehdr->e_phoff + phdr_size)); + } + return -1; + } + + /* Allocate the e_phdr array */ + mem_phdr_size = sizeof(ehdr->e_phdr[0]) * ehdr->e_phnum; + ehdr->e_phdr = xmalloc(mem_phdr_size); + + for(i = 0; i < ehdr->e_phnum; i++) { + struct mem_phdr *phdr; + int result; + result = -1; + if (ehdr->ei_class == ELFCLASS32) { + result = build_mem_elf32_phdr(buf, ehdr, i); + + } + else if (ehdr->ei_class == ELFCLASS64) { + result = build_mem_elf64_phdr(buf, ehdr, i); + } + if (result < 0) { + return result; + } + + /* Check the program headers to be certain + * they are safe to use. + * Skip the check if ELF_SKIP_FILESZ_CHECK is set.
+ */ + phdr = &ehdr->e_phdr[i]; + /* Test offset and size separately: p_offset + p_filesz is an + * unsigned long long sum of file-supplied values and can wrap. + */ + if (!(flags & ELF_SKIP_FILESZ_CHECK) + && (((uintmax_t)phdr->p_offset > (uintmax_t)len) || + ((uintmax_t)phdr->p_filesz > + ((uintmax_t)len - (uintmax_t)phdr->p_offset)))) { + /* The segment does not fit in the buffer */ + if (probe_debug) { + fprintf(stderr, "ELF segment not in file\n"); + } + return -1; + } + if (phdr->p_paddr != (unsigned long long)-1 && + (phdr->p_paddr + phdr->p_memsz) < phdr->p_paddr) { + /* The memory address wraps */ + if (probe_debug) { + fprintf(stderr, "ELF address wrap around\n"); + } + return -1; + } + /* Remember where the segment lives in the buffer */ + phdr->p_data = buf + phdr->p_offset; + } + return 0; +} + +static int build_mem_elf32_shdr(const char *buf, struct mem_ehdr *ehdr, int idx) +{ + struct mem_shdr *shdr; + const char *sbuf; + int size_ok; + Elf32_Shdr lshdr; + sbuf = buf + ehdr->e_shoff + (idx * sizeof(lshdr)); + shdr = &ehdr->e_shdr[idx]; + memcpy(&lshdr, sbuf, sizeof(lshdr)); + + if ( (elf32_to_cpu(ehdr, lshdr.sh_flags) > UINT32_MAX) || + (elf32_to_cpu(ehdr, lshdr.sh_addr) > UINT32_MAX) || + (elf32_to_cpu(ehdr, lshdr.sh_offset) > UINT32_MAX) || + (elf32_to_cpu(ehdr, lshdr.sh_size) > UINT32_MAX) || + (elf32_to_cpu(ehdr, lshdr.sh_addralign) > UINT32_MAX) || + (elf32_to_cpu(ehdr, lshdr.sh_entsize) > UINT32_MAX)) + { + fprintf(stderr, "Program section size out of range\n"); + return -1; + } + + shdr->sh_name = elf32_to_cpu(ehdr, lshdr.sh_name); + shdr->sh_type = elf32_to_cpu(ehdr, lshdr.sh_type); + shdr->sh_flags = elf32_to_cpu(ehdr, lshdr.sh_flags); + shdr->sh_addr = elf32_to_cpu(ehdr, lshdr.sh_addr); + shdr->sh_offset = elf32_to_cpu(ehdr, lshdr.sh_offset); + shdr->sh_size = elf32_to_cpu(ehdr, lshdr.sh_size); + shdr->sh_link = elf32_to_cpu(ehdr, lshdr.sh_link); + shdr->sh_info = elf32_to_cpu(ehdr, lshdr.sh_info); + shdr->sh_addralign = elf32_to_cpu(ehdr, lshdr.sh_addralign); + shdr->sh_entsize = elf32_to_cpu(ehdr, lshdr.sh_entsize); + + /* Now verify sh_entsize */ + size_ok = 0; + switch(shdr->sh_type) { + case SHT_SYMTAB: + size_ok =
shdr->sh_entsize == sizeof(Elf32_Sym); + break; + case SHT_RELA: + size_ok = shdr->sh_entsize == sizeof(Elf32_Rela); + break; + case SHT_DYNAMIC: + size_ok = shdr->sh_entsize == sizeof(Elf32_Dyn); + break; + case SHT_REL: + size_ok = shdr->sh_entsize == sizeof(Elf32_Rel); + break; + case SHT_NOTE: + case SHT_NULL: + case SHT_PROGBITS: + case SHT_HASH: + case SHT_NOBITS: + default: + /* This is a section whose entsize requirements + * I don't care about. If I don't know about + * the section I can't care about it's entsize + * requirements. + */ + size_ok = 1; + break; + } + if (!size_ok) { + fprintf(stderr, "Bad section header(%x) entsize: %lld\n", + shdr->sh_type, shdr->sh_entsize); + return -1; + } + return 0; +} + +static int build_mem_elf64_shdr(const char *buf, struct mem_ehdr *ehdr, int idx) +{ + struct mem_shdr *shdr; + const char *sbuf; + int size_ok; + Elf64_Shdr lshdr; + sbuf = buf + ehdr->e_shoff + (idx * sizeof(lshdr)); + shdr = &ehdr->e_shdr[idx]; + memcpy(&lshdr, sbuf, sizeof(lshdr)); + + if ( (elf64_to_cpu(ehdr, lshdr.sh_flags) > UINT64_MAX) || + (elf64_to_cpu(ehdr, lshdr.sh_addr) > UINT64_MAX) || + (elf64_to_cpu(ehdr, lshdr.sh_offset) > UINT64_MAX) || + (elf64_to_cpu(ehdr, lshdr.sh_size) > UINT64_MAX) || + (elf64_to_cpu(ehdr, lshdr.sh_addralign) > UINT64_MAX) || + (elf64_to_cpu(ehdr, lshdr.sh_entsize) > UINT64_MAX)) + { + fprintf(stderr, "Program section size out of range\n"); + return -1; + } + + shdr->sh_name = elf32_to_cpu(ehdr, lshdr.sh_name); + shdr->sh_type = elf32_to_cpu(ehdr, lshdr.sh_type); + shdr->sh_flags = elf64_to_cpu(ehdr, lshdr.sh_flags); + shdr->sh_addr = elf64_to_cpu(ehdr, lshdr.sh_addr); + shdr->sh_offset = elf64_to_cpu(ehdr, lshdr.sh_offset); + shdr->sh_size = elf64_to_cpu(ehdr, lshdr.sh_size); + shdr->sh_link = elf32_to_cpu(ehdr, lshdr.sh_link); + shdr->sh_info = elf32_to_cpu(ehdr, lshdr.sh_info); + shdr->sh_addralign = elf64_to_cpu(ehdr, lshdr.sh_addralign); + shdr->sh_entsize = elf64_to_cpu(ehdr, lshdr.sh_entsize); + + /* Now 
verify sh_entsize */ + size_ok = 0; + switch(shdr->sh_type) { + case SHT_SYMTAB: + size_ok = shdr->sh_entsize == sizeof(Elf64_Sym); + break; + case SHT_RELA: + size_ok = shdr->sh_entsize == sizeof(Elf64_Rela); + break; + case SHT_DYNAMIC: + size_ok = shdr->sh_entsize == sizeof(Elf64_Dyn); + break; + case SHT_REL: + size_ok = shdr->sh_entsize == sizeof(Elf64_Rel); + break; + case SHT_NOTE: + case SHT_NULL: + case SHT_PROGBITS: + case SHT_HASH: + case SHT_NOBITS: + default: + /* This is a section whose entsize requirements + * I don't care about. If I don't know about + * the section I can't care about it's entsize + * requirements. + */ + size_ok = 1; + break; + } + if (!size_ok) { + fprintf(stderr, "Bad section header(%x) entsize: %lld\n", + shdr->sh_type, shdr->sh_entsize); + return -1; + } + return 0; +} + +static int build_mem_shdrs(const char *buf, off_t len, struct mem_ehdr *ehdr, + uint32_t flags) +{ + size_t shdr_size, mem_shdr_size, i; + + /* e_shnum is at most 65536 so calculating + * the size of the section header cannot overflow. + */ + /* Is the program header in the file buffer? 
*/ + shdr_size = 0; + if (ehdr->ei_class == ELFCLASS32) { + shdr_size = sizeof(Elf32_Shdr); + } + else if (ehdr->ei_class == ELFCLASS64) { + shdr_size = sizeof(Elf64_Shdr); + } + else { + fprintf(stderr, "Invalid ei_class?\n"); + return -1; + } + shdr_size *= ehdr->e_shnum; + /* e_shoff is an unsigned long long taken straight from the file, + * so e_shoff + shdr_size can wrap around and slip past a sum-based + * test. Compare against the room left after e_shoff instead. + */ + if (((uintmax_t)ehdr->e_shoff > (uintmax_t)len) || + ((uintmax_t)shdr_size > + ((uintmax_t)len - (uintmax_t)ehdr->e_shoff))) { + /* The section header did not fit in the file buffer */ + if (probe_debug) { + fprintf(stderr, "ELF section header does not fit in file\n"); + } + return -1; + } + + /* Allocate the e_shdr array */ + mem_shdr_size = sizeof(ehdr->e_shdr[0]) * ehdr->e_shnum; + ehdr->e_shdr = xmalloc(mem_shdr_size); + + for(i = 0; i < ehdr->e_shnum; i++) { + struct mem_shdr *shdr; + int result; + result = -1; + if (ehdr->ei_class == ELFCLASS32) { + result = build_mem_elf32_shdr(buf, ehdr, i); + } + else if (ehdr->ei_class == ELFCLASS64) { + result = build_mem_elf64_shdr(buf, ehdr, i); + } + if (result < 0) { + return result; + } + /* Check the section headers to be certain + * they are safe to use. + * Skip the check if ELF_SKIP_FILESZ_CHECK is set.
+ */ + shdr = &ehdr->e_shdr[i]; + if (!(flags & ELF_SKIP_FILESZ_CHECK) + && (shdr->sh_type != SHT_NOBITS) + && (uintmax_t)(shdr->sh_offset + shdr->sh_size) > + (uintmax_t)len) { + /* The section does not fit in the buffer */ + if (probe_debug) { + fprintf(stderr, "ELF section %zd not in file\n", + i); + } + return -1; + } + if ((shdr->sh_addr + shdr->sh_size) < shdr->sh_addr) { + /* The memory address wraps */ + if (probe_debug) { + fprintf(stderr, "ELF address wrap around\n"); + } + return -1; + } + /* Remember where the section lives in the buffer */ + shdr->sh_data = (unsigned char *)(buf + shdr->sh_offset); + } + return 0; +} + +static void read_nhdr(const struct mem_ehdr *ehdr, + ElfNN_Nhdr *hdr, const unsigned char *note) +{ + memcpy(hdr, note, sizeof(*hdr)); + hdr->n_namesz = elf32_to_cpu(ehdr, hdr->n_namesz); + hdr->n_descsz = elf32_to_cpu(ehdr, hdr->n_descsz); + hdr->n_type = elf32_to_cpu(ehdr, hdr->n_type); + +} +static int build_mem_notes(struct mem_ehdr *ehdr) +{ + const unsigned char *note_start, *note_end, *note; + size_t note_size, i; + /* First find the note segment or section */ + note_start = note_end = NULL; + for(i = 0; !note_start && (i < ehdr->e_phnum); i++) { + struct mem_phdr *phdr = &ehdr->e_phdr[i]; + /* + * binutils <= 2.17 has a bug where it can create the + * PT_NOTE segment with an offset of 0. Therefore + * check p_offset > 0. 
+ * + * See: http://sourceware.org/bugzilla/show_bug.cgi?id=594 + */ + if (phdr->p_type == PT_NOTE && phdr->p_offset) { + note_start = (unsigned char *)phdr->p_data; + note_end = note_start + phdr->p_filesz; + } + } + for(i = 0; !note_start && (i < ehdr->e_shnum); i++) { + struct mem_shdr *shdr = &ehdr->e_shdr[i]; + if (shdr->sh_type == SHT_NOTE) { + note_start = shdr->sh_data; + note_end = note_start + shdr->sh_size; + } + } + if (!note_start) { + return 0; + } + + /* Walk through and count the notes */ + ehdr->e_notenum = 0; + for(note = note_start; note < note_end; note+= note_size) { + ElfNN_Nhdr hdr; + read_nhdr(ehdr, &hdr, note); + note_size = sizeof(hdr); + note_size += _ALIGN(hdr.n_namesz, 4); + note_size += _ALIGN(hdr.n_descsz, 4); + ehdr->e_notenum += 1; + } + /* Now walk and normalize the notes */ + ehdr->e_note = xmalloc(sizeof(*ehdr->e_note) * ehdr->e_notenum); + for(i = 0, note = note_start; note < note_end; note+= note_size, i++) { + const unsigned char *name, *desc; + ElfNN_Nhdr hdr; + read_nhdr(ehdr, &hdr, note); + note_size = sizeof(hdr); + name = note + note_size; + note_size += _ALIGN(hdr.n_namesz, 4); + desc = note + note_size; + note_size += _ALIGN(hdr.n_descsz, 4); + + if (((note+note_size) > note_end) || + ((note+note_size) < note_start)) { + /* Something is very wrong here ! Most likely the note + * header is invalid */ + fprintf(stderr, "ELF Note corrupted !\n"); + return -1; + } + + if ((hdr.n_namesz != 0) && (name[hdr.n_namesz -1] != '\0')) { + /* If note name string is not null terminated, just + * warn user about it and continue processing. This + * allows us to parse /proc/kcore on older kernels + * where /proc/kcore elf notes were not null + * terminated. It has been fixed in 2.6.19. 
+ */ + fprintf(stderr, "Warning: Elf Note name is not null " + "terminated\n"); + } + ehdr->e_note[i].n_type = hdr.n_type; + ehdr->e_note[i].n_name = (char *)name; + ehdr->e_note[i].n_desc = desc; + ehdr->e_note[i].n_descsz = hdr.n_descsz; + + } + return 0; +} + +/* Release the program header, section header and note arrays that + * build_elf_info() allocated for ehdr, then zero the whole structure. + * build_mem_ehdr() zeroes ehdr before anything is allocated, so an + * array that was never built is still NULL and free(NULL) is a no-op. + */ +void free_elf_info(struct mem_ehdr *ehdr) +{ + free(ehdr->e_phdr); + free(ehdr->e_shdr); + /* e_note is xmalloc()ed by build_mem_notes() and must be freed too */ + free(ehdr->e_note); + memset(ehdr, 0, sizeof(*ehdr)); +} + +int build_elf_info(const char *buf, off_t len, struct mem_ehdr *ehdr, + uint32_t flags) +{ + int result; + result = build_mem_ehdr(buf, len, ehdr); + if (result < 0) { + return result; + } + if ((ehdr->e_phoff > 0) && (ehdr->e_phnum > 0)) { + result = build_mem_phdrs(buf, len, ehdr, flags); + if (result < 0) { + free_elf_info(ehdr); + return result; + } + } + if ((ehdr->e_shoff > 0) && (ehdr->e_shnum > 0)) { + result = build_mem_shdrs(buf, len, ehdr, flags); + if (result < 0) { + free_elf_info(ehdr); + return result; + } + } + result = build_mem_notes(ehdr); + if (result < 0) { + free_elf_info(ehdr); + return result; + } + return 0; +} + diff --git a/kexec/kexec-elf.h b/kexec/kexec-elf.h new file mode 100644 index 0000000..1e512c8 --- /dev/null +++ b/kexec/kexec-elf.h @@ -0,0 +1,143 @@ +#ifndef KEXEC_ELF_H +#define KEXEC_ELF_H + +#include <stdint.h> +#include <sys/types.h> + +struct kexec_info; + +struct mem_ehdr { + unsigned ei_class; + unsigned ei_data; + unsigned e_type; + unsigned e_machine; + unsigned e_version; + unsigned e_flags; + unsigned e_phnum; + unsigned e_shnum; + unsigned e_shstrndx; + unsigned long long e_entry; + unsigned long long e_phoff; + unsigned long long e_shoff; + unsigned e_notenum; + struct mem_phdr *e_phdr; + struct mem_shdr *e_shdr; + struct mem_note *e_note; + unsigned long rel_addr, rel_size; +}; + +struct mem_phdr { + unsigned long long p_paddr; + unsigned long long p_vaddr; + unsigned long long p_filesz; + unsigned long long p_memsz; + unsigned long long p_offset; + const char *p_data; + unsigned p_type; + unsigned p_flags; + unsigned long long
p_align; +}; + +struct mem_shdr { + unsigned sh_name; + unsigned sh_type; + unsigned long long sh_flags; + unsigned long long sh_addr; + unsigned long long sh_offset; + unsigned long long sh_size; + unsigned sh_link; + unsigned sh_info; + unsigned long long sh_addralign; + unsigned long long sh_entsize; + const unsigned char *sh_data; +}; + +struct mem_sym { + unsigned long st_name; /* Symbol name (string tbl index) */ + unsigned char st_info; /* No defined meaning, 0 */ + unsigned char st_other; /* Symbol type and binding */ + unsigned st_shndx; /* Section index */ + unsigned long long st_value; /* Symbol value */ + unsigned long long st_size; /* Symbol size */ +}; + +struct mem_rela { + unsigned long long r_offset; + unsigned r_sym; + unsigned r_type; + unsigned long long r_addend; +}; + +struct mem_note { + unsigned n_type; + unsigned n_descsz; + const char *n_name; + const void *n_desc; +}; + +/* The definition of an ELF note does not vary depending + * on ELFCLASS. + */ +typedef struct +{ + uint32_t n_namesz; /* Length of the note's name. */ + uint32_t n_descsz; /* Length of the note's descriptor. */ + uint32_t n_type; /* Type of the note. 
*/ +} ElfNN_Nhdr; + +/* Misc flags */ + +#define ELF_SKIP_FILESZ_CHECK 0x00000001 + +extern void free_elf_info(struct mem_ehdr *ehdr); +extern int build_elf_info(const char *buf, off_t len, struct mem_ehdr *ehdr, + uint32_t flags); +extern int build_elf_exec_info(const char *buf, off_t len, + struct mem_ehdr *ehdr, uint32_t flags); +extern int build_elf_rel_info(const char *buf, off_t len, struct mem_ehdr *ehdr, + uint32_t flags); + +extern int build_elf_core_info(const char *buf, off_t len, + struct mem_ehdr *ehdr, uint32_t flags); +extern int elf_exec_load(struct mem_ehdr *ehdr, struct kexec_info *info); +extern int elf_exec_load_relocatable(struct mem_ehdr *ehdr, struct kexec_info *info, + unsigned long reloc_min, unsigned long reloc_max, + unsigned long align); +extern int elf_rel_load(struct mem_ehdr *ehdr, struct kexec_info *info, + unsigned long min, unsigned long max, int end); + +extern void elf_exec_build_load(struct kexec_info *info, struct mem_ehdr *ehdr, + const char *buf, off_t len, uint32_t flags); +extern void elf_exec_build_load_relocatable(struct kexec_info *info, struct mem_ehdr *ehdr, + const char *buf, off_t len, uint32_t flags, + unsigned long reloc_min, unsigned long reloc_max, + unsigned long align); +extern void elf_rel_build_load(struct kexec_info *info, struct mem_ehdr *ehdr, + const char *buf, off_t len, unsigned long min, unsigned long max, + int end, uint32_t flags); + +extern int elf_rel_find_symbol(struct mem_ehdr *ehdr, + const char *name, struct mem_sym *ret_sym); +extern unsigned long elf_rel_get_addr(struct mem_ehdr *ehdr, const char *name); +extern void elf_rel_set_symbol(struct mem_ehdr *ehdr, + const char *name, const void *buf, size_t size); +extern void elf_rel_get_symbol(struct mem_ehdr *ehdr, + const char *name, void *buf, size_t size); + +uint16_t elf16_to_cpu(const struct mem_ehdr *ehdr, uint16_t value); +uint32_t elf32_to_cpu(const struct mem_ehdr *ehdr, uint32_t value); +uint64_t elf64_to_cpu(const struct mem_ehdr 
*ehdr, uint64_t value); + +uint16_t cpu_to_elf16(const struct mem_ehdr *ehdr, uint16_t value); +uint32_t cpu_to_elf32(const struct mem_ehdr *ehdr, uint32_t value); +uint64_t cpu_to_elf64(const struct mem_ehdr *ehdr, uint64_t value); + +unsigned long elf_max_addr(const struct mem_ehdr *ehdr); + +/* Architecture specific helper functions */ +extern int machine_verify_elf_rel(struct mem_ehdr *ehdr); +extern void machine_apply_elf_rel(struct mem_ehdr *ehdr, struct mem_sym *sym, + unsigned long r_type, void *location, unsigned long address, + unsigned long value); +#endif /* KEXEC_ELF_H */ + diff --git a/kexec/kexec-iomem.c b/kexec/kexec-iomem.c new file mode 100644 index 0000000..d00b6b6 --- /dev/null +++ b/kexec/kexec-iomem.c @@ -0,0 +1,109 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include "kexec.h" +#include "crashdump.h" + +/* + * kexec_iomem_for_each_line() + * + * Iterate over each line in the file returned by proc_iomem(). If match is + * NULL or if the line matches with our match-pattern then call the + * callback if non-NULL. + * If match is NULL, callback should return a negative if error. + * Otherwise the interation goes on, incrementing nr but only if callback + * returns 0 (matched). + * + * Return the number of lines matched. 
+ */ + +int kexec_iomem_for_each_line(char *match, + int (*callback)(void *data, + int nr, + char *str, + unsigned long long base, + unsigned long long length), + void *data) +{ + const char *iomem = proc_iomem(); + char line[MAX_LINE]; + FILE *fp; + unsigned long long start, end, size; + char *str; + int consumed; + int count; + int nr = 0, ret; + + if (!callback) + return nr; + + fp = fopen(iomem, "r"); + if (!fp) + die("Cannot open %s\n", iomem); + + while(fgets(line, sizeof(line), fp) != 0) { + /* %n is not counted in sscanf()'s return value and is only + * stored once the literal " : " has matched, so preset a + * sentinel rather than reading an indeterminate value. + */ + consumed = -1; + count = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed); + if (count != 2 || consumed < 0) + continue; + str = line + consumed; + size = end - start + 1; + /* strncmp() stops at the end of the line's text, so a match + * string longer than the rest of the line is never compared + * against bytes beyond the terminator. + */ + if (!match || strncmp(str, match, strlen(match)) == 0) { + ret = callback(data, nr, str, start, size); + if (ret < 0) + break; + else if (ret == 0) + nr++; + } + } + + fclose(fp); + + return nr; +} + +static int kexec_iomem_single_callback(void *data, int nr, + char *UNUSED(str), + unsigned long long base, + unsigned long long length) +{ + struct memory_range *range = data; + + if (nr == 0) { + range->start = base; + range->end = base + length - 1; + } + + return 0; +} + +int parse_iomem_single(char *str, uint64_t *start, uint64_t *end) +{ + struct memory_range range; + int ret; + + memset(&range, 0, sizeof(range)); + + ret = kexec_iomem_for_each_line(str, + kexec_iomem_single_callback, &range); + + if (ret == 1) { + if (start) + *start = range.start; + if (end) + *end = range.end; + + ret = 0; + } + else + ret = -1; + + return ret; +} diff --git a/kexec/kexec-lzma.h b/kexec/kexec-lzma.h new file mode 100644 index 0000000..d3b751a --- /dev/null +++ b/kexec/kexec-lzma.h @@ -0,0 +1,8 @@ +#ifndef __KEXEC_LZMA_H +#define __KEXEC_LZMA_H + +#include <sys/types.h> + +char *lzma_decompress_file(const char *filename, off_t *r_size); + +#endif /* __KEXEC_LZMA_H */ diff --git a/kexec/kexec-pe-zboot.c b/kexec/kexec-pe-zboot.c new file mode 100644 index 0000000..2f2e052 --- /dev/null +++ b/kexec/kexec-pe-zboot.c @@ -0,0 +1,131 @@ +/* + *
Generic PE compressed Image (vmlinuz, ZBOOT) support. + * + * Several distros use 'make zinstall' with CONFIG_ZBOOT + * enabled to create UEFI PE images that contain + * a decompressor and a compressed kernel image. + * + * Currently we cannot use kexec_file_load() to load vmlinuz + * PE images that self decompress. + * + * To support ZBOOT, we should: + * a). Copy the compressed contents of vmlinuz to a temporary file. + * b). Decompress (gunzip-decompress) the contents inside the + * temporary file. + * c). Validate the resulting image and write it back to the + * temporary file. + * d). Pass the 'fd' of the temporary file to the kernel space. + * + * This module contains the arch independent code for the above, + * arch specific PE and image checks should wrap calls + * to functions in this module. + */ + +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdint.h> +#include <sys/types.h> +#include <unistd.h> +#include <fcntl.h> +#include "kexec.h" +#include <kexec-pe-zboot.h> + +#define FILENAME_IMAGE "/tmp/ImageXXXXXX" + +/* + * Returns -1 : in case of error/invalid format (not a valid PE+compressed ZBOOT format. + * + * crude_buf: the content, which is read from the kernel file without any processing + */ +int pez_prepare(const char *crude_buf, off_t buf_sz, int *kernel_fd) +{ + int ret = -1; + int fd = 0; + char *fname = NULL; + char *kernel_uncompressed_buf = NULL; + off_t decompressed_size = 0; + const struct linux_pe_zboot_header *z; + + z = (const struct linux_pe_zboot_header *)(crude_buf); + + if (memcmp(&z->image_type, "zimg", sizeof(z->image_type))) { + dbgprintf("%s: PE doesn't contain a compressed kernel.\n", __func__); + return -1; + } + + /* + * At the moment its possible to create images with more compression + * algorithms than are supported here, error out if we detect that. 
+ */ + if (memcmp(&z->compress_type, "gzip", 4) && + memcmp(&z->compress_type, "lzma", 4)) { + dbgprintf("%s: kexec can only decompress gziped and lzma images.\n", __func__); + return -1; + } + + if (buf_sz < z->payload_offset + z->payload_size) { + dbgprintf("%s: PE too small to contain complete payload.\n", __func__); + return -1; + } + + if (!(fname = strdup(FILENAME_IMAGE))) { + dbgprintf("%s: Can't duplicate strings\n", __func__); + return -1; + } + + if ((fd = mkstemp(fname)) < 0) { + dbgprintf("%s: Can't open file %s\n", __func__, fname); + ret = -1; + goto fail_mkstemp; + } + + if (write(fd, &crude_buf[z->payload_offset], + z->payload_size) != z->payload_size) { + dbgprintf("%s: Can't write the compressed file %s\n", + __func__, fname); + ret = -1; + goto fail_write; + } + + kernel_uncompressed_buf = slurp_decompress_file(fname, + &decompressed_size); + + dbgprintf("%s: decompressed size %ld\n", __func__, decompressed_size); + + lseek(fd, 0, SEEK_SET); + + if (write(fd, kernel_uncompressed_buf, + decompressed_size) != decompressed_size) { + dbgprintf("%s: Can't write the decompressed file %s\n", + __func__, fname); + ret = -1; + goto fail_bad_header; + } + + *kernel_fd = open(fname, O_RDONLY); + if (*kernel_fd == -1) { + dbgprintf("%s: Failed to open file %s\n", + __func__, fname); + ret = -1; + goto fail_bad_header; + } + + dbgprintf("%s: done\n", __func__); + + ret = 0; + /* Fall through on success as well: the decompressed image has + * already been written to the temporary file, so the buffer + * returned by slurp_decompress_file() is no longer needed and + * would otherwise be leaked. + */ + +fail_bad_header: + free(kernel_uncompressed_buf); + +fail_write: + if (fd >= 0) + close(fd); + + unlink(fname); + +fail_mkstemp: + free(fname); + + return ret; +} diff --git a/kexec/kexec-sha256.h b/kexec/kexec-sha256.h new file mode 100644 index 0000000..b4bb856 --- /dev/null +++ b/kexec/kexec-sha256.h @@ -0,0 +1,11 @@ +#ifndef KEXEC_SHA256_H +#define KEXEC_SHA256_H + +struct sha256_region { + uint64_t start; + uint64_t len; +}; + +#define SHA256_REGIONS 16 + +#endif /* KEXEC_SHA256_H */ diff --git a/kexec/kexec-syscall.h b/kexec/kexec-syscall.h new file mode 100644
index 0000000..1e2d12f --- /dev/null +++ b/kexec/kexec-syscall.h @@ -0,0 +1,188 @@ +#ifndef KEXEC_SYSCALL_H +#define KEXEC_SYSCALL_H + +#define __LIBRARY__ +#include <sys/syscall.h> +#include <unistd.h> + +#define LINUX_REBOOT_CMD_KEXEC_OLD 0x81726354 +#define LINUX_REBOOT_CMD_KEXEC_OLD2 0x18263645 +#define LINUX_REBOOT_CMD_KEXEC 0x45584543 + +#ifndef __NR_kexec_load +#ifdef __i386__ +#define __NR_kexec_load 283 +#endif +#ifdef __sh__ +#define __NR_kexec_load 283 +#endif +#ifdef __cris__ +#ifndef __NR_kexec_load +#define __NR_kexec_load 283 +#endif +#endif +#ifdef __ia64__ +#define __NR_kexec_load 1268 +#endif +#ifdef __powerpc64__ +#define __NR_kexec_load 268 +#endif +#ifdef __powerpc__ +#define __NR_kexec_load 268 +#endif +#ifdef __x86_64__ +#define __NR_kexec_load 246 +#endif +#ifdef __s390x__ +#define __NR_kexec_load 277 +#endif +#ifdef __s390__ +#define __NR_kexec_load 277 +#endif +#ifdef __loongarch__ +#define __NR_kexec_load 104 +#endif +#if defined(__arm__) || defined(__arm64__) +#define __NR_kexec_load __NR_SYSCALL_BASE + 347 +#endif +#if defined(__mips__) +#define __NR_kexec_load 4311 +#endif +#ifdef __m68k__ +#define __NR_kexec_load 313 +#endif +#ifdef __alpha__ +#define __NR_kexec_load 448 +#endif +#ifndef __NR_kexec_load +#error Unknown processor architecture. Needs a kexec_load syscall number. 
+#endif +#endif /*ifndef __NR_kexec_load*/ + +#if defined(__arm__) || defined(__loongarch__) +#undef __NR_kexec_file_load +#endif + +#ifndef __NR_kexec_file_load + +#ifdef __x86_64__ +#define __NR_kexec_file_load 320 +#endif +#ifdef __powerpc64__ +#define __NR_kexec_file_load 382 +#endif +#ifdef __s390x__ +#define __NR_kexec_file_load 381 +#endif +#ifdef __aarch64__ +#define __NR_kexec_file_load 294 +#endif +#ifdef __hppa__ +#define __NR_kexec_file_load 355 +#endif + +#ifndef __NR_kexec_file_load +/* system call not available for the arch */ +#define __NR_kexec_file_load 0xffffffff /* system call not available */ +#endif + +#endif /*ifndef __NR_kexec_file_load*/ + +struct kexec_segment; + +static inline long kexec_load(void *entry, unsigned long nr_segments, + struct kexec_segment *segments, unsigned long flags) +{ + return (long) syscall(__NR_kexec_load, entry, nr_segments, segments, flags); +} + +static inline int is_kexec_file_load_implemented(void) { + if (__NR_kexec_file_load != 0xffffffff) + return 1; + return 0; +} + +static inline long kexec_file_load(int kernel_fd, int initrd_fd, + unsigned long cmdline_len, const char *cmdline_ptr, + unsigned long flags) +{ + return (long) syscall(__NR_kexec_file_load, kernel_fd, initrd_fd, + cmdline_len, cmdline_ptr, flags); +} + +#define KEXEC_ON_CRASH 0x00000001 +#define KEXEC_PRESERVE_CONTEXT 0x00000002 +#define KEXEC_ARCH_MASK 0xffff0000 + +/* Flags for kexec file based system call */ +#define KEXEC_FILE_UNLOAD 0x00000001 +#define KEXEC_FILE_ON_CRASH 0x00000002 +#define KEXEC_FILE_NO_INITRAMFS 0x00000004 + +/* These values match the ELF architecture values. + * Unless there is a good reason that should continue to be the case. 
+ */ +#define KEXEC_ARCH_DEFAULT ( 0 << 16) +#define KEXEC_ARCH_386 ( 3 << 16) +#define KEXEC_ARCH_68K ( 4 << 16) +#define KEXEC_ARCH_HPPA (15 << 16) +#define KEXEC_ARCH_X86_64 (62 << 16) +#define KEXEC_ARCH_PPC (20 << 16) +#define KEXEC_ARCH_PPC64 (21 << 16) +#define KEXEC_ARCH_IA_64 (50 << 16) +#define KEXEC_ARCH_ARM (40 << 16) +#define KEXEC_ARCH_ARM64 (183 << 16) +#define KEXEC_ARCH_S390 (22 << 16) +#define KEXEC_ARCH_SH (42 << 16) +#define KEXEC_ARCH_MIPS_LE (10 << 16) +#define KEXEC_ARCH_MIPS ( 8 << 16) +#define KEXEC_ARCH_CRIS (76 << 16) +#define KEXEC_ARCH_LOONGARCH (258 << 16) + +#define KEXEC_MAX_SEGMENTS 16 + +#ifdef __i386__ +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_386 +#endif +#ifdef __sh__ +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_SH +#endif +#ifdef __cris__ +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_CRIS +#endif +#ifdef __ia64__ +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_IA_64 +#endif +#ifdef __powerpc64__ +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_PPC64 +#else + #ifdef __powerpc__ + #define KEXEC_ARCH_NATIVE KEXEC_ARCH_PPC + #endif +#endif +#ifdef __x86_64__ +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_X86_64 +#endif +#ifdef __s390x__ +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_S390 +#endif +#ifdef __s390__ +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_S390 +#endif +#ifdef __arm__ +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_ARM +#endif +#if defined(__mips__) +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_MIPS +#endif +#ifdef __m68k__ +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_68K +#endif +#if defined(__arm64__) +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_ARM64 +#endif +#if defined(__loongarch__) +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_LOONGARCH +#endif + +#endif /* KEXEC_SYSCALL_H */ diff --git a/kexec/kexec-uImage.c b/kexec/kexec-uImage.c new file mode 100644 index 0000000..016be10 --- /dev/null +++ b/kexec/kexec-uImage.c @@ -0,0 +1,283 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <image.h> +#include <getopt.h> +#include <arch/options.h> 
+#include "kexec.h" +#include <kexec-uImage.h> + +#ifdef HAVE_LIBZ +#include <zlib.h> +#endif +/* + * Basic uImage loader. Not rocket science. + */ + +/* + * Returns the image type if everything goes well. This would + * allow the user to decide if the image is of their interest. + * + * Returns -1 on a corrupted image + * + * Returns 0 if this is not a uImage + */ +int uImage_probe(const char *buf, off_t len, unsigned int arch) +{ + struct image_header header; +#ifdef HAVE_LIBZ + unsigned int crc; + unsigned int hcrc; +#endif + + if ((uintmax_t)len < (uintmax_t)sizeof(header)) + return -1; + + memcpy(&header, buf, sizeof(header)); + if (be32_to_cpu(header.ih_magic) != IH_MAGIC) + return 0; +#ifdef HAVE_LIBZ + hcrc = be32_to_cpu(header.ih_hcrc); + header.ih_hcrc = 0; + crc = crc32(0, (void *)&header, sizeof(header)); + if (crc != hcrc) { + printf("Header checksum of the uImage does not match\n"); + return -1; + } +#endif + switch (header.ih_type) { + case IH_TYPE_KERNEL: + case IH_TYPE_KERNEL_NOLOAD: + break; + case IH_TYPE_RAMDISK: + break; + default: + printf("uImage type %d unsupported\n", header.ih_type); + return -1; + } + + if (header.ih_os != IH_OS_LINUX) { + printf("uImage os %d unsupported\n", header.ih_os); + return -1; + } + + if (header.ih_arch != arch) { + printf("uImage arch %d unsupported\n", header.ih_arch); + return -1; + } + + switch (header.ih_comp) { + case IH_COMP_NONE: +#ifdef HAVE_LIBZ + case IH_COMP_GZIP: +#endif + break; + default: + printf("uImage uses unsupported compression method\n"); + return -1; + } + + if (be32_to_cpu(header.ih_size) > len - sizeof(header)) { + printf("uImage header claims that image has %d bytes\n", + be32_to_cpu(header.ih_size)); + printf("we read only %lld bytes.\n", + (long long)len - sizeof(header)); + return -1; + } +#ifdef HAVE_LIBZ + crc = crc32(0, (void *)buf + sizeof(header), be32_to_cpu(header.ih_size)); + if (crc != be32_to_cpu(header.ih_dcrc)) { + printf("uImage: The data CRC does not match. 
Computed: %08x " + "expected %08x\n", crc, + be32_to_cpu(header.ih_dcrc)); + return -1; + } +#endif + return (int)header.ih_type; +} + +/* + * To conform to the 'probe' routine in file_type struct, + * we return : + * 0 - If the image is valid 'type' image. + * + * Now, we have to pass on the 'errors' in the image. So, + * + * -1 - If the image is corrupted. + * 1 - If the image is not a uImage. + */ + +int uImage_probe_kernel(const char *buf, off_t len, unsigned int arch) +{ + int type = uImage_probe(buf, len, arch); + if (type < 0) + return -1; + + return !(type == IH_TYPE_KERNEL || type == IH_TYPE_KERNEL_NOLOAD); +} + +int uImage_probe_ramdisk(const char *buf, off_t len, unsigned int arch) +{ + int type = uImage_probe(buf, len, arch); + + if (type < 0) + return -1; + return !(type == IH_TYPE_RAMDISK); +} + +#ifdef HAVE_LIBZ +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define RESERVED 0xE0 /* bits 5..7: reserved */ + +static int uImage_gz_load(const char *buf, off_t len, + struct Image_info *image) +{ + int ret; + z_stream strm; + unsigned int skip; + unsigned int flags; + unsigned char *uncomp_buf; + unsigned int mem_alloc; + + mem_alloc = 10 * 1024 * 1024; + uncomp_buf = malloc(mem_alloc); + if (!uncomp_buf) + return -1; + + memset(&strm, 0, sizeof(strm)); + + /* Skip magic, method, time, flags, os code ... 
*/ + skip = 10; + + /* check GZ magic */ + if (buf[0] != 0x1f || buf[1] != 0x8b) { + free(uncomp_buf); + return -1; + } + + flags = buf[3]; + if (buf[2] != Z_DEFLATED || (flags & RESERVED) != 0) { + puts ("Error: Bad gzipped data\n"); + free(uncomp_buf); + return -1; + } + + if (flags & EXTRA_FIELD) { + skip += 2; + skip += buf[10]; + skip += buf[11] << 8; + } + if (flags & ORIG_NAME) { + while (buf[skip++]) + ; + } + if (flags & COMMENT) { + while (buf[skip++]) + ; + } + if (flags & HEAD_CRC) + skip += 2; + + strm.avail_in = len - skip; + strm.next_in = (void *)buf + skip; + + /* - activates parsing gz headers */ + ret = inflateInit2(&strm, -MAX_WBITS); + if (ret != Z_OK) { + free(uncomp_buf); + return -1; + } + + strm.next_out = uncomp_buf; + strm.avail_out = mem_alloc; + + do { + ret = inflate(&strm, Z_FINISH); + if (ret == Z_STREAM_END) + break; + + if (ret == Z_OK || ret == Z_BUF_ERROR) { + void *new_buf; + int inc_buf = 5 * 1024 * 1024; + + mem_alloc += inc_buf; + new_buf = realloc(uncomp_buf, mem_alloc); + if (!new_buf) { + inflateEnd(&strm); + free(uncomp_buf); + return -1; + } + + uncomp_buf = new_buf; + strm.next_out = uncomp_buf + mem_alloc - inc_buf; + strm.avail_out = inc_buf; + } else { + free(uncomp_buf); + printf("Error during decompression %d\n", ret); + return -1; + } + } while (1); + + inflateEnd(&strm); + image->buf = (char *)uncomp_buf; + image->len = mem_alloc - strm.avail_out; + return 0; +} +#else +static int uImage_gz_load(const char *UNUSED(buf), off_t UNUSED(len), + struct Image_info *UNUSED(image)) +{ + return -1; +} +#endif + +int uImage_load(const char *buf, off_t len, struct Image_info *image) +{ + const struct image_header *header = (const struct image_header *)buf; + const char *img_buf = buf + sizeof(struct image_header); + off_t img_len = be32_to_cpu(header->ih_size); + + /* + * Prevent loading a modified image. + * CRC check is perfomed only when zlib is compiled + * in. 
This check will help us to detect + * size related vulnerabilities. + */ + if (img_len != (len - sizeof(struct image_header))) { + printf("Image size doesn't match the header\n"); + return -1; + } + + image->base = cpu_to_be32(header->ih_load); + image->ep = cpu_to_be32(header->ih_ep); + switch (header->ih_comp) { + case IH_COMP_NONE: + image->buf = img_buf; + image->len = img_len; + return 0; + break; + + case IH_COMP_GZIP: + /* + * uboot doesn't decompress the RAMDISK images. + * Comply to the uboot behaviour. + */ + if (header->ih_type == IH_TYPE_RAMDISK) { + image->buf = img_buf; + image->len = img_len; + return 0; + } else + return uImage_gz_load(img_buf, img_len, image); + break; + + default: + return -1; + } +} diff --git a/kexec/kexec-xen.c b/kexec/kexec-xen.c new file mode 100644 index 0000000..a7c8933 --- /dev/null +++ b/kexec/kexec-xen.c @@ -0,0 +1,297 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <elf.h> +#include "kexec.h" +#include "kexec-syscall.h" +#include "crashdump.h" + +#include "config.h" + +#ifdef HAVE_LIBXENCTRL +#include "kexec-xen.h" + +#include "crashdump.h" + +#ifdef CONFIG_LIBXENCTRL_DL +#include <dlfcn.h> + +/* The handle from dlopen(), needed by dlsym(), dlclose() */ +static void *xc_dlhandle; +xc_hypercall_buffer_t XC__HYPERCALL_BUFFER_NAME(HYPERCALL_BUFFER_NULL); + +void *__xc_dlsym(const char *symbol) +{ + return dlsym(xc_dlhandle, symbol); +} + +xc_interface *__xc_interface_open(xentoollog_logger *logger, + xentoollog_logger *dombuild_logger, + unsigned open_flags) +{ + xc_interface *xch = NULL; + + if (!xc_dlhandle) + xc_dlhandle = dlopen("libxenctrl.so", RTLD_NOW | RTLD_NODELETE); + + if (xc_dlhandle) { + typedef xc_interface *(*func_t)(xentoollog_logger *logger, + xentoollog_logger *dombuild_logger, + unsigned open_flags); + + func_t func = (func_t)dlsym(xc_dlhandle, "xc_interface_open"); + xch = func(logger, dombuild_logger, open_flags); + } + + return xch; +} + +int 
__xc_interface_close(xc_interface *xch) +{ + int rc = -1; + + if (xc_dlhandle) { + typedef int (*func_t)(xc_interface *xch); + + func_t func = (func_t)dlsym(xc_dlhandle, "xc_interface_close"); + rc = func(xch); + dlclose(xc_dlhandle); + xc_dlhandle = NULL; + } + + return rc; +} +#endif /* CONFIG_LIBXENCTRL_DL */ + +int xen_get_kexec_range(int range, uint64_t *start, uint64_t *end) +{ + uint64_t size; + xc_interface *xc; + int rc = -1; + + xc = xc_interface_open(NULL, NULL, 0); + if (!xc) { + fprintf(stderr, "failed to open xen control interface.\n"); + goto out; + } + + rc = xc_kexec_get_range(xc, range, 0, &size, start); + if (rc < 0) { + fprintf(stderr, "failed to get range=%d from hypervisor.\n", range); + goto out_close; + } + + *end = *start + size - 1; + +out_close: + xc_interface_close(xc); + +out: + return rc; +} + +static uint8_t xen_get_kexec_type(unsigned long kexec_flags) +{ + if (kexec_flags & KEXEC_ON_CRASH) + return KEXEC_TYPE_CRASH; + + if (kexec_flags & KEXEC_LIVE_UPDATE) + return KEXEC_TYPE_LIVE_UPDATE; + + return KEXEC_TYPE_DEFAULT; +} + +#define IDENTMAP_1MiB (1024 * 1024) + +int xen_kexec_load(struct kexec_info *info) +{ + uint32_t nr_segments = info->nr_segments, nr_low_segments = 0; + struct kexec_segment *segments = info->segment; + uint64_t low_watermark = 0; + xc_interface *xch; + xc_hypercall_buffer_array_t *array = NULL; + uint8_t type; + uint8_t arch; + xen_kexec_segment_t *xen_segs, *seg; + int s; + int ret = -1; + + xch = xc_interface_open(NULL, NULL, 0); + if (!xch) + return -1; + + /* + * Ensure 0 - 1 MiB is mapped and accessible by the image. + * This allows access to the VGA memory and the region + * purgatory copies in the crash case. + * + * First, count the number of additional segments which will + * need to be added in between the ones in segments[]. + * + * The segments are already sorted. 
+ */ + for (s = 0; s < nr_segments && (uint64_t)segments[s].mem <= IDENTMAP_1MiB; s++) { + if ((uint64_t)segments[s].mem > low_watermark) + nr_low_segments++; + + low_watermark = (uint64_t)segments[s].mem + segments[s].memsz; + } + if (low_watermark < IDENTMAP_1MiB) + nr_low_segments++; + + low_watermark = 0; + + xen_segs = calloc(nr_segments + nr_low_segments, sizeof(*xen_segs)); + if (!xen_segs) + goto out; + + array = xc_hypercall_buffer_array_create(xch, nr_segments); + if (array == NULL) + goto out; + + seg = xen_segs; + for (s = 0; s < nr_segments; s++) { + DECLARE_HYPERCALL_BUFFER(void, seg_buf); + + if (low_watermark < IDENTMAP_1MiB && (uint64_t)segments[s].mem > low_watermark) { + set_xen_guest_handle(seg->buf.h, HYPERCALL_BUFFER_NULL); + seg->buf_size = 0; + seg->dest_maddr = low_watermark; + low_watermark = (uint64_t)segments[s].mem; + if (low_watermark > IDENTMAP_1MiB) + low_watermark = IDENTMAP_1MiB; + seg->dest_size = low_watermark - seg->dest_maddr; + seg++; + } + + seg_buf = xc_hypercall_buffer_array_alloc(xch, array, s, + seg_buf, segments[s].bufsz); + if (seg_buf == NULL) + goto out; + memcpy(seg_buf, segments[s].buf, segments[s].bufsz); + + set_xen_guest_handle(seg->buf.h, seg_buf); + seg->buf_size = segments[s].bufsz; + seg->dest_maddr = (uint64_t)segments[s].mem; + seg->dest_size = segments[s].memsz; + seg++; + + low_watermark = (uint64_t)segments[s].mem + segments[s].memsz; + } + + if ((uint64_t)low_watermark < IDENTMAP_1MiB) { + set_xen_guest_handle(seg->buf.h, HYPERCALL_BUFFER_NULL); + seg->buf_size = 0; + seg->dest_maddr = low_watermark; + seg->dest_size = IDENTMAP_1MiB - low_watermark; + seg++; + } + + type = xen_get_kexec_type(info->kexec_flags); + + arch = (info->kexec_flags & KEXEC_ARCH_MASK) >> 16; +#if defined(__i386__) || defined(__x86_64__) + if (!arch) + arch = EM_386; +#endif + + ret = xc_kexec_load(xch, type, arch, (uint64_t)info->entry, + nr_segments + nr_low_segments, xen_segs); + +out: + xc_hypercall_buffer_array_destroy(xch, 
array); + free(xen_segs); + xc_interface_close(xch); + + return ret; +} + +int xen_kexec_unload(uint64_t kexec_flags) +{ + xc_interface *xch; + uint8_t type; + int ret; + + xch = xc_interface_open(NULL, NULL, 0); + if (!xch) + return -1; + + type = xen_get_kexec_type(kexec_flags); + + ret = xc_kexec_unload(xch, type); + + xc_interface_close(xch); + + return ret; +} + +int xen_kexec_status(uint64_t kexec_flags) +{ + xc_interface *xch; + uint8_t type; + int ret = -1; + +#ifdef HAVE_KEXEC_CMD_STATUS + xch = xc_interface_open(NULL, NULL, 0); + if (!xch) + return -1; + + type = xen_get_kexec_type(kexec_flags); + + ret = xc_kexec_status(xch, type); + + xc_interface_close(xch); +#endif + + return ret; +} + +int xen_kexec_exec(uint64_t kexec_flags) +{ + xc_interface *xch; + uint8_t type = KEXEC_TYPE_DEFAULT; + int ret; + + xch = xc_interface_open(NULL, NULL, 0); + if (!xch) + return -1; + + if (kexec_flags & KEXEC_LIVE_UPDATE) + type = KEXEC_TYPE_LIVE_UPDATE; + + ret = xc_kexec_exec(xch, type); + + xc_interface_close(xch); + + return ret; +} + +#else /* ! 
HAVE_LIBXENCTRL */ + +int xen_get_kexec_range(int range, uint64_t *start, uint64_t *end) +{ + return -1; +} + +int xen_kexec_load(struct kexec_info *UNUSED(info)) +{ + return -1; +} + +int xen_kexec_unload(uint64_t kexec_flags) +{ + return -1; +} + +int xen_kexec_status(uint64_t kexec_flags) +{ + return -1; +} + +int xen_kexec_exec(uint64_t kexec_flags) +{ + return -1; +} + +#endif diff --git a/kexec/kexec-xen.h b/kexec/kexec-xen.h new file mode 100644 index 0000000..70fb576 --- /dev/null +++ b/kexec/kexec-xen.h @@ -0,0 +1,87 @@ +#ifndef KEXEC_XEN_H +#define KEXEC_XEN_H + +#ifdef HAVE_LIBXENCTRL +#include <xenctrl.h> + +#ifdef CONFIG_LIBXENCTRL_DL +/* Lookup symbols in libxenctrl.so */ +extern void *__xc_dlsym(const char *symbol); + +/* Wrappers around xc_interface_open/close() to insert dlopen/dlclose() */ +extern xc_interface *__xc_interface_open(xentoollog_logger *logger, + xentoollog_logger *dombuild_logger, + unsigned open_flags); +extern int __xc_interface_close(xc_interface *xch); + +/* GCC expression statements for evaluating dlsym() */ +#define __xc_call(dtype, name, args...) \ +( \ + { dtype value; \ + typedef dtype (*func_t)(xc_interface *, ...); \ + func_t func = __xc_dlsym(#name); \ + value = func(args); \ + value; } \ +) +#define __xc_data(dtype, name) \ +( \ + { dtype *value = (dtype *)__xc_dlsym(#name); value; } \ +) + +/* The wrappers around utilized xenctrl.h functions */ +#define xc_interface_open(a, b, c) \ + __xc_interface_open(a, b, c) +#define xc_interface_close(a) \ + __xc_interface_close(a) +#define xc_version(args...) \ + __xc_call(int, xc_version, args) +#define xc_get_max_cpus(args...) \ + __xc_call(int, xc_get_max_cpus, args) +#define xc_get_machine_memory_map(args...) \ + __xc_call(int, xc_get_machine_memory_map, args) +#define xc_kexec_get_range(args...) \ + __xc_call(int, xc_kexec_get_range, args) +#define xc_kexec_load(args...) \ + __xc_call(int, xc_kexec_load, args) +#define xc_kexec_unload(args...) 
\ + __xc_call(int, xc_kexec_unload, args) +#define xc_kexec_status(args...) \ + __xc_call(int, xc_kexec_status, args) +#define xc_kexec_exec(args...) \ + __xc_call(int, xc_kexec_exec, args) +#define xc_hypercall_buffer_array_create(args...) \ + __xc_call(xc_hypercall_buffer_array_t *, xc_hypercall_buffer_array_create, args) +#define xc__hypercall_buffer_alloc_pages(args...) \ + __xc_call(void *, xc__hypercall_buffer_alloc_pages, args) +#define xc__hypercall_buffer_free_pages(args...) \ + __xc_call(void , xc__hypercall_buffer_free_pages, args) +#define xc__hypercall_buffer_array_alloc(args...) \ + __xc_call(void *, xc__hypercall_buffer_array_alloc, args) +#define xc__hypercall_buffer_array_get(args...) \ + __xc_call(void *, xc__hypercall_buffer_array_get, args) +#define xc_hypercall_buffer_array_destroy(args...) \ + __xc_call(void *, xc_hypercall_buffer_array_destroy, args) + +#endif /* CONFIG_LIBXENCTRL_DL */ + +#endif /* HAVE_LIBXENCTRL */ + +#ifndef KEXEC_RANGE_MA_XEN +#define KEXEC_RANGE_MA_XEN 1 +#endif + +#ifndef KEXEC_RANGE_MA_LIVEUPDATE +#define KEXEC_RANGE_MA_LIVEUPDATE 7 +#endif + +#ifndef KEXEC_TYPE_LIVE_UPDATE +#define KEXEC_TYPE_LIVE_UPDATE 2 +#endif + +#ifndef KEXEC_LIVE_UPDATE +#define KEXEC_LIVE_UPDATE 0x00000004 +#endif + +int xen_get_kexec_range(int range, uint64_t *start, uint64_t *end); + +#endif /* KEXEC_XEN_H */ diff --git a/kexec/kexec-zlib.h b/kexec/kexec-zlib.h new file mode 100644 index 0000000..16300f2 --- /dev/null +++ b/kexec/kexec-zlib.h @@ -0,0 +1,11 @@ +#ifndef __KEXEC_ZLIB_H +#define __KEXEC_ZLIB_H + +#include <stdio.h> +#include <sys/types.h> + +#include "config.h" + +int is_zlib_file(const char *filename, off_t *r_size); +char *zlib_decompress_file(const char *filename, off_t *r_size); +#endif /* __KEXEC_ZLIB_H */ diff --git a/kexec/kexec.8 b/kexec/kexec.8 new file mode 100644 index 0000000..3a344c5 --- /dev/null +++ b/kexec/kexec.8 @@ -0,0 +1,399 @@ +.\" Process this file with +.\" groff -man -Tascii kexec.8 +.\" +.TH kexec 8 
"April 2006" Linux "User Manuals" +.SH NAME +kexec \- directly boot into a new kernel +.SH SYNOPSIS +.B /sbin/kexec +.B [-v (\-\-version)] [-f (\-\-force)] [-x (\-\-no-ifdown)] [-y (\-\-no-sync)] [-l (\-\-load)] [-p (\-\-load-panic)] [-u (\-\-unload)] [-e (\-\-exec)] [-t (\-\-type)] +.BI [\-\-mem\-min= addr ] +.BI [\-\-mem\-max= addr ] + +.SH DESCRIPTION +.B kexec +is a system call that enables you to load and boot into another +kernel from the currently running kernel. +.B kexec +performs the function of the boot loader from within the kernel. The +primary difference between a standard system boot and a +.B kexec +boot is that the hardware initialization normally performed by the BIOS +or firmware (depending on architecture) is not performed during a +.B kexec +boot. This has the effect of reducing the time required for a reboot. +.PP +Make sure you have selected +.B CONFIG_KEXEC=y +when configuring the kernel. The +.B CONFIG_KEXEC +option enables the +.B kexec +system call. +.SH USAGE +Using +.B kexec +consists of + +.RS +(1) loading the kernel to be rebooted to into memory, and +.RE +.RS +(2) actually rebooting to the pre-loaded kernel. +.RE +.PP +To load a kernel, the syntax is as follows: + +.RS +.B kexec +.RI \-l\ kernel-image +.RI "\-\-append=" command\-line\-options +.RI "\-\-initrd=" initrd\-image +.RE + +where +.I kernel\-image +is the kernel file that you intend to reboot to. +.PP +Insert the command-line parameters that must be passed to the new +kernel into +.IR command\-line\-options . +Passing the exact contents of /proc/cmdline into +.I command\-line\-options +is the safest way to ensure that correct values are passed to the +rebooting kernel. +.PP +The optional +.I initrd-image +is the initrd image to be used during boot. +.PP +It's also possible to invoke +.B kexec +without an option parameter. In that case, kexec loads the specified +kernel and then invokes +.BR shutdown (8). 
+If the shutdown scripts of your Linux distribution support +kexec-based rebooting, they then call +.B kexec +.I -e +just before actually rebooting the machine. That way, the machine does +a clean shutdown including all shutdown scripts. + +.SH EXAMPLE +.PP +For example, if the kernel image you want to reboot to is +.BR /boot/vmlinux , +the contents of /proc/cmdline are +.BR "root\=/dev/hda1" , +and the path to the initrd is +.BR /boot/initrd , +then you would use the following command to load the kernel: + +.RS +.B kexec +.RB \-l\ /boot/vmlinux +.RB "\-\-append=" "root=/dev/hda1" "\ \-\-initrd=" /boot/initrd +.RE +.PP +After this kernel is loaded, it can be booted to at any time using the +command: + +.RS +.BR kexec \ \-e +.RE + +.SH OPTIONS +.TP +.B \-d\ (\-\-debug) +Enable debugging messages. +.TP +.B \-S\ (\-\-status) +Return 1 if the type (by default crash) is loaded, 0 if not. Can be used in +conjunction with -l or -p to toggle the type. Note this option supersedes other +options and it will +.BR not\ load\ or\ unload\ the\ kernel. +.TP +.B \-e\ (\-\-exec) +Run the currently loaded kernel. Note that it will reboot into the loaded kernel without calling shutdown(8). +.TP +.B \-f\ (\-\-force) +Force an immediate +.B kexec +call, do not call +.BR shutdown (8) +(contrary to the default action without any option parameter). This option +performs the same actions as executing +.IR -l +and +.IR -e +in one call. +.TP +.B \-h\ (\-\-help) +Open a help file for +.BR kexec . +.TP +.B \-i\ (\-\-no-checks) +Fast reboot, no memory integrity checks. +.TP +.BI \-l\ (\-\-load) \ kernel +Load the specified +.I kernel +into the current kernel. +.TP +.B \-p\ (\-\-load\-panic) +Load the new kernel for use on panic. +.TP +.BI \-t\ (\-\-type= type ) +Specify that the new kernel is of this +.I type. +.TP +.BI \-s\ (\-\-kexec-file-syscall) +Specify that the new KEXEC_FILE_LOAD syscall should be used exclusively. 
+.TP +.BI \-c\ (\-\-kexec-syscall) +Specify that the old KEXEC_LOAD syscall should be used exclusively. +.TP +.BI \-a\ (\-\-kexec-syscall-auto) +Try the new KEXEC_FILE_LOAD syscall first and, when it is not supported or the +kernel does not understand the supplied image, fall back to the old KEXEC_LOAD +interface. + +There is no one single interface that always works, so this is the default. + +KEXEC_FILE_LOAD is required on systems that use locked-down secure boot to +verify the kernel signature. KEXEC_LOAD may also be disabled in the kernel +configuration. + +KEXEC_LOAD is required for some kernel image formats and on architectures that +do not implement KEXEC_FILE_LOAD. +.TP +.B \-u\ (\-\-unload) +Unload the current +.B kexec +target kernel. If a capture kernel is being unloaded then specify -p with -u. +.TP +.B \-v\ (\-\-version) +Return the version number of the installed utility. +.TP +.B \-x\ (\-\-no\-ifdown) +Shut down the running kernel, but restore the interface on reload. +.TP +.B \-y\ (\-\-no\-sync) +Shut down the running kernel, but skip syncing the filesystems. +.TP +.BI \-\-mem\-min= addr +Specify the lowest memory address +.I addr +to load code into. +.TP +.BI \-\-mem\-max= addr +Specify the highest memory address +.I addr +to load code into. +.TP +.BI \-\-entry= addr +Specify the jump back address. (0 means it's not jump back or preserve context) +.TP +.BI \-\-load\-preserve\-context +Load the new kernel and preserve context of current kernel during kexec. +.TP +.BI \-\-load\-jump\-back\-helper +Load a helper image to jump back to original kernel. +.TP +.BI \-\-reuseinitrd +Reuse initrd from first boot. +.TP +.BI \-\-print-ckr-size +Print crash kernel region size, if available. + + +.SH SUPPORTED KERNEL FILE TYPES AND OPTIONS +.B Beoboot-x86 +.RS +.TP +.B \-\-args\-elf +Pass ELF boot notes. +.TP +.B \-\-args\-linux +Pass Linux kernel style options. +.TP +.B \-\-real\-mode +Use the kernel's real mode entry point. 
+.RE +.PP +.B elf-x86 +.RS +.TP +.BI \-\-append= string +Append +.I string +to the kernel command line. +.TP +.BI \-\-command\-line= string +Set the kernel command line to +.IR string . +.TP +.BI \-\-reuse-cmdline +Use the command line from the running system. When a panic kernel is loaded, it +strips the +.I +crashkernel +parameter automatically. The +.I BOOT_IMAGE +parameter is also stripped. +.TP +.BI \-\-initrd= file +Use +.I file +as the kernel's initial ramdisk. +.TP +.BI \-\-ramdisk= file +Use +.I file +as the kernel's initial ramdisk. +.RE +.PP +.B bzImage-x86 +.RS +.TP +.BI \-\-append= string +Append +.I string +to the kernel command line. +.TP +.BI \-\-command\-line= string +Set the kernel command line to +.IR string . +.TP +.BI \-\-reuse-cmdline +Use the command line from the running system. When a panic kernel is loaded, it +strips the +.I +crashkernel +parameter automatically. The +.I BOOT_IMAGE +parameter is also stripped. +.TP +.BI \-\-initrd= file +Use +.I file +as the kernel's initial ramdisk. +.TP +.BI \-\-ramdisk= file +Use +.I file +as the kernel's initial ramdisk. +.TP +.BI \-\-real-mode +Use real-mode entry point. +.RE +.PP +.B multiboot-x86 +.RS +.TP +.BI \-\-command\-line= string +Set the kernel command line to +.IR string . +.TP +.BI \-\-reuse-cmdline +Use the command line from the running system. When a panic kernel is loaded, it +strips the +.I +crashkernel +parameter automatically. The +.I BOOT_IMAGE +parameter is also stripped. +.TP +.BI \-\-module= "mod arg1 arg2 ..." +Load module +.I mod +with command-line arguments +.I "arg1 arg2 ..." +This parameter can be specified multiple times. +.RE +.PP +.B multiboot2-x86 +.RS +.TP +.BI \-\-command\-line= string +Set the kernel command line to +.IR string . +.TP +.BI \-\-reuse-cmdline +Use the command line from the running system. When a panic kernel is loaded, it +strips the +.I +crashkernel +parameter automatically. The +.I BOOT_IMAGE +parameter is also stripped. 
+.TP +.BI \-\-module= "mod arg1 arg2 ..." +Load module +.I mod +with command-line arguments +.I "arg1 arg2 ..." +This parameter can be specified multiple times. +.RE +.PP +.B elf-ppc64 +.RS +.TP +.BI \-\-reuse-cmdline +Use the kernel command line from the running system. +.TP +.BI \-\-command\-line= string +Set the kernel command line to +.IR string . +.TP +.BI \-\-append= string +Set the kernel command line to +.IR string . +.TP +.BI \-\-ramdisk= file +Use +.IR file +as the initial RAM disk. +.TP +.BI \-\-initrd= file +Use +.IR file +as the initial RAM disk. +.TP +.BI \-\-devicetreeblob= file +Specify device tree blob file. Not applicable while using --kexec-file-syscall. +.TP +.BI \-\-dtb= file +Specify device tree blob file. Not applicable while using --kexec-file-syscall. +.RE + +.SH ARCHITECTURE OPTIONS +.TP +.B \-\-console\-serial +Enable the serial console. +.TP +.B \-\-console\-vga +Enable the VGA console. +.TP +.B \-\-elf32\-core\-headers +Prepare core headers in ELF32 format. +.TP +.B \-\-elf64\-core\-headers +Prepare core headers in ELF64 format. +.TP +.B \-\-reset\-vga +Attempt to reset a standard VGA device. +.TP +.BI \-\-serial= port +Specify the serial +.I port +for debug output. +.TP +.BI \-\-serial\-baud= baud_rate +Specify the +.I baud rate +of the serial port. +.TP +.BI \-\-dt\-no\-old\-root +Do not reuse old kernel root=<device> +param while creating flattened device tree. diff --git a/kexec/kexec.c b/kexec/kexec.c new file mode 100644 index 0000000..c3b182e --- /dev/null +++ b/kexec/kexec.c @@ -0,0 +1,1712 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * + * Modified (2007-05-15) by Francesco Chiechi to rudely handle mips platform + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <inttypes.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/reboot.h> +#include <sys/mman.h> +#include <unistd.h> +#include <fcntl.h> +#ifndef _O_BINARY +#define _O_BINARY 0 +#endif +#include <getopt.h> +#include <ctype.h> + +#include "config.h" + +#include <sha256.h> +#include "kexec.h" +#include "kexec-syscall.h" +#include "kexec-elf.h" +#include "kexec-xen.h" +#include "kexec-sha256.h" +#include "kexec-zlib.h" +#include "kexec-lzma.h" +#include <arch/options.h> + +#define KEXEC_LOADED_PATH "/sys/kernel/kexec_loaded" +#define KEXEC_CRASH_LOADED_PATH "/sys/kernel/kexec_crash_loaded" + +unsigned long long mem_min = 0; +unsigned long long mem_max = ULONG_MAX; +static unsigned long kexec_flags = 0; +/* Flags for kexec file (fd) based syscall */ +static unsigned long kexec_file_flags = 0; +int kexec_debug = 0; + +void dbgprint_mem_range(const char *prefix, struct memory_range *mr, int nr_mr) +{ + int i; + dbgprintf("%s\n", prefix); + for (i = 0; i < nr_mr; i++) { + dbgprintf("%016llx-%016llx (%d)\n", mr[i].start, + mr[i].end, mr[i].type); + } +} + +void die(const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + fflush(stdout); + fflush(stderr); + exit(1); +} + +static char *xstrdup(const char *str) +{ + char *new = strdup(str); + if (!new) + die("Cannot strdup \"%s\": %s\n", + str, strerror(errno)); + return new; +} + +void *xmalloc(size_t size) +{ + void *buf; + if (!size) + return NULL; + buf = malloc(size); + if (!buf) { + die("Cannot malloc %ld bytes: %s\n", + size + 0UL, strerror(errno)); + } + return buf; +} + +void *xrealloc(void *ptr, size_t size) +{ + void *buf; + buf = realloc(ptr, size); + if (!buf) { + die("Cannot realloc %ld bytes: %s\n", + size + 0UL, strerror(errno)); + } + return buf; +} + +int valid_memory_range(struct kexec_info *info, + unsigned long sstart, unsigned long send) +{ + int i; + if (sstart > send) { + return 0; + } + if ((send > mem_max) || (sstart < mem_min)) { + return 0; + } + for (i = 0; i < info->memory_ranges; i++) { + unsigned long mstart, mend; + /* Only consider memory ranges */ + if (info->memory_range[i].type != RANGE_RAM) + continue; + mstart = info->memory_range[i].start; + mend = info->memory_range[i].end; + if (i < info->memory_ranges - 1 + && mend == info->memory_range[i+1].start + && info->memory_range[i+1].type == RANGE_RAM) + mend = info->memory_range[i+1].end; + + /* Check to see if we are fully contained */ + if ((mstart <= sstart) && (mend >= send)) { + return 1; + } + } + return 0; +} + +static int valid_memory_segment(struct kexec_info *info, + struct kexec_segment *segment) +{ + unsigned long sstart, send; + sstart = (unsigned long)segment->mem; + send = sstart + segment->memsz - 1; + + return valid_memory_range(info, sstart, send); +} + +void print_segments(FILE *f, struct kexec_info *info) +{ + int i; + + fprintf(f, "nr_segments = %d\n", info->nr_segments); + for (i = 0; i < info->nr_segments; i++) { + fprintf(f, "segment[%d].buf = %p\n", i, + info->segment[i].buf); + fprintf(f, "segment[%d].bufsz = 0x%zx\n", i, + 
info->segment[i].bufsz); + fprintf(f, "segment[%d].mem = %p\n", i, + info->segment[i].mem); + fprintf(f, "segment[%d].memsz = 0x%zx\n", i, + info->segment[i].memsz); + } +} + +int sort_segments(struct kexec_info *info) +{ + int i, j; + void *end; + + /* Do a stupid insertion sort... */ + for (i = 0; i < info->nr_segments; i++) { + int tidx; + struct kexec_segment temp; + tidx = i; + for (j = i +1; j < info->nr_segments; j++) { + if (info->segment[j].mem < info->segment[tidx].mem) { + tidx = j; + } + } + if (tidx != i) { + temp = info->segment[tidx]; + info->segment[tidx] = info->segment[i]; + info->segment[i] = temp; + } + } + /* Now see if any of the segments overlap */ + end = 0; + for (i = 0; i < info->nr_segments; i++) { + if (end > info->segment[i].mem) { + fprintf(stderr, "Overlapping memory segments at %p\n", + end); + return -1; + } + end = ((char *)info->segment[i].mem) + info->segment[i].memsz; + } + return 0; +} + +unsigned long locate_hole(struct kexec_info *info, + unsigned long hole_size, unsigned long hole_align, + unsigned long hole_min, unsigned long hole_max, + int hole_end) +{ + int i, j; + struct memory_range *mem_range; + int max_mem_ranges, mem_ranges; + unsigned long hole_base; + + if (hole_end == 0) { + die("Invalid hole end argument of 0 specified to locate_hole"); + } + + /* Set an initial invalid value for the hole base */ + hole_base = ULONG_MAX; + + /* Align everything to at least a page size boundary */ + if (hole_align < (unsigned long)getpagesize()) { + hole_align = getpagesize(); + } + + /* Compute the free memory ranges */ + max_mem_ranges = info->memory_ranges + info->nr_segments; + mem_range = xmalloc(max_mem_ranges *sizeof(struct memory_range)); + mem_ranges = 0; + + /* Perform a merge on the 2 sorted lists of memory ranges */ + for (j = 0, i = 0; i < info->memory_ranges; i++) { + unsigned long long sstart, send; + unsigned long long mstart, mend; + mstart = info->memory_range[i].start; + mend = info->memory_range[i].end; + if 
(info->memory_range[i].type != RANGE_RAM) + continue; + while ((j < info->nr_segments) && + (((unsigned long)info->segment[j].mem) <= mend)) { + sstart = (unsigned long)info->segment[j].mem; + send = sstart + info->segment[j].memsz -1; + if (mstart < sstart) { + mem_range[mem_ranges].start = mstart; + mem_range[mem_ranges].end = sstart -1; + mem_range[mem_ranges].type = RANGE_RAM; + mem_ranges++; + } + mstart = send +1; + j++; + } + if (mstart < mend) { + mem_range[mem_ranges].start = mstart; + mem_range[mem_ranges].end = mend; + mem_range[mem_ranges].type = RANGE_RAM; + mem_ranges++; + } + } + /* Now find the end of the last memory_range I can use */ + for (i = 0; i < mem_ranges; i++) { + unsigned long long start, end, size; + start = mem_range[i].start; + end = mem_range[i].end; + /* First filter the range start and end values + * through the lens of mem_min, mem_max and hole_align. + */ + if (start < mem_min) { + start = mem_min; + } + if (start < hole_min) { + start = hole_min; + } + start = _ALIGN(start, hole_align); + if (end > mem_max) { + end = mem_max; + } + if (end > hole_max) { + end = hole_max; + } + /* Is this still a valid memory range? */ + if ((start >= end) || (start >= mem_max) || (end <= mem_min)) { + continue; + } + /* Is there enough space left so we can use it? 
*/ + size = end - start; + if (!hole_size || size >= hole_size - 1) { + if (hole_end > 0) { + hole_base = start; + break; + } else { + hole_base = _ALIGN_DOWN(end - hole_size + 1, + hole_align); + } + } + } + free(mem_range); + if (hole_base == ULONG_MAX) { + fprintf(stderr, "Could not find a free area of memory of " + "0x%lx bytes...\n", hole_size); + return ULONG_MAX; + } + if (hole_size && (hole_base + hole_size - 1) > hole_max) { + fprintf(stderr, "Could not find a free area of memory below: " + "0x%lx...\n", hole_max); + return ULONG_MAX; + } + return hole_base; +} + +void add_segment_phys_virt(struct kexec_info *info, + const void *buf, size_t bufsz, + unsigned long base, size_t memsz, int phys) +{ + unsigned long last; + size_t size; + int pagesize; + + if (bufsz > memsz) { + bufsz = memsz; + } + /* Forget empty segments */ + if (memsz == 0) { + return; + } + + /* Round memsz up to a multiple of pagesize */ + pagesize = getpagesize(); + memsz = _ALIGN(memsz, pagesize); + + /* Verify base is pagesize aligned. + * Finding a way to cope with this problem + * is important but for now error so at least + * we are not surprised by the code doing the wrong + * thing. + */ + if (base & (pagesize -1)) { + die("Base address: 0x%lx is not page aligned\n", base); + } + + if (phys) + base = virt_to_phys(base); + + last = base + memsz -1; + if (!valid_memory_range(info, base, last)) { + die("Invalid memory segment %p - %p\n", + (void *)base, (void *)last); + } + + size = (info->nr_segments + 1) * sizeof(info->segment[0]); + info->segment = xrealloc(info->segment, size); + info->segment[info->nr_segments].buf = buf; + info->segment[info->nr_segments].bufsz = bufsz; + info->segment[info->nr_segments].mem = (void *)base; + info->segment[info->nr_segments].memsz = memsz; + info->nr_segments++; + if (info->nr_segments > KEXEC_MAX_SEGMENTS) { + fprintf(stderr, "Warning: kernel segment limit reached. 
" + "This will likely fail\n"); + } +} + +unsigned long add_buffer_phys_virt(struct kexec_info *info, + const void *buf, unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, unsigned long buf_max, + int buf_end, int phys) +{ + unsigned long base; + int result; + int pagesize; + + result = sort_segments(info); + if (result < 0) { + die("sort_segments failed\n"); + } + + /* Round memsz up to a multiple of pagesize */ + pagesize = getpagesize(); + memsz = _ALIGN(memsz, pagesize); + + base = locate_hole(info, memsz, buf_align, buf_min, buf_max, buf_end); + if (base == ULONG_MAX) { + die("locate_hole failed\n"); + } + + add_segment_phys_virt(info, buf, bufsz, base, memsz, phys); + return base; +} + +unsigned long add_buffer_virt(struct kexec_info *info, const void *buf, + unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, + unsigned long buf_max, int buf_end) +{ + return add_buffer_phys_virt(info, buf, bufsz, memsz, buf_align, + buf_min, buf_max, buf_end, 0); +} + +static int find_memory_range(struct kexec_info *info, + unsigned long *base, unsigned long *size) +{ + int i; + unsigned long start, end; + + for (i = 0; i < info->memory_ranges; i++) { + if (info->memory_range[i].type != RANGE_RAM) + continue; + start = info->memory_range[i].start; + end = info->memory_range[i].end; + if (end > *base && start < *base + *size) { + if (start > *base) { + *size = *base + *size - start; + *base = start; + } + if (end < *base + *size) + *size = end - *base; + return 1; + } + } + return 0; +} + +static int find_segment_hole(struct kexec_info *info, + unsigned long *base, unsigned long *size) +{ + int i; + unsigned long seg_base, seg_size; + + for (i = 0; i < info->nr_segments; i++) { + seg_base = (unsigned long)info->segment[i].mem; + seg_size = info->segment[i].memsz; + + if (seg_base + seg_size <= *base) + continue; + else if (seg_base >= *base + *size) + break; + else if (*base < seg_base) { + *size 
= seg_base - *base; + break; + } else if (seg_base + seg_size < *base + *size) { + *size = *base + *size - (seg_base + seg_size); + *base = seg_base + seg_size; + } else { + *size = 0; + break; + } + } + return *size; +} + +static int add_backup_segments(struct kexec_info *info, + unsigned long backup_base, + unsigned long backup_size) +{ + unsigned long mem_base, mem_size, bkseg_base, bkseg_size, start, end; + unsigned long pagesize; + + pagesize = getpagesize(); + while (backup_size) { + mem_base = backup_base; + mem_size = backup_size; + if (!find_memory_range(info, &mem_base, &mem_size)) + break; + backup_size = backup_base + backup_size - \ + (mem_base + mem_size); + backup_base = mem_base + mem_size; + while (mem_size) { + bkseg_base = mem_base; + bkseg_size = mem_size; + if (sort_segments(info) < 0) + return -1; + if (!find_segment_hole(info, &bkseg_base, &bkseg_size)) + break; + start = _ALIGN(bkseg_base, pagesize); + end = _ALIGN_DOWN(bkseg_base + bkseg_size, pagesize); + add_segment_phys_virt(info, NULL, 0, + start, end-start, 0); + mem_size = mem_base + mem_size - \ + (bkseg_base + bkseg_size); + mem_base = bkseg_base + bkseg_size; + } + } + return 0; +} + +static char *slurp_fd(int fd, const char *filename, off_t size, off_t *nread) +{ + char *buf; + off_t progress; + ssize_t result; + + buf = xmalloc(size); + progress = 0; + while (progress < size) { + result = read(fd, buf + progress, size - progress); + if (result < 0) { + if ((errno == EINTR) || (errno == EAGAIN)) + continue; + fprintf(stderr, "Read on %s failed: %s\n", filename, + strerror(errno)); + free(buf); + close(fd); + return NULL; + } + if (result == 0) + /* EOF */ + break; + progress += result; + } + result = close(fd); + if (result < 0) + die("Close of %s failed: %s\n", filename, strerror(errno)); + + if (nread) + *nread = progress; + return buf; +} + +static char *slurp_file_generic(const char *filename, off_t *r_size, + int use_mmap) +{ + int fd; + char *buf; + off_t size, err, nread; + 
ssize_t result; + struct stat stats; + + if (!filename) { + *r_size = 0; + return 0; + } + fd = open(filename, O_RDONLY | _O_BINARY); + if (fd < 0) { + die("Cannot open `%s': %s\n", + filename, strerror(errno)); + } + result = fstat(fd, &stats); + if (result < 0) { + die("Cannot stat: %s: %s\n", + filename, strerror(errno)); + } + /* + * Seek in case the kernel is a character node like /dev/ubi0_0. + * This does not work on regular files which live in /proc and + * we need this for some /proc/device-tree entries + */ + if (S_ISCHR(stats.st_mode)) { + + size = lseek(fd, 0, SEEK_END); + if (size < 0) + die("Can not seek file %s: %s\n", filename, + strerror(errno)); + + err = lseek(fd, 0, SEEK_SET); + if (err < 0) + die("Can not seek to the begin of file %s: %s\n", + filename, strerror(errno)); + buf = slurp_fd(fd, filename, size, &nread); + } else if (S_ISBLK(stats.st_mode)) { + err = ioctl(fd, BLKGETSIZE64, &size); + if (err < 0) + die("Can't retrieve size of block device %s: %s\n", + filename, strerror(errno)); + buf = slurp_fd(fd, filename, size, &nread); + } else { + size = stats.st_size; + if (use_mmap) { + buf = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE, fd, 0); + nread = size; + } else { + buf = slurp_fd(fd, filename, size, &nread); + } + } + if ((use_mmap && (buf == MAP_FAILED)) || (!use_mmap && (buf == NULL))) + die("Cannot read %s", filename); + + if (nread != size) + die("Read on %s ended before stat said it should\n", filename); + + *r_size = size; + close(fd); + return buf; +} + +/* + * Read file into malloced buffer. + */ +char *slurp_file(const char *filename, off_t *r_size) +{ + return slurp_file_generic(filename, r_size, 0); +} + +/* + * Map "normal" file or read "character device" into malloced buffer. + * You must not use free, realloc, etc. for the returned buffer. 
+ */ +char *slurp_file_mmap(const char *filename, off_t *r_size) +{ + return slurp_file_generic(filename, r_size, 1); +} + +/* This functions reads either specified number of bytes from the file or + lesser if EOF is met. */ + +char *slurp_file_len(const char *filename, off_t size, off_t *nread) +{ + int fd; + + if (!filename) + return 0; + fd = open(filename, O_RDONLY | _O_BINARY); + if (fd < 0) { + fprintf(stderr, "Cannot open %s: %s\n", filename, + strerror(errno)); + return 0; + } + + return slurp_fd(fd, filename, size, nread); +} + +char *slurp_decompress_file(const char *filename, off_t *r_size) +{ + char *kernel_buf; + + kernel_buf = zlib_decompress_file(filename, r_size); + if (!kernel_buf) { + kernel_buf = lzma_decompress_file(filename, r_size); + if (!kernel_buf) + return slurp_file(filename, r_size); + } + return kernel_buf; +} + +static int copybuf_memfd(const char *kernel_buf, size_t size) +{ + int fd, count; + + fd = memfd_create("kernel", MFD_ALLOW_SEALING); + if (fd == -1) + return fd; + + count = write(fd, kernel_buf, size); + if (count < 0) + return -1; + + return fd; +} + +static void update_purgatory(struct kexec_info *info) +{ + static const uint8_t null_buf[256]; + sha256_context ctx; + sha256_digest_t digest; + struct sha256_region region[SHA256_REGIONS]; + int i, j; + /* Don't do anything if we are not using purgatory */ + if (!info->rhdr.e_shdr) { + return; + } + arch_update_purgatory(info); + + if (info->skip_checks) { + unsigned int tmp = 1; + + elf_rel_set_symbol(&info->rhdr, "skip_checks", &tmp, + sizeof(tmp)); + return; + } + + memset(region, 0, sizeof(region)); + sha256_starts(&ctx); + /* Compute a hash of the loaded kernel */ + for(j = i = 0; i < info->nr_segments; i++) { + unsigned long nullsz; + /* Don't include purgatory in the checksum. The stack + * in the bss will definitely change, and the .data section + * will also change when we poke the sha256_digest in there. + * A very clever/careful person could probably improve this. 
+ */ + if (info->segment[i].mem == (void *)info->rhdr.rel_addr) { + continue; + } + sha256_update(&ctx, info->segment[i].buf, + info->segment[i].bufsz); + nullsz = info->segment[i].memsz - info->segment[i].bufsz; + while(nullsz) { + unsigned long bytes = nullsz; + if (bytes > sizeof(null_buf)) { + bytes = sizeof(null_buf); + } + sha256_update(&ctx, null_buf, bytes); + nullsz -= bytes; + } + region[j].start = (unsigned long) info->segment[i].mem; + region[j].len = info->segment[i].memsz; + j++; + } + sha256_finish(&ctx, digest); + elf_rel_set_symbol(&info->rhdr, "sha256_regions", ®ion, + sizeof(region)); + elf_rel_set_symbol(&info->rhdr, "sha256_digest", &digest, + sizeof(digest)); +} + +/* + * Load the new kernel + */ +static int my_load(const char *type, int fileind, int argc, char **argv, + unsigned long kexec_flags, int skip_checks, void *entry) +{ + char *kernel; + char *kernel_buf; + off_t kernel_size; + int i = 0; + int result; + struct kexec_info info; + long native_arch; + int guess_only = 0; + + memset(&info, 0, sizeof(info)); + info.kexec_flags = kexec_flags; + info.skip_checks = skip_checks; + + result = 0; + if (argc - fileind <= 0) { + fprintf(stderr, "No kernel specified\n"); + usage(); + return -1; + } + kernel = argv[fileind]; + /* slurp in the input kernel */ + kernel_buf = slurp_decompress_file(kernel, &kernel_size); + + dbgprintf("kernel: %p kernel_size: %#llx\n", + kernel_buf, (unsigned long long)kernel_size); + + if (get_memory_ranges(&info.memory_range, &info.memory_ranges, + info.kexec_flags) < 0 || info.memory_ranges == 0) { + fprintf(stderr, "Could not get memory layout\n"); + return -1; + } + /* if a kernel type was specified, try to honor it */ + if (type) { + for (i = 0; i < file_types; i++) { + if (strcmp(type, file_type[i].name) == 0) + break; + } + if (i == file_types) { + fprintf(stderr, "Unsupported kernel type %s\n", type); + return -1; + } else { + /* make sure our file is really of that type */ + if 
(file_type[i].probe(kernel_buf, kernel_size) < 0) + guess_only = 1; + } + } + if (!type || guess_only) { + for (i = 0; i < file_types; i++) { + if (file_type[i].probe(kernel_buf, kernel_size) == 0) + break; + } + if (i == file_types) { + fprintf(stderr, "Cannot determine the file type " + "of %s\n", kernel); + return -1; + } else { + if (guess_only) { + fprintf(stderr, "Wrong file type %s, " + "file matches type %s\n", + type, file_type[i].name); + return -1; + } + } + } + /* Figure out our native architecture before load */ + native_arch = physical_arch(); + if (native_arch < 0) { + return -1; + } + info.kexec_flags |= native_arch; + + result = file_type[i].load(argc, argv, kernel_buf, kernel_size, &info); + if (result < 0) { + switch (result) { + case ENOCRASHKERNEL: + fprintf(stderr, + "No crash kernel segment found in /proc/iomem\n" + "Please check the crashkernel= boot parameter.\n"); + break; + case EFAILED: + default: + fprintf(stderr, "Cannot load %s\n", kernel); + break; + } + return result; + } + /* If we are not in native mode setup an appropriate trampoline */ + if (arch_compat_trampoline(&info) < 0) { + return -1; + } + if (info.kexec_flags & KEXEC_PRESERVE_CONTEXT) { + add_backup_segments(&info, mem_min, mem_max - mem_min + 1); + } + /* Verify all of the segments load to a valid location in memory */ + for (i = 0; i < info.nr_segments; i++) { + if (!valid_memory_segment(&info, info.segment +i)) { + fprintf(stderr, "Invalid memory segment %p - %p\n", + info.segment[i].mem, + ((char *)info.segment[i].mem) + + info.segment[i].memsz); + return -1; + } + } + /* Sort the segments and verify we don't have overlaps */ + if (sort_segments(&info) < 0) { + return -1; + } + /* if purgatory is loaded update it */ + update_purgatory(&info); + if (entry) + info.entry = entry; + + dbgprintf("kexec_load: entry = %p flags = 0x%lx\n", + info.entry, info.kexec_flags); + if (kexec_debug) + print_segments(stderr, &info); + + if (xen_present()) + result = 
xen_kexec_load(&info); + else + result = kexec_load(info.entry, + info.nr_segments, info.segment, + info.kexec_flags); + if (result != 0) { + /* The load failed, print some debugging information */ + fprintf(stderr, "kexec_load failed: %s\n", + strerror(errno)); + fprintf(stderr, "entry = %p flags = 0x%lx\n", + info.entry, info.kexec_flags); + print_segments(stderr, &info); + } + return result; +} + +static int kexec_file_unload(unsigned long kexec_file_flags) +{ + int ret = 0; + + if (!is_kexec_file_load_implemented()) + return EFALLBACK; + + ret = kexec_file_load(-1, -1, 0, NULL, kexec_file_flags); + if (ret != 0) { + if (errno == ENOSYS) { + ret = EFALLBACK; + } else { + /* + * The unload failed, print some debugging + * information */ + fprintf(stderr, "kexec_file_load(unload) failed: %s\n", + strerror(errno)); + ret = EFAILED; + } + } + return ret; +} + +static int k_unload (unsigned long kexec_flags) +{ + int result; + long native_arch; + + /* set the arch */ + native_arch = physical_arch(); + if (native_arch < 0) { + return -1; + } + kexec_flags |= native_arch; + + if (xen_present()) + result = xen_kexec_unload(kexec_flags); + else + result = kexec_load(NULL, 0, NULL, kexec_flags); + if (result != 0) { + /* The unload failed, print some debugging information */ + fprintf(stderr, "kexec unload failed: %s\n", + strerror(errno)); + } + return result; +} + +/* + * Start a reboot. + */ +static int my_shutdown(void) +{ + char *args[] = { + "shutdown", + "-r", + "now", + NULL + }; + + execv("/sbin/shutdown", args); + execv("/etc/shutdown", args); + execv("/bin/shutdown", args); + + perror("shutdown"); + return -1; +} + +/* + * Exec the new kernel. If successful, this triggers an immediate reboot + * and does not return, but Xen Live Update is an exception (more on this + * below). + */ +static int my_exec(void) +{ + if (xen_present()) { + int ret; + + /* + * There are two cases in which the Xen hypercall may return: + * 1) An error occurred, e.g. 
the kexec image was not loaded. + * The exact error is indicated by errno. + * 2) Live Update was successfully scheduled. Note that unlike + * a normal kexec, Live Update happens asynchronously, i.e. + * the hypercall merely schedules the kexec operation and + * returns immediately. + */ + ret = xen_kexec_exec(kexec_flags); + if ((kexec_flags & KEXEC_LIVE_UPDATE) && !ret) + return 0; + } else + reboot(LINUX_REBOOT_CMD_KEXEC); + /* I have failed if I make it here */ + fprintf(stderr, "kexec failed: %s\n", + strerror(errno)); + return -1; +} + +static int load_jump_back_helper_image(unsigned long kexec_flags, void *entry) +{ + int result; + struct kexec_segment seg; + + memset(&seg, 0, sizeof(seg)); + result = kexec_load(entry, 1, &seg, kexec_flags); + return result; +} + +static int kexec_loaded(const char *file) +{ + long ret = -1; + FILE *fp; + char *p; + char line[3]; + + /* No way to tell if an image is loaded under Xen, assume it is. */ + if (xen_present()) + return 1; + + fp = fopen(file, "r"); + if (fp == NULL) + return -1; + + p = fgets(line, sizeof(line), fp); + fclose(fp); + + if (p == NULL) + return -1; + + ret = strtol(line, &p, 10); + + /* Too long */ + if (ret > INT_MAX) + return -1; + + /* No digits were found */ + if (p == line) + return -1; + + return (int)ret; +} + +/* + * Jump back to the original kernel + */ +static int my_load_jump_back_helper(unsigned long kexec_flags, void *entry) +{ + int result; + + if (kexec_loaded(KEXEC_LOADED_PATH)) { + fprintf(stderr, "There is kexec kernel loaded, make sure " + "you are in kexeced kernel.\n"); + return -1; + } + if (!entry) { + fprintf(stderr, "Please specify jump back entry " + "in command line\n"); + return -1; + } + result = load_jump_back_helper_image(kexec_flags, entry); + if (result) { + fprintf(stderr, "load jump back kernel failed: %s\n", + strerror(errno)); + return result; + } + return result; +} + +static void version(void) +{ + printf(PACKAGE_STRING "\n"); +} + +void usage(void) +{ + int i; 
+ + version(); + printf("Usage: kexec [OPTION]... [kernel]\n" + "Directly reboot into a new kernel\n" + "\n" + " -h, --help Print this help.\n" + " -v, --version Print the version of kexec.\n" + " -f, --force Force an immediate kexec,\n" + " don't call shutdown.\n" + " -i, --no-checks Fast reboot, no memory integrity checks.\n" + " -x, --no-ifdown Don't bring down network interfaces.\n" + " -y, --no-sync Don't sync filesystems before kexec.\n" + " -l, --load Load the new kernel into the\n" + " current kernel.\n" + " -p, --load-panic Load the new kernel for use on panic.\n" + " -u, --unload Unload the current kexec target kernel.\n" + " If capture kernel is being unloaded\n" + " specify -p with -u.\n" + " -e, --exec Execute a currently loaded kernel.\n" + " --exec-live-update Execute a currently loaded xen image after\n" + "storing the state required to live update.\n" + " -t, --type=TYPE Specify the new kernel is of this type.\n" + " --mem-min=<addr> Specify the lowest memory address to\n" + " load code into.\n" + " --mem-max=<addr> Specify the highest memory address to\n" + " load code into.\n" + " --reuseinitrd Reuse initrd from first boot.\n" + " --print-ckr-size Print crash kernel region size.\n" + " --load-preserve-context Load the new kernel and preserve\n" + " context of current kernel during kexec.\n" + " --load-jump-back-helper Load a helper image to jump back\n" + " to original kernel.\n" + " --load-live-update Load the new kernel to overwrite the\n" + " running kernel.\n" + " --entry=<addr> Specify jump back address.\n" + " (0 means it's not jump back or\n" + " preserve context)\n" + " to original kernel.\n" + " -s, --kexec-file-syscall Use file based syscall for kexec operation\n" + " -c, --kexec-syscall Use the kexec_load syscall for for compatibility\n" + " with systems that don't support -s\n" + " -a, --kexec-syscall-auto Use file based syscall for kexec and fall\n" + " back to the compatibility syscall when file based\n" + " syscall is not supported 
or the kernel did not\n" + " understand the image (default)\n" + " -d, --debug Enable debugging to help spot a failure.\n" + " -S, --status Return 1 if the type (by default crash) is loaded,\n" + " 0 if not.\n" + "\n" + "Supported kernel file types and options: \n"); + for (i = 0; i < file_types; i++) { + printf("%s\n", file_type[i].name); + file_type[i].usage(); + } + printf( "Architecture options: \n"); + arch_usage(); + printf("\n"); +} + +static int k_status(unsigned long kexec_flags) +{ + int result; + long native_arch; + + /* set the arch */ + native_arch = physical_arch(); + if (native_arch < 0) { + return -1; + } + kexec_flags |= native_arch; + + if (xen_present()) + result = xen_kexec_status(kexec_flags); + else { + if (kexec_flags & KEXEC_ON_CRASH) + result = kexec_loaded(KEXEC_CRASH_LOADED_PATH); + else + result = kexec_loaded(KEXEC_LOADED_PATH); + } + return result; +} + + +/* + * Remove parameter from a kernel command line. Helper function by get_command_line(). + */ +void remove_parameter(char *line, const char *param_name) +{ + char *start, *end; + + start = strstr(line, param_name); + + /* parameter not found */ + if (!start) + return; + + /* + * check if that's really the start of a parameter and not in + * the middle of the word + */ + if (start != line && !isspace(*(start-1))) + return; + + end = strstr(start, " "); + if (!end) + *start = 0; + else { + memmove(start, end+1, strlen(end)); + *(end + strlen(end)) = 0; + } +} + +static ssize_t _read(int fd, void *buf, size_t count) +{ + ssize_t ret, offset = 0; + + do { + ret = read(fd, buf + offset, count - offset); + if (ret < 0) { + if ((errno == EINTR) || (errno == EAGAIN)) + continue; + return ret; + } + offset += ret; + } while (ret && offset < count); + + return offset; +} + +static char *slurp_proc_file(const char *filename, size_t *len) +{ + ssize_t ret, startpos = 0; + unsigned int size = 64; + char *buf = NULL, *tmp; + int fd; + + fd = open(filename, O_RDONLY); + if (fd == -1) + return 
NULL; + + do { + size *= 2; + tmp = realloc(buf, size); + if (!tmp) { + free(buf); + return NULL; + } + buf = tmp; + + ret = _read(fd, buf + startpos, size - startpos); + if (ret < 0) { + free(buf); + return NULL; + } + + startpos += ret; + + } while(ret); + + *len = startpos; + return buf; +} + +/* + * Returns the contents of the current command line to be used with + * --reuse-cmdline option. The function gets called from architecture specific + * code. If we load a panic kernel, that function will strip the + * "crashkernel=" option because it does not make sense that the crashkernel + * reserves memory for a crashkernel (well, it would not boot since the + * amount is exactly the same as the crashkernel has overall memory). Also, + * remove the BOOT_IMAGE from lilo (and others) since that doesn't make + * sense here any more. The kernel could be different even if we reuse the + * commandline. + * + * The function returns dynamically allocated memory. + */ +char *get_command_line(void) +{ + char *p, *line; + size_t size; + + line = slurp_proc_file("/proc/cmdline", &size); + if (!line || !size) + die("Failed to read /proc/cmdline\n"); + + /* strip newline */ + line[size-1] = '\0'; + + p = strpbrk(line, "\r\n"); + if (p) + *p = '\0'; + + remove_parameter(line, "BOOT_IMAGE"); + if (kexec_flags & KEXEC_ON_CRASH) + remove_parameter(line, "crashkernel"); + + return line; +} + +/* check we retained the initrd */ +static void check_reuse_initrd(void) +{ + char *str = NULL; + char *line = get_command_line(); + + str = strstr(line, "retain_initrd"); + free(line); + + if (str == NULL) + die("unrecoverable error: current boot didn't " + "retain the initrd for reuse.\n"); +} + +char *concat_cmdline(const char *base, const char *append) +{ + char *cmdline; + if (!base && !append) + return NULL; + if (append && !base) + return xstrdup(append); + if (base && !append) + return xstrdup(base); + cmdline = xmalloc(strlen(base) + 1 + strlen(append) + 1); + strcpy(cmdline, base); + 
strcat(cmdline, " "); + strcat(cmdline, append); + return cmdline; +} + +void cmdline_add_liveupdate(char **base) +{ + uint64_t lu_start, lu_end, lu_sizeM; + char *str; + char buf[64]; + size_t len; + + if ( !xen_present() ) + return; + + xen_get_kexec_range(KEXEC_RANGE_MA_LIVEUPDATE, &lu_start, &lu_end); + lu_sizeM = (lu_end - lu_start) / (1024 * 1024) + 1; + sprintf(buf, " liveupdate=%lluM@0x%llx", (unsigned long long)lu_sizeM, + (unsigned long long)lu_start); + len = strlen(*base) + strlen(buf) + 1; + str = xmalloc(len); + sprintf(str, "%s%s", *base, buf); + *base = str; +} + +/* New file based kexec system call related code */ +static int do_kexec_file_load(int fileind, int argc, char **argv, + unsigned long flags) { + + char *kernel; + int kernel_fd, i; + struct kexec_info info; + int ret = 0; + char *kernel_buf; + off_t kernel_size; + + memset(&info, 0, sizeof(info)); + info.segment = NULL; + info.nr_segments = 0; + info.entry = NULL; + info.backup_start = 0; + info.kexec_flags = flags; + + info.file_mode = 1; + info.kernel_fd = -1; + info.initrd_fd = -1; + + if (!is_kexec_file_load_implemented()) + return EFALLBACK; + + if (argc - fileind <= 0) { + fprintf(stderr, "No kernel specified\n"); + usage(); + return EFAILED; + } + + kernel = argv[fileind]; + + /* slurp in the input kernel */ + kernel_buf = slurp_decompress_file(kernel, &kernel_size); + if (!kernel_buf) { + fprintf(stderr, "Failed to decompress file %s:%s\n", kernel, + strerror(errno)); + return EFAILED; + } + kernel_fd = copybuf_memfd(kernel_buf, kernel_size); + if (kernel_fd < 0) { + fprintf(stderr, "Failed to copy decompressed buf\n"); + return EFAILED; + } + + for (i = 0; i < file_types; i++) { + if (file_type[i].probe(kernel_buf, kernel_size) >= 0) + break; + } + + if (i == file_types) { + fprintf(stderr, "Cannot determine the file type " "of %s\n", + kernel); + close(kernel_fd); + return EFAILED; + } + + ret = file_type[i].load(argc, argv, kernel_buf, kernel_size, &info); + if (ret < 0) { + 
fprintf(stderr, "Cannot load %s\n", kernel); + close(kernel_fd); + return ret; + } + + /* + * image type specific load functioin detect the capsule kernel type + * and create another fd for file load. For example the zboot kernel. + */ + if (info.kernel_fd != -1) + kernel_fd = info.kernel_fd; + + /* + * If there is no initramfs, set KEXEC_FILE_NO_INITRAMFS flag so that + * kernel does not return error with negative initrd_fd. + */ + if (info.initrd_fd == -1) + info.kexec_flags |= KEXEC_FILE_NO_INITRAMFS; + + ret = kexec_file_load(kernel_fd, info.initrd_fd, info.command_line_len, + info.command_line, info.kexec_flags); + if (ret != 0) { + switch (errno) { + /* + * Something failed with signature verification. + * Reject the image. + */ + case ELIBBAD: + case EKEYREJECTED: + case ENOPKG: + case ENOKEY: + case EBADMSG: + case EMSGSIZE: + /* Reject by default. */ + default: + fprintf(stderr, "kexec_file_load failed: %s\n", strerror(errno)); + ret = EFAILED; + break; + + /* Not implemented. */ + case ENOSYS: + /* + * Parsing image or other options failed + * The image may be invalid or image + * type may not supported by kernel so + * retry parsing in kexec-tools. + */ + case EINVAL: + case ENOEXEC: + /* + * ENOTSUP can be unsupported image + * type or unsupported PE signature + * wrapper type, duh. + */ + case ENOTSUP: + ret = EFALLBACK; + break; + } + } + + close(kernel_fd); + return ret; +} + +static void print_crashkernel_region_size(void) +{ + uint64_t start = 0, end = 0; + + if (is_crashkernel_mem_reserved() && + get_crash_kernel_load_range(&start, &end)) { + fprintf(stderr, "get_crash_kernel_load_range() failed.\n"); + return; + } + + printf("%" PRIu64 "\n", (start != end) ? 
(end - start + 1) : 0UL); +} + +int main(int argc, char *argv[]) +{ + int has_opt_load = 0; + int do_load = 1; + int do_exec = 0; + int do_load_jump_back_helper = 0; + int do_shutdown = 1; + int do_sync = 1, skip_sync = 0; + int do_ifdown = 0, skip_ifdown = 0; + int do_unload = 0; + int do_reuse_initrd = 0; + int do_kexec_file_syscall = 1; + int do_kexec_fallback = 1; + int skip_checks = 0; + int do_status = 0; + void *entry = 0; + char *type = 0; + char *endptr; + int opt; + int result = 0; + int fileind; + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, 0, 0}, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + + /* Reset getopt for the next pass. */ + opterr = 1; + optind = 1; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch(opt) { + case '?': + usage(); + return 1; + case OPT_HELP: + usage(); + return 0; + case OPT_VERSION: + version(); + return 0; + case OPT_DEBUG: + kexec_debug = 1; + break; + case OPT_NOIFDOWN: + skip_ifdown = 1; + break; + case OPT_NOSYNC: + skip_sync = 1; + break; + case OPT_FORCE: + do_load = 1; + do_shutdown = 0; + do_sync = 1; + do_ifdown = 1; + do_exec = 1; + break; + case OPT_LOAD: + has_opt_load = 1; + do_load = 1; + do_exec = 0; + do_shutdown = 0; + break; + case OPT_UNLOAD: + do_load = 0; + do_shutdown = 0; + do_sync = 0; + do_unload = 1; + kexec_file_flags |= KEXEC_FILE_UNLOAD; + break; + case OPT_EXEC_LIVE_UPDATE: + if ( !xen_present() ) { + fprintf(stderr, "--exec-live-update only works under xen.\n"); + return 1; + } + kexec_flags |= KEXEC_LIVE_UPDATE; + /* fallthrough */ + case OPT_EXEC: + do_load = 0; + do_shutdown = 0; + do_sync = 1; + do_ifdown = 1; + do_exec = 1; + break; + case OPT_LOAD_JUMP_BACK_HELPER: + do_load = 0; + do_shutdown = 0; + do_sync = 1; + do_ifdown = 1; + do_exec = 0; + do_load_jump_back_helper = 1; + kexec_flags = KEXEC_PRESERVE_CONTEXT; + break; + case OPT_ENTRY: + entry = (void *)strtoul(optarg, &endptr, 0); + if (*endptr) { + 
fprintf(stderr, + "Bad option value in --entry=%s\n", + optarg); + usage(); + return 1; + } + break; + case OPT_LOAD_PRESERVE_CONTEXT: + case OPT_LOAD_LIVE_UPDATE: + do_load = 1; + do_exec = 0; + do_shutdown = 0; + do_sync = 1; + kexec_flags = (opt == OPT_LOAD_PRESERVE_CONTEXT) ? + KEXEC_PRESERVE_CONTEXT : KEXEC_LIVE_UPDATE; + break; + case OPT_TYPE: + type = optarg; + break; + case OPT_PANIC: + do_load = 1; + do_exec = 0; + do_shutdown = 0; + do_sync = 0; + kexec_file_flags |= KEXEC_FILE_ON_CRASH; + kexec_flags = KEXEC_ON_CRASH; + break; + case OPT_MEM_MIN: + mem_min = strtoul(optarg, &endptr, 0); + if (*endptr) { + fprintf(stderr, + "Bad option value in --mem-min=%s\n", + optarg); + usage(); + return 1; + } + break; + case OPT_MEM_MAX: + mem_max = strtoul(optarg, &endptr, 0); + if (*endptr) { + fprintf(stderr, + "Bad option value in --mem-max=%s\n", + optarg); + usage(); + return 1; + } + break; + case OPT_REUSE_INITRD: + do_reuse_initrd = 1; + break; + case OPT_KEXEC_FILE_SYSCALL: + do_kexec_file_syscall = 1; + do_kexec_fallback = 0; + break; + case OPT_KEXEC_SYSCALL: + do_kexec_file_syscall = 0; + do_kexec_fallback = 0; + break; + case OPT_KEXEC_SYSCALL_AUTO: + do_kexec_file_syscall = 1; + do_kexec_fallback = 1; + break; + case OPT_NOCHECKS: + skip_checks = 1; + break; + case OPT_STATUS: + do_status = 1; + break; + case OPT_PRINT_CKR_SIZE: + print_crashkernel_region_size(); + return 0; + default: + break; + } + } + + if (skip_ifdown) + do_ifdown = 0; + if (skip_sync) + do_sync = 0; + + if (do_status) { + if (kexec_flags == 0 && !has_opt_load) + kexec_flags = KEXEC_ON_CRASH; + do_load = 0; + do_reuse_initrd = 0; + do_unload = 0; + do_load = 0; + do_shutdown = 0; + do_sync = 0; + do_ifdown = 0; + do_exec = 0; + do_load_jump_back_helper = 0; + } + + if (do_load && + ((kexec_flags & KEXEC_ON_CRASH) || + (kexec_file_flags & KEXEC_FILE_ON_CRASH)) && + !is_crashkernel_mem_reserved()) { + die("Memory for crashkernel is not reserved\n" + "Please reserve memory by 
passing" + "\"crashkernel=Y@X\" parameter to kernel\n" + "Then try to loading kdump kernel\n"); + } + + if (do_load && (kexec_flags & KEXEC_PRESERVE_CONTEXT) && + mem_max == ULONG_MAX) { + die("Please specify memory range used by kexeced kernel\n" + "to preserve the context of original kernel with \n" + "\"--mem-max\" parameter\n"); + } + + if (do_load && (kexec_flags & KEXEC_LIVE_UPDATE) && + !xen_present()) { + die("--load-live-update can only be used with xen\n"); + } + + fileind = optind; + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + + result = arch_process_options(argc, argv); + + /* Check for bogus options */ + if (!do_load) { + while((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + if ((opt == '?') || (opt >= OPT_ARCH_MAX)) { + usage(); + return 1; + } + } + } + if (do_kexec_file_syscall) { + if (do_load_jump_back_helper && !do_kexec_fallback) + die("--load-jump-back-helper not supported with kexec_file_load\n"); + if (kexec_flags & KEXEC_PRESERVE_CONTEXT) + die("--load-preserve-context not supported with kexec_file_load\n"); + } + + if (do_reuse_initrd){ + check_reuse_initrd(); + arch_reuse_initrd(); + } + if (do_status) { + result = k_status(kexec_flags); + } + if (do_unload) { + if (do_kexec_file_syscall) { + result = kexec_file_unload(kexec_file_flags); + if (result == EFALLBACK && do_kexec_fallback) { + /* Reset getopt for fallback */ + opterr = 1; + optind = 1; + do_kexec_file_syscall = 0; + } + } + if (!do_kexec_file_syscall) + result = k_unload(kexec_flags); + } + if (do_load && (result == 0)) { + if (do_kexec_file_syscall) { + result = do_kexec_file_load(fileind, argc, argv, + kexec_file_flags); + if (result == EFALLBACK && do_kexec_fallback) { + /* Reset getopt for fallback */ + opterr = 1; + optind = 1; + do_kexec_file_syscall = 0; + } + } + if (!do_kexec_file_syscall) + result = my_load(type, fileind, argc, argv, + kexec_flags, skip_checks, entry); + } + /* Don't 
shutdown unless there is something to reboot to! */ + if ((result == 0) && (do_shutdown || do_exec) && !kexec_loaded(KEXEC_LOADED_PATH)) { + die("Nothing has been loaded!\n"); + } + if ((result == 0) && do_shutdown) { + result = my_shutdown(); + } + if ((result == 0) && do_sync) { + sync(); + } + if ((result == 0) && do_ifdown) { + ifdown(); + } + if ((result == 0) && do_exec) { + result = my_exec(); + } + if ((result == 0) && do_load_jump_back_helper) { + result = my_load_jump_back_helper(kexec_flags, entry); + } + if (result == EFALLBACK) + fputs("syscall kexec_file_load not available.\n", stderr); + + fflush(stdout); + fflush(stderr); + return result; +} diff --git a/kexec/kexec.h b/kexec/kexec.h new file mode 100644 index 0000000..ed3b499 --- /dev/null +++ b/kexec/kexec.h @@ -0,0 +1,350 @@ +#ifndef KEXEC_H +#define KEXEC_H + +#include "config.h" + +#include <sys/types.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#define USE_BSD +#include <byteswap.h> +#include <endian.h> +#define _GNU_SOURCE + +#include "kexec-elf.h" +#include "unused.h" + +#ifndef BYTE_ORDER +#error BYTE_ORDER not defined +#endif + +#ifndef LITTLE_ENDIAN +#error LITTLE_ENDIAN not defined +#endif + +#ifndef BIG_ENDIAN +#error BIG_ENDIAN not defined +#endif + +#if BYTE_ORDER == LITTLE_ENDIAN +#define cpu_to_le16(val) (val) +#define cpu_to_le32(val) (val) +#define cpu_to_le64(val) (val) +#define cpu_to_be16(val) bswap_16(val) +#define cpu_to_be32(val) bswap_32(val) +#define cpu_to_be64(val) bswap_64(val) +#define le16_to_cpu(val) (val) +#define le32_to_cpu(val) (val) +#define le64_to_cpu(val) (val) +#define be16_to_cpu(val) bswap_16(val) +#define be32_to_cpu(val) bswap_32(val) +#define be64_to_cpu(val) bswap_64(val) +#elif BYTE_ORDER == BIG_ENDIAN +#define cpu_to_le16(val) bswap_16(val) +#define cpu_to_le32(val) bswap_32(val) +#define cpu_to_le64(val) bswap_64(val) +#define cpu_to_be16(val) (val) +#define cpu_to_be32(val) (val) +#define cpu_to_be64(val) (val) +#define 
le16_to_cpu(val) bswap_16(val) +#define le32_to_cpu(val) bswap_32(val) +#define le64_to_cpu(val) bswap_64(val) +#define be16_to_cpu(val) (val) +#define be32_to_cpu(val) (val) +#define be64_to_cpu(val) (val) +#else +#error unknwon BYTE_ORDER +#endif + +/* + * Document some of the reasons why crashdump may fail, so we can give + * better error messages + */ +#define EFAILED -1 /* default error code */ +#define ENOCRASHKERNEL -2 /* no memory reserved for crashkernel */ +#define EFALLBACK -3 /* fallback to kexec_load(2) may work */ + +/* + * This function doesn't actually exist. The idea is that when someone + * uses the macros below with an unsupported size (datatype), the linker + * will alert us to the problem via an unresolved reference error. + */ +extern unsigned long bad_unaligned_access_length (void); + +#define get_unaligned(loc) \ +({ \ + __typeof__(*(loc)) _v; \ + size_t size = sizeof(*(loc)); \ + switch(size) { \ + case 1: case 2: case 4: case 8: \ + memcpy(&_v, (loc), size); \ + break; \ + default: \ + _v = bad_unaligned_access_length(); \ + break; \ + } \ + _v; \ +}) + +#define put_unaligned(value, loc) \ +do { \ + size_t size = sizeof(*(loc)); \ + __typeof__(*(loc)) _v = value; \ + switch(size) { \ + case 1: case 2: case 4: case 8: \ + memcpy((loc), &_v, size); \ + break; \ + default: \ + bad_unaligned_access_length(); \ + break; \ + } \ +} while(0) + +#define _ALIGN_UP_MASK(addr, mask) (((addr) + (mask)) & ~(mask)) +#define _ALIGN_DOWN_MASK(addr, mask) ((addr) & ~(mask)) + +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_UP(addr, size) \ + _ALIGN_UP_MASK(addr, (typeof(addr))(size) - 1) +#define _ALIGN_DOWN(addr, size) \ + _ALIGN_DOWN_MASK(addr, (typeof(addr))(size) - 1) + +/* align addr on a size boundary - adjust address up if needed */ +#define _ALIGN(addr, size) _ALIGN_UP(addr, size) + +extern unsigned long long mem_min, mem_max; +extern int kexec_debug; + +#define dbgprintf(...) 
\ +do { \ + if (kexec_debug) \ + fprintf(stderr, __VA_ARGS__); \ +} while(0) + /* A buffer (buf, bufsz) paired with a memory span (mem, memsz). */ +struct kexec_segment { + const void *buf; + size_t bufsz; + const void *mem; + size_t memsz; +}; + /* A start/end address pair tagged with one of the RANGE_* type codes defined inside the struct. */ +struct memory_range { + unsigned long long start, end; + unsigned type; +#define RANGE_RAM 0 +#define RANGE_RESERVED 1 +#define RANGE_ACPI 2 +#define RANGE_ACPI_NVS 3 +#define RANGE_UNCACHED 4 +#define RANGE_PMEM 6 +#define RANGE_PRAM 11 +}; + /* Array of memory_range entries; size/max_size are presumably the used and allocated counts - TODO confirm. */ +struct memory_ranges { + unsigned int size; + unsigned int max_size; + struct memory_range *ranges; +}; + /* State gathered for one kexec operation: segments, memory and crash ranges, entry point, flags and file-mode data. */ +struct kexec_info { + struct kexec_segment *segment; + int nr_segments; + struct memory_range *memory_range; + int memory_ranges; + struct memory_range *crash_range; + int nr_crash_ranges; + void *entry; + struct mem_ehdr rhdr; + unsigned long backup_start; + unsigned long kexec_flags; + unsigned long backup_src_start; + unsigned long backup_src_size; + /* Set to 1 if we are using kexec file syscall */ + unsigned long file_mode :1; + + /* Filled by kernel image processing code */ + int kernel_fd; + int initrd_fd; + char *command_line; + int command_line_len; + + int skip_checks; +}; + /* Pairs a machine name string with an arch value; the table is arches[]. */ +struct arch_map_entry { + const char *machine; + unsigned long arch; +}; + +extern const struct arch_map_entry arches[]; +long physical_arch(void); + +void usage(void); +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags); +int valid_memory_range(struct kexec_info *info, + unsigned long sstart, unsigned long send); +void print_segments(FILE *file, struct kexec_info *info); +int sort_segments(struct kexec_info *info); +unsigned long locate_hole(struct kexec_info *info, + unsigned long hole_size, unsigned long hole_align, + unsigned long hole_min, unsigned long hole_max, + int hole_end); + /* Callback types stored in struct file_type: probe an image buffer, load it, print type-specific usage. */ +typedef int (probe_t)(const char *kernel_buf, off_t kernel_size); +typedef int (load_t )(int argc, char **argv, + const char *kernel_buf, off_t kernel_size, + struct kexec_info *info); +typedef void (usage_t)(void); +struct file_type { + 
const char *name; + probe_t *probe; + load_t *load; + usage_t *usage; +}; + +extern struct file_type file_type[]; +extern int file_types; + /* Option codes: options with a short form use their ASCII letter; long-only options are numbered from 256. */ +#define OPT_HELP 'h' +#define OPT_VERSION 'v' +#define OPT_DEBUG 'd' +#define OPT_FORCE 'f' +#define OPT_NOCHECKS 'i' +#define OPT_NOIFDOWN 'x' +#define OPT_NOSYNC 'y' +#define OPT_EXEC 'e' +#define OPT_LOAD 'l' +#define OPT_UNLOAD 'u' +#define OPT_TYPE 't' +#define OPT_PANIC 'p' +#define OPT_KEXEC_FILE_SYSCALL 's' +#define OPT_KEXEC_SYSCALL 'c' +#define OPT_KEXEC_SYSCALL_AUTO 'a' +#define OPT_STATUS 'S' +#define OPT_MEM_MIN 256 +#define OPT_MEM_MAX 257 +#define OPT_REUSE_INITRD 258 +#define OPT_LOAD_PRESERVE_CONTEXT 259 +#define OPT_LOAD_JUMP_BACK_HELPER 260 +#define OPT_ENTRY 261 +#define OPT_PRINT_CKR_SIZE 262 +#define OPT_LOAD_LIVE_UPDATE 263 +#define OPT_EXEC_LIVE_UPDATE 264 +#define OPT_MAX 265 +#define KEXEC_OPTIONS \ + { "help", 0, 0, OPT_HELP }, \ + { "version", 0, 0, OPT_VERSION }, \ + { "force", 0, 0, OPT_FORCE }, \ + { "no-checks", 0, 0, OPT_NOCHECKS }, \ + { "no-ifdown", 0, 0, OPT_NOIFDOWN }, \ + { "no-sync", 0, 0, OPT_NOSYNC }, \ + { "load", 0, 0, OPT_LOAD }, \ + { "unload", 0, 0, OPT_UNLOAD }, \ + { "exec", 0, 0, OPT_EXEC }, \ + { "exec-live-update", 0, 0, OPT_EXEC_LIVE_UPDATE}, \ + { "load-preserve-context", 0, 0, OPT_LOAD_PRESERVE_CONTEXT}, \ + { "load-jump-back-helper", 0, 0, OPT_LOAD_JUMP_BACK_HELPER }, \ + { "load-live-update", 0, 0, OPT_LOAD_LIVE_UPDATE }, \ + { "entry", 1, 0, OPT_ENTRY }, \ + { "type", 1, 0, OPT_TYPE }, \ + { "load-panic", 0, 0, OPT_PANIC }, \ + { "mem-min", 1, 0, OPT_MEM_MIN }, \ + { "mem-max", 1, 0, OPT_MEM_MAX }, \ + { "reuseinitrd", 0, 0, OPT_REUSE_INITRD }, \ + { "kexec-file-syscall", 0, 0, OPT_KEXEC_FILE_SYSCALL }, \ + { "kexec-syscall", 0, 0, OPT_KEXEC_SYSCALL }, \ + { "kexec-syscall-auto", 0, 0, OPT_KEXEC_SYSCALL_AUTO }, \ + { "debug", 0, 0, OPT_DEBUG }, \ + { "status", 0, 0, OPT_STATUS }, \ + { "print-ckr-size", 0, 0, OPT_PRINT_CKR_SIZE }, \ + +#define KEXEC_OPT_STR 
"h?vdfixyluet:pscaS" + +extern void dbgprint_mem_range(const char *prefix, struct memory_range *mr, int nr_mr); +extern void die(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); +extern void *xmalloc(size_t size); +extern void *xrealloc(void *ptr, size_t size); +extern char *slurp_file(const char *filename, off_t *r_size); +extern char *slurp_file_mmap(const char *filename, off_t *r_size); +extern char *slurp_file_len(const char *filename, off_t size, off_t *nread); +extern char *slurp_decompress_file(const char *filename, off_t *r_size); +extern unsigned long virt_to_phys(unsigned long addr); +extern void add_segment(struct kexec_info *info, + const void *buf, size_t bufsz, unsigned long base, size_t memsz); +extern void add_segment_phys_virt(struct kexec_info *info, + const void *buf, size_t bufsz, unsigned long base, size_t memsz, + int phys); +extern unsigned long add_buffer(struct kexec_info *info, + const void *buf, unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, unsigned long buf_max, + int buf_end); +extern unsigned long add_buffer_virt(struct kexec_info *info, + const void *buf, unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, unsigned long buf_max, + int buf_end); +extern unsigned long add_buffer_phys_virt(struct kexec_info *info, + const void *buf, unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, unsigned long buf_max, + int buf_end, int phys); +extern void arch_reuse_initrd(void); + +extern int ifdown(void); + +extern char purgatory[]; +extern size_t purgatory_size; + +#define BOOTLOADER "kexec" +#define BOOTLOADER_VERSION PACKAGE_VERSION + /* NOTE(review): the arch_ prefix suggests these are supplied by per-architecture code - confirm. */ +void arch_usage(void); +int arch_process_options(int argc, char **argv); +int arch_compat_trampoline(struct kexec_info *info); +void arch_update_purgatory(struct kexec_info *info); +int is_crashkernel_mem_reserved(void); +int get_crash_kernel_load_range(uint64_t *start, 
uint64_t *end); +void remove_parameter(char *line, const char *param_name); +char *get_command_line(void); + +int kexec_iomem_for_each_line(char *match, + int (*callback)(void *data, + int nr, + char *str, + unsigned long long base, + unsigned long long length), + void *data); +int parse_iomem_single(char *str, uint64_t *start, uint64_t *end); +const char * proc_iomem(void); + +#define MAX_LINE 160 + +char *concat_cmdline(const char *base, const char *append); +void cmdline_add_liveupdate(char **base); + +int xen_present(void); +int xen_kexec_load(struct kexec_info *info); +int xen_kexec_unload(uint64_t kexec_flags); +int xen_kexec_exec(uint64_t kexec_flags); +int xen_kexec_status(uint64_t kexec_flags); + +extern unsigned long long get_kernel_sym(const char *text); + +/* Converts unsigned long to ascii string. */ +static inline void ultoa(unsigned long val, char *str) +{ + char buf[36]; + int len = 0, pos = 0; + + do { + buf[len++] = val % 10; + val /= 10; + } while (val); + + while (len) + str[pos++] = buf[--len] + '0'; + str[pos] = 0; +} + +#endif /* KEXEC_H */ diff --git a/kexec/libfdt/Makefile.libfdt b/kexec/libfdt/Makefile.libfdt new file mode 100644 index 0000000..9a2b4f7 --- /dev/null +++ b/kexec/libfdt/Makefile.libfdt @@ -0,0 +1,13 @@ +# Makefile.libfdt +# +# This is not a complete Makefile of itself. Instead, it is designed to +# be easily embeddable into other systems of Makefiles. +# +LIBFDT_INCLUDES = fdt.h libfdt.h +LIBFDT_SRCS = fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c +LIBFDT_OBJS = $(LIBFDT_SRCS:%.c=%.o) + +dist += kexec/libfdt/Makefile.libfdt \ + kexec/libfdt/fdt.h kexec/libfdt/libfdt.h \ + kexec/libfdt/libfdt_env.h \ + kexec/libfdt/libfdt_internal.h diff --git a/kexec/libfdt/fdt.c b/kexec/libfdt/fdt.c new file mode 100644 index 0000000..2acaec5 --- /dev/null +++ b/kexec/libfdt/fdt.c @@ -0,0 +1,201 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. 
+ * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + /* Check the blob's magic: a finished tree must fall inside the supported version window, an unfinished sequential-write blob must have a non-zero size_dt_struct. Returns 0 or a negative FDT_ERR_* code. */ +int fdt_check_header(const void *fdt) +{ + if (fdt_magic(fdt) == FDT_MAGIC) { + /* Complete tree */ + if (fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + if (fdt_last_comp_version(fdt) > FDT_LAST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + } else if (fdt_magic(fdt) == FDT_SW_MAGIC) { + /* Unfinished sequential-write blob */ + if (fdt_size_dt_struct(fdt) == 0) + return -FDT_ERR_BADSTATE; + } else { + return -FDT_ERR_BADMAGIC; + } + + return 0; +} + /* Return a pointer to len bytes at the given structure-block offset, or NULL when the range wraps or, for blobs of version >= 0x11, runs past size_dt_struct. */ +const void *fdt_offset_ptr(const void *fdt, int offset, int len) +{ + const char *p; + + if (fdt_version(fdt) >= 0x11) + if (((offset + len) < offset) + || ((offset + len) > fdt_size_dt_struct(fdt))) + return NULL; + + p = _fdt_offset_ptr(fdt, offset); + + if (p + len < p) + return NULL; + return p; +} + /* Read the tag at offset and, when nextoffset is non-NULL, store the tag-aligned offset just past it (past the name for FDT_BEGIN_NODE, past the value for FDT_PROP). Returns FDT_END on a truncated read and (uint32_t)-1 for a misaligned offset. */ +uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset) +{ + const uint32_t *tagp, *lenp; + uint32_t tag; + const char *p; + + if (offset % FDT_TAGSIZE) + return -1; + + tagp = fdt_offset_ptr(fdt, offset, FDT_TAGSIZE); + if (! tagp) + return FDT_END; /* premature end */ + tag = fdt32_to_cpu(*tagp); + offset += FDT_TAGSIZE; + + switch (tag) { + case FDT_BEGIN_NODE: + /* skip name */ + do { + p = fdt_offset_ptr(fdt, offset++, 1); + } while (p && (*p != '\0')); + if (! 
p) + return FDT_END; + break; + case FDT_PROP: + lenp = fdt_offset_ptr(fdt, offset, sizeof(*lenp)); + if (! lenp) + return FDT_END; + /* skip name offset, length and value */ + offset += 2*FDT_TAGSIZE + fdt32_to_cpu(*lenp); + break; + } + + if (nextoffset) + *nextoffset = FDT_TAGALIGN(offset); + + return tag; +} + /* Verify that offset is non-negative, tag-aligned and holds an FDT_BEGIN_NODE tag; returns the offset following that tag or -FDT_ERR_BADOFFSET. */ +int _fdt_check_node_offset(const void *fdt, int offset) +{ + if ((offset < 0) || (offset % FDT_TAGSIZE) + || (fdt_next_tag(fdt, offset, &offset) != FDT_BEGIN_NODE)) + return -FDT_ERR_BADOFFSET; + + return offset; +} + /* Advance to the next FDT_BEGIN_NODE tag after the node at offset (scanning from offset 0 when offset is negative), adjusting *depth for every node begin/end passed. Returns that tag's offset, -FDT_ERR_NOTFOUND at FDT_END or -FDT_ERR_BADSTRUCTURE on an unknown tag. */ +int fdt_next_node(const void *fdt, int offset, int *depth) +{ + int nextoffset = 0; + uint32_t tag; + + if (offset >= 0) + if ((nextoffset = _fdt_check_node_offset(fdt, offset)) < 0) + return nextoffset; + + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_PROP: + case FDT_NOP: + break; + + case FDT_BEGIN_NODE: + if (depth) + (*depth)++; + break; + + case FDT_END_NODE: + if (depth) + (*depth)--; + break; + + case FDT_END: + return -FDT_ERR_NOTFOUND; + + default: + return -FDT_ERR_BADSTRUCTURE; + } + } while (tag != FDT_BEGIN_NODE); + + return offset; +} + /* Search the tabsize bytes at strtab for s including its terminating NUL; returns a pointer to the match or NULL. */ +const char *_fdt_find_string(const char *strtab, int tabsize, const char *s) +{ + int len = strlen(s) + 1; + const char *last = strtab + tabsize - len; + const char *p; + + for (p = strtab; p <= last; p++) + if (memcmp(p, s, len) == 0) + return p; + return NULL; +} + /* Copy the whole blob (fdt_totalsize bytes) into buf with memmove; returns -FDT_ERR_NOSPACE when totalsize exceeds bufsize, 0 on success. */ +int fdt_move(const void *fdt, void *buf, int bufsize) +{ + FDT_CHECK_HEADER(fdt); + + if (fdt_totalsize(fdt) > bufsize) + return -FDT_ERR_NOSPACE; + + memmove(buf, fdt, fdt_totalsize(fdt)); + return 0; +} diff --git a/kexec/libfdt/fdt.h b/kexec/libfdt/fdt.h new file mode 100644 index 0000000..48ccfd9 --- /dev/null +++ b/kexec/libfdt/fdt.h @@ -0,0 +1,60 @@ +#ifndef _FDT_H +#define _FDT_H + +#ifndef __ASSEMBLY__ + +struct fdt_header { + uint32_t magic; /* magic word FDT_MAGIC */ + uint32_t totalsize; /* total size of DT block */ + uint32_t off_dt_struct; /* offset to 
structure */ + uint32_t off_dt_strings; /* offset to strings */ + uint32_t off_mem_rsvmap; /* offset to memory reserve map */ + uint32_t version; /* format version */ + uint32_t last_comp_version; /* last compatible version */ + + /* version 2 fields below */ + uint32_t boot_cpuid_phys; /* Which physical CPU id we're + booting on */ + /* version 3 fields below */ + uint32_t size_dt_strings; /* size of the strings block */ + + /* version 17 fields below */ + uint32_t size_dt_struct; /* size of the structure block */ +}; + /* One memory reserve map entry: an address and a size, both 64-bit. */ +struct fdt_reserve_entry { + uint64_t address; + uint64_t size; +}; + /* A tag word followed directly by the node name bytes. */ +struct fdt_node_header { + uint32_t tag; + char name[0]; +}; + /* A tag, the value length, the name's offset (nameoff), then the value bytes. */ +struct fdt_property { + uint32_t tag; + uint32_t len; + uint32_t nameoff; + char data[0]; +}; + +#endif /* !__ASSEMBLY__ */ + +#define FDT_MAGIC 0xd00dfeed /* 4: version, 4: total size */ +#define FDT_TAGSIZE sizeof(uint32_t) + +#define FDT_BEGIN_NODE 0x1 /* Start node: full name */ +#define FDT_END_NODE 0x2 /* End node */ +#define FDT_PROP 0x3 /* Property: name off, + size, content */ +#define FDT_NOP 0x4 /* nop */ +#define FDT_END 0x9 + /* Header size in bytes for each format version; each step adds the uint32_t fields introduced by that version. */ +#define FDT_V1_SIZE (7*sizeof(uint32_t)) +#define FDT_V2_SIZE (FDT_V1_SIZE + sizeof(uint32_t)) +#define FDT_V3_SIZE (FDT_V2_SIZE + sizeof(uint32_t)) +#define FDT_V16_SIZE FDT_V3_SIZE +#define FDT_V17_SIZE (FDT_V16_SIZE + sizeof(uint32_t)) + +#endif /* _FDT_H */ diff --git a/kexec/libfdt/fdt_ro.c b/kexec/libfdt/fdt_ro.c new file mode 100644 index 0000000..129b532 --- /dev/null +++ b/kexec/libfdt/fdt_ro.c @@ -0,0 +1,466 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. 
+ * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + /* Return 1 when the name of the node at offset equals the first len bytes of s, either exactly or up to an '@' that s itself does not contain; 0 otherwise. */ +static int _fdt_nodename_eq(const void *fdt, int offset, + const char *s, int len) +{ + const char *p = fdt_offset_ptr(fdt, offset + FDT_TAGSIZE, len+1); + + if (! p) + /* short match */ + return 0; + + if (memcmp(p, s, len) != 0) + return 0; + + if (p[len] == '\0') + return 1; + else if (!memchr(s, '@', len) && (p[len] == '@')) + return 1; + else + return 0; +} + /* Return the string at stroffset inside the strings block; no bounds check is made here. */ +const char *fdt_string(const void *fdt, int stroffset) +{ + return (const char *)fdt + fdt_off_dt_strings(fdt) + stroffset; +} + /* Store the n-th reserve map entry in *address and *size, converted with fdt64_to_cpu. */ +int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size) +{ + FDT_CHECK_HEADER(fdt); + *address = fdt64_to_cpu(_fdt_mem_rsv(fdt, n)->address); + *size = fdt64_to_cpu(_fdt_mem_rsv(fdt, n)->size); + return 0; +} + /* Count reserve map entries up to the first one whose size is zero. */ +int fdt_num_mem_rsv(const void *fdt) +{ + int i = 0; + + while (fdt64_to_cpu(_fdt_mem_rsv(fdt, i)->size) != 0) + i++; + return i; +} + /* Find the depth-1 child of the node at offset whose name matches the first namelen bytes of name; returns its offset, -FDT_ERR_NOTFOUND once the walk leaves the parent, or an error from fdt_next_node(). */ +int fdt_subnode_offset_namelen(const void *fdt, int offset, + const char *name, int namelen) +{ + int depth; + + FDT_CHECK_HEADER(fdt); + + for (depth = 0; + offset >= 0; + offset = fdt_next_node(fdt, offset, &depth)) { + if (depth < 0) + return -FDT_ERR_NOTFOUND; + else if ((depth == 1) + && _fdt_nodename_eq(fdt, offset, name, namelen)) + return offset; + } + + return offset; /* error */ +} + /* Convenience wrapper: same lookup using the full NUL-terminated name. */ +int fdt_subnode_offset(const void *fdt, int parentoffset, + const char *name) +{ 
+ return fdt_subnode_offset_namelen(fdt, parentoffset, name, strlen(name)); +} + /* Resolve an absolute '/'-separated path to a node offset by descending one component at a time; returns -FDT_ERR_BADPATH when path does not start with '/'. */ +int fdt_path_offset(const void *fdt, const char *path) +{ + const char *end = path + strlen(path); + const char *p = path; + int offset = 0; + + FDT_CHECK_HEADER(fdt); + + if (*path != '/') + return -FDT_ERR_BADPATH; + + while (*p) { + const char *q; + + while (*p == '/') + p++; + if (! *p) + return offset; + q = strchr(p, '/'); + if (! q) + q = end; + + offset = fdt_subnode_offset_namelen(fdt, offset, p, q-p); + if (offset < 0) + return offset; + + p = q; + } + + return offset; +} + /* Return the name of the node at nodeoffset and store its length in *len; on failure return NULL and store the negative error code in *len. */ +const char *fdt_get_name(const void *fdt, int nodeoffset, int *len) +{ + const struct fdt_node_header *nh = _fdt_offset_ptr(fdt, nodeoffset); + int err; + + if (((err = fdt_check_header(fdt)) != 0) + || ((err = _fdt_check_node_offset(fdt, nodeoffset)) < 0)) + goto fail; + + if (len) + *len = strlen(nh->name); + + return nh->name; + + fail: + if (len) + *len = err; + return NULL; +} + /* Scan the tags following the node at nodeoffset for a property called name. On success return it and store the value length in *lenp; on failure return NULL and store a negative FDT_ERR_* code in *lenp. */ +const struct fdt_property *fdt_get_property(const void *fdt, + int nodeoffset, + const char *name, int *lenp) +{ + uint32_t tag; + const struct fdt_property *prop; + int namestroff; + int offset, nextoffset; + int err; + + if (((err = fdt_check_header(fdt)) != 0) + || ((err = _fdt_check_node_offset(fdt, nodeoffset)) < 0)) + goto fail; + + nextoffset = err; + do { + offset = nextoffset; + + tag = fdt_next_tag(fdt, offset, &nextoffset); + switch (tag) { + case FDT_END: + err = -FDT_ERR_TRUNCATED; + goto fail; + + case FDT_BEGIN_NODE: + case FDT_END_NODE: + case FDT_NOP: + break; + + case FDT_PROP: + err = -FDT_ERR_BADSTRUCTURE; + prop = fdt_offset_ptr(fdt, offset, sizeof(*prop)); + if (! prop) + goto fail; + namestroff = fdt32_to_cpu(prop->nameoff); + if (strcmp(fdt_string(fdt, namestroff), name) == 0) { + /* Found it! */ + int len = fdt32_to_cpu(prop->len); + prop = fdt_offset_ptr(fdt, offset, + sizeof(*prop)+len); + if (! 
prop) + goto fail; + + if (lenp) + *lenp = len; + + return prop; + } + break; + + default: + err = -FDT_ERR_BADSTRUCTURE; + goto fail; + } + } while ((tag != FDT_BEGIN_NODE) && (tag != FDT_END_NODE)); + + err = -FDT_ERR_NOTFOUND; + fail: + if (lenp) + *lenp = err; + return NULL; +} + /* Like fdt_get_property() but return a pointer to the property's value bytes. */ +const void *fdt_getprop(const void *fdt, int nodeoffset, + const char *name, int *lenp) +{ + const struct fdt_property *prop; + + prop = fdt_get_property(fdt, nodeoffset, name, lenp); + if (! prop) + return NULL; + + return prop->data; +} + /* Return the node's "linux,phandle" value, or 0 when the property is missing or its length is not sizeof(uint32_t). */ +uint32_t fdt_get_phandle(const void *fdt, int nodeoffset) +{ + const uint32_t *php; + int len; + + php = fdt_getprop(fdt, nodeoffset, "linux,phandle", &len); + if (!php || (len != sizeof(*php))) + return 0; + + return fdt32_to_cpu(*php); +} + /* Build the full path of the node at nodeoffset in buf by walking the tree from offset 0; returns the path length, -FDT_ERR_NOSPACE when buf is too small, or another negative error code. */ +int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen) +{ + int pdepth = 0, p = 0; + int offset, depth, namelen; + const char *name; + + FDT_CHECK_HEADER(fdt); + + if (buflen < 2) + return -FDT_ERR_NOSPACE; + + for (offset = 0, depth = 0; + (offset >= 0) && (offset <= nodeoffset); + offset = fdt_next_node(fdt, offset, &depth)) { + if (pdepth < depth) + continue; /* overflowed buffer */ + + while (pdepth > depth) { + do { + p--; + } while (buf[p-1] != '/'); + pdepth--; + } + + name = fdt_get_name(fdt, offset, &namelen); + if (!name) + return namelen; + if ((p + namelen + 1) <= buflen) { + memcpy(buf + p, name, namelen); + p += namelen; + buf[p++] = '/'; + pdepth++; + } + + if (offset == nodeoffset) { + if (pdepth < (depth + 1)) + return -FDT_ERR_NOSPACE; + + if (p > 1) /* special case so that root path is "/", not "" */ + p--; + buf[p] = '\0'; + return p; + } + } + + if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0)) + return -FDT_ERR_BADOFFSET; + else if (offset == -FDT_ERR_BADOFFSET) + return -FDT_ERR_BADSTRUCTURE; + + return offset; /* error from fdt_next_node() */ +} + /* Find the ancestor of the node at nodeoffset that sits at depth supernodedepth; when nodedepth is non-NULL also report the node's own depth. */ +int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset, + int supernodedepth, int *nodedepth) +{ + int 
offset, depth; + int supernodeoffset = -FDT_ERR_INTERNAL; + + FDT_CHECK_HEADER(fdt); + + if (supernodedepth < 0) + return -FDT_ERR_NOTFOUND; + + for (offset = 0, depth = 0; + (offset >= 0) && (offset <= nodeoffset); + offset = fdt_next_node(fdt, offset, &depth)) { + if (depth == supernodedepth) + supernodeoffset = offset; + + if (offset == nodeoffset) { + if (nodedepth) + *nodedepth = depth; + + if (supernodedepth > depth) + return -FDT_ERR_NOTFOUND; + else + return supernodeoffset; + } + } + + if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0)) + return -FDT_ERR_BADOFFSET; + else if (offset == -FDT_ERR_BADOFFSET) + return -FDT_ERR_BADSTRUCTURE; + + return offset; /* error from fdt_next_node() */ +} + /* Return the depth of the node at nodeoffset (the node at offset 0 has depth 0) or a negative error code. */ +int fdt_node_depth(const void *fdt, int nodeoffset) +{ + int nodedepth; + int err; + + err = fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, &nodedepth); + if (err) + return (err < 0) ? err : -FDT_ERR_INTERNAL; + return nodedepth; +} + /* Return the offset of the ancestor one level above the node at nodeoffset, or a negative error code. */ +int fdt_parent_offset(const void *fdt, int nodeoffset) +{ + int nodedepth = fdt_node_depth(fdt, nodeoffset); + + if (nodedepth < 0) + return nodedepth; + return fdt_supernode_atdepth_offset(fdt, nodeoffset, + nodedepth - 1, NULL); +} + /* Return the first node after startoffset whose property propname holds exactly the proplen bytes at propval; otherwise the error from fdt_next_node(). */ +int fdt_node_offset_by_prop_value(const void *fdt, int startoffset, + const char *propname, + const void *propval, int proplen) +{ + int offset; + const void *val; + int len; + + FDT_CHECK_HEADER(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_getprop(), then if that didn't + * find what we want, we scan over them again making our way + * to the next node. Still it's the easiest to implement + * approach; performance can come later. 
*/ + for (offset = fdt_next_node(fdt, startoffset, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + val = fdt_getprop(fdt, offset, propname, &len); + if (val && (len == proplen) + && (memcmp(val, propval, len) == 0)) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle) +{ + if ((phandle == 0) || (phandle == -1)) + return -FDT_ERR_BADPHANDLE; + phandle = cpu_to_fdt32(phandle); + return fdt_node_offset_by_prop_value(fdt, -1, "linux,phandle", + &phandle, sizeof(phandle)); +} + +int _stringlist_contains(const char *strlist, int listlen, const char *str) +{ + int len = strlen(str); + const char *p; + + while (listlen >= len) { + if (memcmp(str, strlist, len+1) == 0) + return 1; + p = memchr(strlist, '\0', listlen); + if (!p) + return 0; /* malformed strlist.. */ + listlen -= (p-strlist) + 1; + strlist = p + 1; + } + return 0; +} + +int fdt_node_check_compatible(const void *fdt, int nodeoffset, + const char *compatible) +{ + const void *prop; + int len; + + prop = fdt_getprop(fdt, nodeoffset, "compatible", &len); + if (!prop) + return len; + if (_stringlist_contains(prop, len, compatible)) + return 0; + else + return 1; +} + +int fdt_node_offset_by_compatible(const void *fdt, int startoffset, + const char *compatible) +{ + int offset, err; + + FDT_CHECK_HEADER(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_node_check_compatible(), then if + * that didn't find what we want, we scan over them again + * making our way to the next node. Still it's the easiest to + * implement approach; performance can come later. 
*/ + for (offset = fdt_next_node(fdt, startoffset, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + err = fdt_node_check_compatible(fdt, offset, compatible); + if ((err < 0) && (err != -FDT_ERR_NOTFOUND)) + return err; + else if (err == 0) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} diff --git a/kexec/libfdt/fdt_rw.c b/kexec/libfdt/fdt_rw.c new file mode 100644 index 0000000..8e7ec4c --- /dev/null +++ b/kexec/libfdt/fdt_rw.c @@ -0,0 +1,463 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. 
Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +static int _fdt_blocks_misordered(const void *fdt, + int mem_rsv_size, int struct_size) +{ + return (fdt_off_mem_rsvmap(fdt) < FDT_ALIGN(sizeof(struct fdt_header), 8)) + || (fdt_off_dt_struct(fdt) < + (fdt_off_mem_rsvmap(fdt) + mem_rsv_size)) + || (fdt_off_dt_strings(fdt) < + (fdt_off_dt_struct(fdt) + struct_size)) + || (fdt_totalsize(fdt) < + (fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt))); +} + +static int _fdt_rw_check_header(void *fdt) +{ + FDT_CHECK_HEADER(fdt); + + if (fdt_version(fdt) < 17) + return -FDT_ERR_BADVERSION; + if (_fdt_blocks_misordered(fdt, sizeof(struct fdt_reserve_entry), + fdt_size_dt_struct(fdt))) + return -FDT_ERR_BADLAYOUT; + if (fdt_version(fdt) > 17) + fdt_set_version(fdt, 17); + + return 0; +} + +#define FDT_RW_CHECK_HEADER(fdt) \ + { \ + int err; \ + if ((err = _fdt_rw_check_header(fdt)) != 0) \ + return err; \ + } + +static inline int _fdt_data_size(void *fdt) +{ + return fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt); +} + +static int _fdt_splice(void *fdt, void *splicepoint, int oldlen, int newlen) +{ + char *p = splicepoint; + char *end = (char *)fdt + _fdt_data_size(fdt); + + if (((p + oldlen) < p) || ((p + oldlen) > end)) + return -FDT_ERR_BADOFFSET; + if ((end - oldlen + newlen) > ((char *)fdt + fdt_totalsize(fdt))) + return -FDT_ERR_NOSPACE; + memmove(p + newlen, p + oldlen, end - p - oldlen); + return 0; +} + +static int _fdt_splice_mem_rsv(void *fdt, struct fdt_reserve_entry *p, + int oldn, int newn) +{ + int delta = (newn - oldn) * sizeof(*p); + int err; + err = _fdt_splice(fdt, p, oldn * sizeof(*p), newn * sizeof(*p)); + if (err) + return err; + fdt_set_off_dt_struct(fdt, fdt_off_dt_struct(fdt) + delta); + fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta); + return 0; +} + +static int _fdt_splice_struct(void *fdt, void *p, + int oldlen, int newlen) +{ + int delta = newlen - oldlen; + int err; + + if 
((err = _fdt_splice(fdt, p, oldlen, newlen))) + return err; + + fdt_set_size_dt_struct(fdt, fdt_size_dt_struct(fdt) + delta); + fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta); + return 0; +} + +static int _fdt_splice_string(void *fdt, int newlen) +{ + void *p = (char *)fdt + + fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt); + int err; + + if ((err = _fdt_splice(fdt, p, 0, newlen))) + return err; + + fdt_set_size_dt_strings(fdt, fdt_size_dt_strings(fdt) + newlen); + return 0; +} + +static int _fdt_find_add_string(void *fdt, const char *s) +{ + char *strtab = (char *)fdt + fdt_off_dt_strings(fdt); + const char *p; + char *new; + int len = strlen(s) + 1; + int err; + + p = _fdt_find_string(strtab, fdt_size_dt_strings(fdt), s); + if (p) + /* found it */ + return (p - strtab); + + new = strtab + fdt_size_dt_strings(fdt); + err = _fdt_splice_string(fdt, len); + if (err) + return err; + + memcpy(new, s, len); + return (new - strtab); +} + +int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size) +{ + struct fdt_reserve_entry *re; + int err; + + FDT_RW_CHECK_HEADER(fdt); + + re = _fdt_mem_rsv_w(fdt, fdt_num_mem_rsv(fdt)); + err = _fdt_splice_mem_rsv(fdt, re, 0, 1); + if (err) + return err; + + re->address = cpu_to_fdt64(address); + re->size = cpu_to_fdt64(size); + return 0; +} + +int fdt_del_mem_rsv(void *fdt, int n) +{ + struct fdt_reserve_entry *re = _fdt_mem_rsv_w(fdt, n); + int err; + + FDT_RW_CHECK_HEADER(fdt); + + if (n >= fdt_num_mem_rsv(fdt)) + return -FDT_ERR_NOTFOUND; + + err = _fdt_splice_mem_rsv(fdt, re, 1, 0); + if (err) + return err; + return 0; +} + +static int _fdt_resize_property(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int oldlen; + int err; + + *prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen); + if (! 
(*prop)) + return oldlen; + + if ((err = _fdt_splice_struct(fdt, (*prop)->data, FDT_TAGALIGN(oldlen), + FDT_TAGALIGN(len)))) + return err; + + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +static int _fdt_add_property(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int proplen; + int nextoffset; + int namestroff; + int err; + + if ((nextoffset = _fdt_check_node_offset(fdt, nodeoffset)) < 0) + return nextoffset; + + namestroff = _fdt_find_add_string(fdt, name); + if (namestroff < 0) + return namestroff; + + *prop = _fdt_offset_ptr_w(fdt, nextoffset); + proplen = sizeof(**prop) + FDT_TAGALIGN(len); + + err = _fdt_splice_struct(fdt, *prop, 0, proplen); + if (err) + return err; + + (*prop)->tag = cpu_to_fdt32(FDT_PROP); + (*prop)->nameoff = cpu_to_fdt32(namestroff); + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +int fdt_set_name(void *fdt, int nodeoffset, const char *name) +{ + char *namep; + int oldlen, newlen; + int err; + + FDT_RW_CHECK_HEADER(fdt); + + namep = (char *)(uintptr_t)fdt_get_name(fdt, nodeoffset, &oldlen); + if (!namep) + return oldlen; + + newlen = strlen(name); + + err = _fdt_splice_struct(fdt, namep, FDT_TAGALIGN(oldlen+1), + FDT_TAGALIGN(newlen+1)); + if (err) + return err; + + memcpy(namep, name, newlen+1); + return 0; +} + +int fdt_setprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + struct fdt_property *prop; + int err; + + FDT_RW_CHECK_HEADER(fdt); + + err = _fdt_resize_property(fdt, nodeoffset, name, len, &prop); + if (err == -FDT_ERR_NOTFOUND) + err = _fdt_add_property(fdt, nodeoffset, name, len, &prop); + if (err) + return err; + + memcpy(prop->data, val, len); + return 0; +} + +int fdt_delprop(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len, proplen; + + FDT_RW_CHECK_HEADER(fdt); + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (! 
prop) + return len; + + proplen = sizeof(*prop) + FDT_TAGALIGN(len); + return _fdt_splice_struct(fdt, prop, proplen, 0); +} + +int fdt_add_subnode_namelen(void *fdt, int parentoffset, + const char *name, int namelen) +{ + struct fdt_node_header *nh; + int offset, nextoffset; + int nodelen; + int err; + uint32_t tag; + uint32_t *endtag; + + FDT_RW_CHECK_HEADER(fdt); + + offset = fdt_subnode_offset_namelen(fdt, parentoffset, name, namelen); + if (offset >= 0) + return -FDT_ERR_EXISTS; + else if (offset != -FDT_ERR_NOTFOUND) + return offset; + + /* Try to place the new node after the parent's properties */ + fdt_next_tag(fdt, parentoffset, &nextoffset); /* skip the BEGIN_NODE */ + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + } while ((tag == FDT_PROP) || (tag == FDT_NOP)); + + nh = _fdt_offset_ptr_w(fdt, offset); + nodelen = sizeof(*nh) + FDT_TAGALIGN(namelen+1) + FDT_TAGSIZE; + + err = _fdt_splice_struct(fdt, nh, 0, nodelen); + if (err) + return err; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memset(nh->name, 0, FDT_TAGALIGN(namelen+1)); + memcpy(nh->name, name, namelen); + endtag = (uint32_t *)((char *)nh + nodelen - FDT_TAGSIZE); + *endtag = cpu_to_fdt32(FDT_END_NODE); + + return offset; +} + +int fdt_add_subnode(void *fdt, int parentoffset, const char *name) +{ + return fdt_add_subnode_namelen(fdt, parentoffset, name, strlen(name)); +} + +int fdt_del_node(void *fdt, int nodeoffset) +{ + int endoffset; + + FDT_RW_CHECK_HEADER(fdt); + + endoffset = _fdt_node_end_offset(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + return _fdt_splice_struct(fdt, _fdt_offset_ptr_w(fdt, nodeoffset), + endoffset - nodeoffset, 0); +} + +static void _fdt_packblocks(const char *old, char *new, + int mem_rsv_size, int struct_size) +{ + int mem_rsv_off, struct_off, strings_off; + + mem_rsv_off = FDT_ALIGN(sizeof(struct fdt_header), 8); + struct_off = mem_rsv_off + mem_rsv_size; + strings_off = struct_off + struct_size; + + memmove(new + 
mem_rsv_off, old + fdt_off_mem_rsvmap(old), mem_rsv_size); + fdt_set_off_mem_rsvmap(new, mem_rsv_off); + + memmove(new + struct_off, old + fdt_off_dt_struct(old), struct_size); + fdt_set_off_dt_struct(new, struct_off); + fdt_set_size_dt_struct(new, struct_size); + + memmove(new + strings_off, old + fdt_off_dt_strings(old), + fdt_size_dt_strings(old)); + fdt_set_off_dt_strings(new, strings_off); + fdt_set_size_dt_strings(new, fdt_size_dt_strings(old)); +} + +int fdt_open_into(const void *fdt, void *buf, int bufsize) +{ + int err; + int mem_rsv_size, struct_size; + int newsize; + const char *fdtstart = fdt; + const char *fdtend = fdtstart + fdt_totalsize(fdt); + char *tmp; + + FDT_CHECK_HEADER(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + + if (fdt_version(fdt) >= 17) { + struct_size = fdt_size_dt_struct(fdt); + } else { + struct_size = 0; + while (fdt_next_tag(fdt, struct_size, &struct_size) != FDT_END) + ; + } + + if (!_fdt_blocks_misordered(fdt, mem_rsv_size, struct_size)) { + /* no further work necessary */ + err = fdt_move(fdt, buf, bufsize); + if (err) + return err; + fdt_set_version(buf, 17); + fdt_set_size_dt_struct(buf, struct_size); + fdt_set_totalsize(buf, bufsize); + return 0; + } + + /* Need to reorder */ + newsize = FDT_ALIGN(sizeof(struct fdt_header), 8) + mem_rsv_size + + struct_size + fdt_size_dt_strings(fdt); + + if (bufsize < newsize) + return -FDT_ERR_NOSPACE; + + /* First attempt to build converted tree at beginning of buffer */ + tmp = buf; + /* But if that overlaps with the old tree... 
*/ + if (((tmp + newsize) > fdtstart) && (tmp < fdtend)) { + /* Try right after the old tree instead */ + tmp = (char *)(uintptr_t)fdtend; + if ((tmp + newsize) > ((char *)buf + bufsize)) + return -FDT_ERR_NOSPACE; + } + + _fdt_packblocks(fdt, tmp, mem_rsv_size, struct_size); + memmove(buf, tmp, newsize); + + fdt_set_magic(buf, FDT_MAGIC); + fdt_set_totalsize(buf, bufsize); + fdt_set_version(buf, 17); + fdt_set_last_comp_version(buf, 16); + fdt_set_boot_cpuid_phys(buf, fdt_boot_cpuid_phys(fdt)); + + return 0; +} + +int fdt_pack(void *fdt) +{ + int mem_rsv_size; + + FDT_RW_CHECK_HEADER(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + _fdt_packblocks(fdt, fdt, mem_rsv_size, fdt_size_dt_struct(fdt)); + fdt_set_totalsize(fdt, _fdt_data_size(fdt)); + + return 0; +} diff --git a/kexec/libfdt/fdt_strerror.c b/kexec/libfdt/fdt_strerror.c new file mode 100644 index 0000000..e6c3cee --- /dev/null +++ b/kexec/libfdt/fdt_strerror.c @@ -0,0 +1,96 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +struct fdt_errtabent { + const char *str; +}; + +#define FDT_ERRTABENT(val) \ + [(val)] = { .str = #val, } + +static struct fdt_errtabent fdt_errtable[] = { + FDT_ERRTABENT(FDT_ERR_NOTFOUND), + FDT_ERRTABENT(FDT_ERR_EXISTS), + FDT_ERRTABENT(FDT_ERR_NOSPACE), + + FDT_ERRTABENT(FDT_ERR_BADOFFSET), + FDT_ERRTABENT(FDT_ERR_BADPATH), + FDT_ERRTABENT(FDT_ERR_BADSTATE), + + FDT_ERRTABENT(FDT_ERR_TRUNCATED), + FDT_ERRTABENT(FDT_ERR_BADMAGIC), + FDT_ERRTABENT(FDT_ERR_BADVERSION), + FDT_ERRTABENT(FDT_ERR_BADSTRUCTURE), + FDT_ERRTABENT(FDT_ERR_BADLAYOUT), +}; +#define FDT_ERRTABSIZE (sizeof(fdt_errtable) / sizeof(fdt_errtable[0])) + +const char *fdt_strerror(int errval) +{ + if (errval > 0) + return "<valid offset/length>"; + else if (errval == 0) + return "<no error>"; + else if (errval > -FDT_ERRTABSIZE) { + const char *s = fdt_errtable[-errval].str; + + if (s) + return s; + } + + return "<unknown error>"; +} diff --git a/kexec/libfdt/fdt_sw.c b/kexec/libfdt/fdt_sw.c new file mode 100644 index 0000000..698329e --- /dev/null +++ b/kexec/libfdt/fdt_sw.c @@ -0,0 +1,257 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +static int _fdt_sw_check_header(void *fdt) +{ + if (fdt_magic(fdt) != FDT_SW_MAGIC) + return -FDT_ERR_BADMAGIC; + /* FIXME: should check more details about the header state */ + return 0; +} + +#define FDT_SW_CHECK_HEADER(fdt) \ + { \ + int err; \ + if ((err = _fdt_sw_check_header(fdt)) != 0) \ + return err; \ + } + +static void *_fdt_grab_space(void *fdt, int len) +{ + int offset = fdt_size_dt_struct(fdt); + int spaceleft; + + spaceleft = fdt_totalsize(fdt) - fdt_off_dt_struct(fdt) + - fdt_size_dt_strings(fdt); + + if ((offset + len < offset) || (offset + len > spaceleft)) + return NULL; + + fdt_set_size_dt_struct(fdt, offset + len); + return fdt_offset_ptr_w(fdt, offset, len); +} + +int fdt_create(void *buf, int bufsize) +{ + void *fdt = buf; + + if (bufsize < sizeof(struct fdt_header)) + return -FDT_ERR_NOSPACE; + + memset(buf, 0, bufsize); + + fdt_set_magic(fdt, FDT_SW_MAGIC); + fdt_set_version(fdt, FDT_LAST_SUPPORTED_VERSION); + fdt_set_last_comp_version(fdt, FDT_FIRST_SUPPORTED_VERSION); + fdt_set_totalsize(fdt, bufsize); + + fdt_set_off_mem_rsvmap(fdt, FDT_ALIGN(sizeof(struct fdt_header), + sizeof(struct fdt_reserve_entry))); + fdt_set_off_dt_struct(fdt, fdt_off_mem_rsvmap(fdt)); + fdt_set_off_dt_strings(fdt, bufsize); + + return 0; +} + +int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size) +{ + struct fdt_reserve_entry *re; + int offset; + + FDT_SW_CHECK_HEADER(fdt); + + if (fdt_size_dt_struct(fdt)) + return -FDT_ERR_BADSTATE; + + offset = fdt_off_dt_struct(fdt); + if ((offset + sizeof(*re)) > fdt_totalsize(fdt)) + return -FDT_ERR_NOSPACE; + + re = (struct fdt_reserve_entry *)((char *)fdt + offset); + re->address = cpu_to_fdt64(addr); + re->size = cpu_to_fdt64(size); + + fdt_set_off_dt_struct(fdt, offset + sizeof(*re)); + + return 0; +} + +int fdt_finish_reservemap(void *fdt) +{ + return fdt_add_reservemap_entry(fdt, 0, 0); +} + +int 
fdt_begin_node(void *fdt, const char *name) +{ + struct fdt_node_header *nh; + int namelen = strlen(name) + 1; + + FDT_SW_CHECK_HEADER(fdt); + + nh = _fdt_grab_space(fdt, sizeof(*nh) + FDT_TAGALIGN(namelen)); + if (! nh) + return -FDT_ERR_NOSPACE; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memcpy(nh->name, name, namelen); + return 0; +} + +int fdt_end_node(void *fdt) +{ + uint32_t *en; + + FDT_SW_CHECK_HEADER(fdt); + + en = _fdt_grab_space(fdt, FDT_TAGSIZE); + if (! en) + return -FDT_ERR_NOSPACE; + + *en = cpu_to_fdt32(FDT_END_NODE); + return 0; +} + +static int _fdt_find_add_string(void *fdt, const char *s) +{ + char *strtab = (char *)fdt + fdt_totalsize(fdt); + const char *p; + int strtabsize = fdt_size_dt_strings(fdt); + int len = strlen(s) + 1; + int struct_top, offset; + + p = _fdt_find_string(strtab - strtabsize, strtabsize, s); + if (p) + return p - strtab; + + /* Add it */ + offset = -strtabsize - len; + struct_top = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + if (fdt_totalsize(fdt) + offset < struct_top) + return 0; /* no more room :( */ + + memcpy(strtab + offset, s, len); + fdt_set_size_dt_strings(fdt, strtabsize + len); + return offset; +} + +int fdt_property(void *fdt, const char *name, const void *val, int len) +{ + struct fdt_property *prop; + int nameoff; + + FDT_SW_CHECK_HEADER(fdt); + + nameoff = _fdt_find_add_string(fdt, name); + if (nameoff == 0) + return -FDT_ERR_NOSPACE; + + prop = _fdt_grab_space(fdt, sizeof(*prop) + FDT_TAGALIGN(len)); + if (! prop) + return -FDT_ERR_NOSPACE; + + prop->tag = cpu_to_fdt32(FDT_PROP); + prop->nameoff = cpu_to_fdt32(nameoff); + prop->len = cpu_to_fdt32(len); + memcpy(prop->data, val, len); + return 0; +} + +int fdt_finish(void *fdt) +{ + char *p = (char *)fdt; + uint32_t *end; + int oldstroffset, newstroffset; + uint32_t tag; + int offset, nextoffset; + + FDT_SW_CHECK_HEADER(fdt); + + /* Add terminator */ + end = _fdt_grab_space(fdt, sizeof(*end)); + if (! 
end) + return -FDT_ERR_NOSPACE; + *end = cpu_to_fdt32(FDT_END); + + /* Relocate the string table */ + oldstroffset = fdt_totalsize(fdt) - fdt_size_dt_strings(fdt); + newstroffset = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + memmove(p + newstroffset, p + oldstroffset, fdt_size_dt_strings(fdt)); + fdt_set_off_dt_strings(fdt, newstroffset); + + /* Walk the structure, correcting string offsets */ + offset = 0; + while ((tag = fdt_next_tag(fdt, offset, &nextoffset)) != FDT_END) { + if (tag == FDT_PROP) { + struct fdt_property *prop = + fdt_offset_ptr_w(fdt, offset, sizeof(*prop)); + int nameoff; + + if (! prop) + return -FDT_ERR_BADSTRUCTURE; + + nameoff = fdt32_to_cpu(prop->nameoff); + nameoff += fdt_size_dt_strings(fdt); + prop->nameoff = cpu_to_fdt32(nameoff); + } + offset = nextoffset; + } + + /* Finally, adjust the header */ + fdt_set_totalsize(fdt, newstroffset + fdt_size_dt_strings(fdt)); + fdt_set_magic(fdt, FDT_MAGIC); + return 0; +} diff --git a/kexec/libfdt/fdt_wip.c b/kexec/libfdt/fdt_wip.c new file mode 100644 index 0000000..a4652c6 --- /dev/null +++ b/kexec/libfdt/fdt_wip.c @@ -0,0 +1,145 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + void *propval; + int proplen; + + propval = fdt_getprop_w(fdt, nodeoffset, name, &proplen); + if (! 
propval) + return proplen; + + if (proplen != len) + return -FDT_ERR_NOSPACE; + + memcpy(propval, val, len); + return 0; +} + +static void _fdt_nop_region(void *start, int len) +{ + uint32_t *p; + + for (p = start; (char *)p < ((char *)start + len); p++) + *p = cpu_to_fdt32(FDT_NOP); +} + +int fdt_nop_property(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len; + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (! prop) + return len; + + _fdt_nop_region(prop, len + sizeof(*prop)); + + return 0; +} + +int _fdt_node_end_offset(void *fdt, int nodeoffset) +{ + int level = 0; + uint32_t tag; + int offset, nextoffset; + + tag = fdt_next_tag(fdt, nodeoffset, &nextoffset); + if (tag != FDT_BEGIN_NODE) + return -FDT_ERR_BADOFFSET; + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_END: + return offset; + + case FDT_BEGIN_NODE: + level++; + break; + + case FDT_END_NODE: + level--; + break; + + case FDT_PROP: + case FDT_NOP: + break; + + default: + return -FDT_ERR_BADSTRUCTURE; + } + } while (level >= 0); + + return nextoffset; +} + +int fdt_nop_node(void *fdt, int nodeoffset) +{ + int endoffset; + + endoffset = _fdt_node_end_offset(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + _fdt_nop_region(fdt_offset_ptr_w(fdt, nodeoffset, 0), + endoffset - nodeoffset); + return 0; +} diff --git a/kexec/libfdt/libfdt.h b/kexec/libfdt/libfdt.h new file mode 100644 index 0000000..87a24ab --- /dev/null +++ b/kexec/libfdt/libfdt.h @@ -0,0 +1,1100 @@ +#ifndef _LIBFDT_H +#define _LIBFDT_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. 
+ * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <libfdt_env.h> +#include <fdt.h> + +#define FDT_FIRST_SUPPORTED_VERSION 0x10 +#define FDT_LAST_SUPPORTED_VERSION 0x11 + +/* Error codes: informative error codes */ +#define FDT_ERR_NOTFOUND 1 + /* FDT_ERR_NOTFOUND: The requested node or property does not exist */ +#define FDT_ERR_EXISTS 2 + /* FDT_ERR_EXISTS: Attemped to create a node or property which + * already exists */ +#define FDT_ERR_NOSPACE 3 + /* FDT_ERR_NOSPACE: Operation needed to expand the device + * tree, but its buffer did not have sufficient space to + * contain the expanded tree. Use fdt_open_into() to move the + * device tree to a buffer with more space. */ + +/* Error codes: codes for bad parameters */ +#define FDT_ERR_BADOFFSET 4 + /* FDT_ERR_BADOFFSET: Function was passed a structure block + * offset which is out-of-bounds, or which points to an + * unsuitable part of the structure for the operation. */ +#define FDT_ERR_BADPATH 5 + /* FDT_ERR_BADPATH: Function was passed a badly formatted path + * (e.g. missing a leading / for a function which requires an + * absolute path) */ +#define FDT_ERR_BADPHANDLE 6 + /* FDT_ERR_BADPHANDLE: Function was passed an invalid phandle + * value. phandle values of 0 and -1 are not permitted. */ +#define FDT_ERR_BADSTATE 7 + /* FDT_ERR_BADSTATE: Function was passed an incomplete device + * tree created by the sequential-write functions, which is + * not sufficiently complete for the requested operation. 
*/ + +/* Error codes: codes for bad device tree blobs */ +#define FDT_ERR_TRUNCATED 8 + /* FDT_ERR_TRUNCATED: Structure block of the given device tree + * ends without an FDT_END tag. */ +#define FDT_ERR_BADMAGIC 9 + /* FDT_ERR_BADMAGIC: Given "device tree" appears not to be a + * device tree at all - it is missing the flattened device + * tree magic number. */ +#define FDT_ERR_BADVERSION 10 + /* FDT_ERR_BADVERSION: Given device tree has a version which + * can't be handled by the requested operation. For + * read-write functions, this may mean that fdt_open_into() is + * required to convert the tree to the expected version. */ +#define FDT_ERR_BADSTRUCTURE 11 + /* FDT_ERR_BADSTRUCTURE: Given device tree has a corrupt + * structure block or other serious error (e.g. misnested + * nodes, or subnodes preceding properties). */ +#define FDT_ERR_BADLAYOUT 12 + /* FDT_ERR_BADLAYOUT: For read-write functions, the given + * device tree has its sub-blocks in an order that the + * function can't handle (memory reserve map, then structure, + * then strings). Use fdt_open_into() to reorganize the tree + * into a form suitable for the read-write operations. */ + +/* "Can't happen" error indicating a bug in libfdt */ +#define FDT_ERR_INTERNAL 13 + /* FDT_ERR_INTERNAL: libfdt has failed an internal assertion. + * Should never be returned, if it is, it indicates a bug in + * libfdt itself. 
*/ + +#define FDT_ERR_MAX 13 + +/**********************************************************************/ +/* Low-level functions (you probably don't need these) */ +/**********************************************************************/ + +const void *fdt_offset_ptr(const void *fdt, int offset, int checklen); +static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen) +{ + return (void *)(uintptr_t)fdt_offset_ptr(fdt, offset, checklen); +} + +uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset); + +/**********************************************************************/ +/* Traversal functions */ +/**********************************************************************/ + +int fdt_next_node(const void *fdt, int offset, int *depth); + +/**********************************************************************/ +/* General functions */ +/**********************************************************************/ + +#define fdt_get_header(fdt, field) \ + (fdt32_to_cpu(((const struct fdt_header *)(fdt))->field)) +#define fdt_magic(fdt) (fdt_get_header(fdt, magic)) +#define fdt_totalsize(fdt) (fdt_get_header(fdt, totalsize)) +#define fdt_off_dt_struct(fdt) (fdt_get_header(fdt, off_dt_struct)) +#define fdt_off_dt_strings(fdt) (fdt_get_header(fdt, off_dt_strings)) +#define fdt_off_mem_rsvmap(fdt) (fdt_get_header(fdt, off_mem_rsvmap)) +#define fdt_version(fdt) (fdt_get_header(fdt, version)) +#define fdt_last_comp_version(fdt) (fdt_get_header(fdt, last_comp_version)) +#define fdt_boot_cpuid_phys(fdt) (fdt_get_header(fdt, boot_cpuid_phys)) +#define fdt_size_dt_strings(fdt) (fdt_get_header(fdt, size_dt_strings)) +#define fdt_size_dt_struct(fdt) (fdt_get_header(fdt, size_dt_struct)) + +#define __fdt_set_hdr(name) \ + static inline void fdt_set_##name(void *fdt, uint32_t val) \ + { \ + struct fdt_header *fdth = fdt; \ + fdth->name = cpu_to_fdt32(val); \ + } +__fdt_set_hdr(magic); +__fdt_set_hdr(totalsize); +__fdt_set_hdr(off_dt_struct); 
+__fdt_set_hdr(off_dt_strings); +__fdt_set_hdr(off_mem_rsvmap); +__fdt_set_hdr(version); +__fdt_set_hdr(last_comp_version); +__fdt_set_hdr(boot_cpuid_phys); +__fdt_set_hdr(size_dt_strings); +__fdt_set_hdr(size_dt_struct); +#undef __fdt_set_hdr + +/** + * fdt_check_header - sanity check a device tree or possible device tree + * @fdt: pointer to data which might be a flattened device tree + * + * fdt_check_header() checks that the given buffer contains what + * appears to be a flattened device tree with sane information in its + * header. + * + * returns: + * 0, if the buffer appears to contain a valid device tree + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings, as above + */ +int fdt_check_header(const void *fdt); + +/** + * fdt_move - move a device tree around in memory + * @fdt: pointer to the device tree to move + * @buf: pointer to memory where the device is to be moved + * @bufsize: size of the memory space at buf + * + * fdt_move() relocates, if possible, the device tree blob located at + * fdt to the buffer at buf of size bufsize. The buffer may overlap + * with the existing device tree blob at fdt. Therefore, + * fdt_move(fdt, fdt, fdt_totalsize(fdt)) + * should always succeed. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, bufsize is insufficient to contain the device tree + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_move(const void *fdt, void *buf, int bufsize); + +/**********************************************************************/ +/* Read-only functions */ +/**********************************************************************/ + +/** + * fdt_string - retrieve a string from the strings block of a device tree + * @fdt: pointer to the device tree blob + * @stroffset: offset of the string within the strings block (native endian) + * + * fdt_string() retrieves a pointer to a single string from the + * strings block of the device tree blob at fdt. + * + * returns: + * a pointer to the string, on success + * NULL, if stroffset is out of bounds + */ +const char *fdt_string(const void *fdt, int stroffset); + +/** + * fdt_num_mem_rsv - retrieve the number of memory reserve map entries + * @fdt: pointer to the device tree blob + * + * Returns the number of entries in the device tree blob's memory + * reservation map. This does not include the terminating 0,0 entry + * or any other (0,0) entries reserved for expansion. + * + * returns: + * the number of entries + */ +int fdt_num_mem_rsv(const void *fdt); + +/** + * fdt_get_mem_rsv - retrieve one memory reserve map entry + * @fdt: pointer to the device tree blob + * @address, @size: pointers to 64-bit variables + * + * On success, *address and *size will contain the address and size of + * the n-th reserve map entry from the device tree blob, in + * native-endian format. 
+ * + * returns: + * 0, on success + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size); + +/** + * fdt_subnode_offset_namelen - find a subnode based on substring + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * @namelen: number of characters of name to consider + * + * Identical to fdt_subnode_offset(), but only examine the first + * namelen characters of name for matching the subnode name. This is + * useful for finding subnodes based on a portion of a larger string, + * such as a full path. + */ +int fdt_subnode_offset_namelen(const void *fdt, int parentoffset, + const char *name, int namelen); +/** + * fdt_subnode_offset - find a subnode of a given node + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * + * fdt_subnode_offset() finds a subnode of the node at structure block + * offset parentoffset with the given name. name may include a unit + * address, in which case fdt_subnode_offset() will find the subnode + * with that unit address, or the unit address may be omitted, in + * which case fdt_subnode_offset() will find an arbitrary subnode + * whose name excluding unit address matches the given name. + * + * returns: + * structure block offset of the requested subnode (>=0), on success + * -FDT_ERR_NOTFOUND, if the requested subnode does not exist + * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. 
+ */ +int fdt_subnode_offset(const void *fdt, int parentoffset, const char *name); + +/** + * fdt_path_offset - find a tree node by its full path + * @fdt: pointer to the device tree blob + * @path: full path of the node to locate + * + * fdt_path_offset() finds a node of a given path in the device tree. + * Each path component may omit the unit address portion, but the + * results of this are undefined if any such path component is + * ambiguous (that is if there are multiple nodes at the relevant + * level matching the given component, differentiated only by unit + * address). + * + * returns: + * structure block offset of the node with the requested path (>=0), on success + * -FDT_ERR_BADPATH, given path does not begin with '/' or is invalid + * -FDT_ERR_NOTFOUND, if the requested node does not exist + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_path_offset(const void *fdt, const char *path); + +/** + * fdt_get_name - retrieve the name of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of the starting node + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_name() retrieves the name (including unit address) of the + * device tree node at structure block offset nodeoffset. If lenp is + * non-NULL, the length of this name is also returned, in the integer + * pointed to by lenp. 
+ * + * returns: + * pointer to the node's name, on success + * If lenp is non-NULL, *lenp contains the length of that name (>=0) + * NULL, on error + * if lenp is non-NULL *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +const char *fdt_get_name(const void *fdt, int nodeoffset, int *lenp); + +/** + * fdt_get_property - find a given property in a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_property() retrieves a pointer to the fdt_property + * structure within the device tree blob corresponding to the property + * named 'name' of the node at offset nodeoffset. If lenp is + * non-NULL, the length of the property value is also returned, in the + * integer pointed to by lenp. 
+ * + * returns: + * pointer to the structure representing the property + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_NOTFOUND, node does not have named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const struct fdt_property *fdt_get_property(const void *fdt, int nodeoffset, + const char *name, int *lenp); +static inline struct fdt_property *fdt_get_property_w(void *fdt, int nodeoffset, + const char *name, + int *lenp) +{ + return (struct fdt_property *)(uintptr_t) + fdt_get_property(fdt, nodeoffset, name, lenp); +} + +/** + * fdt_getprop - retrieve the value of a given property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_getprop() retrieves a pointer to the value of the property + * named 'name' of the node at offset nodeoffset (this will be a + * pointer to within the device blob itself, not a copy of the value). + * If lenp is non-NULL, the length of the property value is also + * returned, in the integer pointed to by lenp. 
+ * + * returns: + * pointer to the property's value + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_NOTFOUND, node does not have named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const void *fdt_getprop(const void *fdt, int nodeoffset, + const char *name, int *lenp); +static inline void *fdt_getprop_w(void *fdt, int nodeoffset, + const char *name, int *lenp) +{ + return (void *)(uintptr_t)fdt_getprop(fdt, nodeoffset, name, lenp); +} + +/** + * fdt_get_phandle - retrieve the phandle of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of the node + * + * fdt_get_phandle() retrieves the phandle of the device tree node at + * structure block offset nodeoffset. + * + * returns: + * the phandle of the node at nodeoffset, on success (!= 0, != -1) + * 0, if the node has no phandle, or another error occurs + */ +uint32_t fdt_get_phandle(const void *fdt, int nodeoffset); + +/** + * fdt_get_path - determine the full path of a node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose path to find + * @buf: character buffer to contain the returned path (will be overwritten) + * @buflen: size of the character buffer at buf + * + * fdt_get_path() computes the full path of the node at offset + * nodeoffset, and records that path in the buffer at buf. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * 0, on success + * buf contains the absolute path of the node at + * nodeoffset, as a NUL-terminated string. 
+ * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_NOSPACE, the path of the given node is longer than (buflen-1) + * characters and will not fit in the given buffer. + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen); + +/** + * fdt_supernode_atdepth_offset - find a specific ancestor of a node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose ancestor to find + * @supernodedepth: depth of the ancestor to find + * @nodedepth: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_supernode_atdepth_offset() finds an ancestor of the given node + * at a specific depth from the root (where the root itself has depth + * 0, its immediate subnodes depth 1 and so forth). So + * fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, NULL); + * will always return 0, the offset of the root node. If the node at + * nodeoffset has depth D, then: + * fdt_supernode_atdepth_offset(fdt, nodeoffset, D, NULL); + * will return nodeoffset itself. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + + * structure block offset of the node at node offset's ancestor + * of depth supernodedepth (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_NOTFOUND, supernodedepth was greater than the depth of nodeoffset + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset, + int supernodedepth, int *nodedepth); + +/** + * fdt_node_depth - find the depth of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose depth to find + * + * fdt_node_depth() finds the depth of a given node. 
The root node + * has depth 0, its immediate subnodes depth 1 and so forth. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * depth of the node at nodeoffset (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_depth(const void *fdt, int nodeoffset); + +/** + * fdt_parent_offset - find the parent of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose parent to find + * + * fdt_parent_offset() locates the parent node of a given node (that + * is, it finds the offset of the node which contains the node at + * nodeoffset as a subnode). + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset, *twice*. + * + * returns: + * structure block offset of the parent of the node at nodeoffset + * (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_parent_offset(const void *fdt, int nodeoffset); + +/** + * fdt_node_offset_by_prop_value - find nodes with a given property value + * @fdt: pointer to the device tree blob + * @startoffset: only find nodes after this offset + * @propname: property name to check + * @propval: property value to search for + * @proplen: length of the value in propval + * + * fdt_node_offset_by_prop_value() returns the offset of the first + * node after startoffset, which has a property named propname whose + * value is of length proplen and has value equal to propval; or if + * startoffset is -1, the very first such node in the tree. 
+ * + * To iterate through all nodes matching the criterion, the following + * idiom can be used: + * offset = fdt_node_offset_by_prop_value(fdt, -1, propname, + * propval, proplen); + * while (offset != -FDT_ERR_NOTFOUND) { + * // other code here + * offset = fdt_node_offset_by_prop_value(fdt, offset, propname, + * propval, proplen); + * } + * + * Note the -1 in the first call to the function, if 0 is used here + * instead, the function will never locate the root node, even if it + * matches the criterion. + * + * returns: + * structure block offset of the located node (>= 0, >startoffset), + * on success + * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the + * tree after startoffset + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_prop_value(const void *fdt, int startoffset, + const char *propname, + const void *propval, int proplen); + +/** + * fdt_node_offset_by_phandle - find the node with a given phandle + * @fdt: pointer to the device tree blob + * @phandle: phandle value + * + * fdt_node_offset_by_phandle() returns the offset of the node + * which has the given phandle value. If there is more than one node + * in the tree with the given phandle (an invalid tree), results are + * undefined. 
+ * + * returns: + * structure block offset of the located node (>= 0), on success + * -FDT_ERR_NOTFOUND, no node with that phandle exists + * -FDT_ERR_BADPHANDLE, given phandle value was invalid (0 or -1) + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle); + +/** + * fdt_node_check_compatible: check a node's compatible property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @compatible: string to match against + * + * + * fdt_node_check_compatible() returns 0 if the given node contains a + * 'compatible' property with the given string as one of its elements, + * it returns non-zero otherwise, or on error. + * + * returns: + * 0, if the node has a 'compatible' property listing the given string + * 1, if the node has a 'compatible' property, but it does not list + * the given string + * -FDT_ERR_NOTFOUND, if the given node has no 'compatible' property + * -FDT_ERR_BADOFFSET, if nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_check_compatible(const void *fdt, int nodeoffset, + const char *compatible); + +/** + * fdt_node_offset_by_compatible - find nodes with a given 'compatible' value + * @fdt: pointer to the device tree blob + * @startoffset: only find nodes after this offset + * @compatible: 'compatible' string to match against + * + * fdt_node_offset_by_compatible() returns the offset of the first + * node after startoffset, which has a 'compatible' property which + * lists the given compatible string; or if startoffset is -1, the + * very first such node in the tree. 
+ * + * To iterate through all nodes matching the criterion, the following + * idiom can be used: + * offset = fdt_node_offset_by_compatible(fdt, -1, compatible); + * while (offset != -FDT_ERR_NOTFOUND) { + * // other code here + * offset = fdt_node_offset_by_compatible(fdt, offset, compatible); + * } + * + * Note the -1 in the first call to the function, if 0 is used here + * instead, the function will never locate the root node, even if it + * matches the criterion. + * + * returns: + * structure block offset of the located node (>= 0, >startoffset), + * on success + * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the + * tree after startoffset + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_compatible(const void *fdt, int startoffset, + const char *compatible); + +/**********************************************************************/ +/* Write-in-place functions */ +/**********************************************************************/ + +/** + * fdt_setprop_inplace - change a property's value, but not its size + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: pointer to data to replace the property value with + * @len: length of the property value + * + * fdt_setprop_inplace() replaces the value of a given property with + * the data in val, of length len. This function cannot change the + * size of a property, and so will only work if len is equal to the + * current length of the property. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if len is not equal to the property's current length + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, + const void *val, int len); + +/** + * fdt_setprop_inplace_cell - change the value of a single-cell property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: cell (32-bit integer) value to replace the property with + * + * fdt_setprop_inplace_cell() replaces the value of a given property + * with the 32-bit integer cell value in val, converting val to + * big-endian if necessary. This function cannot change the size of a + * property, and so will only work if the property already exists and + * has length 4. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if the property's length is not equal to 4 + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_inplace_cell(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + val = cpu_to_fdt32(val); + return fdt_setprop_inplace(fdt, nodeoffset, name, &val, sizeof(val)); +} + +/** + * fdt_nop_property - replace a property with nop tags + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to nop + * @name: name of the property to nop + * + * fdt_nop_property() will replace a given property's representation + * in the blob with FDT_NOP tags, effectively removing it from the + * tree. + * + * This function will alter only the bytes in the blob which contain + * the property, and will not alter or move any other part of the + * tree. + * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_nop_property(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_nop_node - replace a node (subtree) with nop tags + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to nop + * + * fdt_nop_node() will replace a given node's representation in the + * blob, including all its subnodes, if any, with FDT_NOP tags, + * effectively removing it from the tree. + * + * This function will alter only the bytes in the blob which contain + * the node and its properties and subnodes, and will not alter or + * move any other part of the tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_nop_node(void *fdt, int nodeoffset); + +/**********************************************************************/ +/* Sequential write functions */ +/**********************************************************************/ + +int fdt_create(void *buf, int bufsize); +int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size); +int fdt_finish_reservemap(void *fdt); +int fdt_begin_node(void *fdt, const char *name); +int fdt_property(void *fdt, const char *name, const void *val, int len); +static inline int fdt_property_cell(void *fdt, const char *name, uint32_t val) +{ + val = cpu_to_fdt32(val); + return fdt_property(fdt, name, &val, sizeof(val)); +} +#define fdt_property_string(fdt, name, str) \ + fdt_property(fdt, name, str, strlen(str)+1) +int fdt_end_node(void *fdt); +int fdt_finish(void *fdt); + +/**********************************************************************/ +/* Read-write functions */ +/**********************************************************************/ + +int fdt_open_into(const void *fdt, void *buf, int bufsize); +int fdt_pack(void *fdt); + +/** + * fdt_add_mem_rsv - add one memory reserve map entry + * @fdt: pointer to the device tree blob + * @address, @size: 64-bit values (native endian) + * + * Adds a reserve map entry to the given blob reserving a region at + * address address of length size. + * + * This function will insert data into the reserve map and will + * therefore change the indexes of some entries in the table. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new reservation entry + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size); + +/** + * fdt_del_mem_rsv - remove a memory reserve map entry + * @fdt: pointer to the device tree blob + * @n: entry to remove + * + * fdt_del_mem_rsv() removes the n-th memory reserve map entry from + * the blob. + * + * This function will delete data from the reservation table and will + * therefore change the indexes of some entries in the table. + * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, there is no entry of the given index (i.e. there + * are less than n+1 reserve map entries) + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_del_mem_rsv(void *fdt, int n); + +/** + * fdt_set_name - change the name of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of a node + * @name: name to give the node + * + * fdt_set_name() replaces the name (including unit address, if any) + * of the given node with the given string. NOTE: this function can't + * efficiently check if the new name is unique amongst the given + * node's siblings; results are undefined if this function is invoked + * with a name equal to one of the given node's siblings. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob + * to contain the new name + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_set_name(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_setprop - create or change a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: pointer to data to set the property value to + * @len: length of the property value + * + * fdt_setprop() sets the value of the named property in the given + * node to the given value and length, creating the property if it + * does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_setprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len); + +/** + * fdt_setprop_cell - set a property to a single cell value + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value for the property (native endian) + * + * fdt_setprop_cell() sets the value of the named property in the + * given node to the given cell value (converting to big-endian if + * necessary), or creates a new property with that value if it does + * not already exist. 
+ * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_cell(void *fdt, int nodeoffset, const char *name, + uint32_t val) +{ + val = cpu_to_fdt32(val); + return fdt_setprop(fdt, nodeoffset, name, &val, sizeof(val)); +} + +/** + * fdt_setprop_string - set a property to a string value + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @str: string value for the property + * + * fdt_setprop_string() sets the value of the named property in the + * given node to the given string value (using the length of the + * string to determine the new length of the property), or creates a + * new property with that value if it does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#define fdt_setprop_string(fdt, nodeoffset, name, str) \ + fdt_setprop((fdt), (nodeoffset), (name), (str), strlen(str)+1) + +/** + * fdt_delprop - delete a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to nop + * @name: name of the property to nop + * + * fdt_del_property() will delete the given property. + * + * This function will delete data from the blob, and will therefore + * change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_delprop(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_add_subnode_namelen - creates a new node based on substring + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * @namelen: number of characters of name to consider + * + * Identical to fdt_add_subnode(), but use only the first namelen + * characters of name as the name of the new node. This is useful for + * creating subnodes based on a portion of a larger string, such as a + * full path. 
+ */ +int fdt_add_subnode_namelen(void *fdt, int parentoffset, + const char *name, int namelen); + +/** + * fdt_add_subnode - creates a new node + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to create + * + * fdt_add_subnode() creates a new node as a subnode of the node at + * structure block offset parentoffset, with the given name (which + * should include the unit address, if any). + * + * This function will insert data into the blob, and will therefore + * change the offsets of some existing nodes. + * + * returns: + * structure block offset of the created node (>=0), on success + * -FDT_ERR_NOTFOUND, if the requested subnode does not exist + * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE tag + * -FDT_ERR_EXISTS, if the node at parentoffset already has a subnode of + * the given name + * -FDT_ERR_NOSPACE, if there is insufficient free space in the + * blob to contain the new node + * -FDT_ERR_NOSPACE + * -FDT_ERR_BADLAYOUT + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_add_subnode(void *fdt, int parentoffset, const char *name); + +/** + * fdt_del_node - delete a node (subtree) + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to nop + * + * fdt_del_node() will remove the given node, including all its + * subnodes if any, from the blob. + * + * This function will delete data from the blob, and will therefore + * change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_del_node(void *fdt, int nodeoffset); + +/**********************************************************************/ +/* Debugging / informational functions */ +/**********************************************************************/ + +const char *fdt_strerror(int errval); + +#define FDT_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) +#define FDT_TAGALIGN(x) (FDT_ALIGN((x), FDT_TAGSIZE)) + +/* + * if add a new subnode: + * see: fdt_add_subnode -> fdt_add_subnode_namelen + */ +static inline int fdt_node_len(const char* node_name) +{ + return sizeof(struct fdt_node_header) + + FDT_TAGALIGN(strlen(node_name) + 1) + FDT_TAGSIZE; +} + +/* + * if add a new prop: (assume prop_name not exist in strtab) + * see: fdt_setprop -> _fdt_add_property + */ +static inline int fdt_prop_len(const char* prop_name, int len) +{ + return (strlen(prop_name) + 1) + + sizeof(struct fdt_property) + + FDT_TAGALIGN(len); +} + +#endif /* _LIBFDT_H */ diff --git a/kexec/libfdt/libfdt_env.h b/kexec/libfdt/libfdt_env.h new file mode 100644 index 0000000..449bf60 --- /dev/null +++ b/kexec/libfdt/libfdt_env.h @@ -0,0 +1,23 @@ +#ifndef _LIBFDT_ENV_H +#define _LIBFDT_ENV_H + +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define _B(n) ((unsigned long long)((uint8_t *)&x)[n]) +static inline uint32_t fdt32_to_cpu(uint32_t x) +{ + return (_B(0) << 24) | (_B(1) << 16) | (_B(2) << 8) | _B(3); +} +#define cpu_to_fdt32(x) fdt32_to_cpu(x) + +static inline uint64_t fdt64_to_cpu(uint64_t x) +{ + return (_B(0) << 56) | (_B(1) << 48) | (_B(2) << 40) | (_B(3) << 32) + | (_B(4) << 24) | (_B(5) << 16) | (_B(6) << 8) | _B(7); +} +#define cpu_to_fdt64(x) fdt64_to_cpu(x) +#undef _B + +#endif /* _LIBFDT_ENV_H */ diff --git 
a/kexec/libfdt/libfdt_internal.h b/kexec/libfdt/libfdt_internal.h
new file mode 100644
index 0000000..7e6c4c8
--- /dev/null
+++ b/kexec/libfdt/libfdt_internal.h
@@ -0,0 +1,114 @@
+#ifndef _LIBFDT_INTERNAL_H
+#define _LIBFDT_INTERNAL_H
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2006 David Gibson, IBM Corporation.
+ *
+ * libfdt is dual licensed: you can use it either under the terms of
+ * the GPL, or the BSD license, at your option.
+ *
+ *  a) This library is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This library is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public
+ *     License along with this library; if not, write to the Free
+ *     Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+ *     MA 02110-1301 USA
+ *
+ * Alternatively,
+ *
+ *  b) Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *     1. Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *     2. Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <fdt.h>
+
+/*
+ * Validate the blob with fdt_check_header() and, if that returns a non-zero
+ * error code, return it from the *enclosing* function.  Usable only inside
+ * functions whose return type can carry that int error code.
+ */
+#define FDT_CHECK_HEADER(fdt) \
+	{ \
+		int err; \
+		if ((err = fdt_check_header(fdt)) != 0) \
+			return err; \
+	}
+
+/* Internal helpers; their definitions are not part of this header. */
+uint32_t _fdt_next_tag(const void *fdt, int startoffset, int *nextoffset);
+int _fdt_check_node_offset(const void *fdt, int offset);
+const char *_fdt_find_string(const char *strtab, int tabsize, const char *s);
+int _fdt_node_end_offset(void *fdt, int nodeoffset);
+
+/*
+ * Address of the byte at @offset within the structure block, i.e. the blob
+ * base plus fdt_off_dt_struct() plus @offset.  No bounds checking is done.
+ */
+static inline const void *_fdt_offset_ptr(const void *fdt, int offset)
+{
+	return (const char *)fdt + fdt_off_dt_struct(fdt) + offset;
+}
+
+/* Writable variant of _fdt_offset_ptr(): same address, const cast away. */
+static inline void *_fdt_offset_ptr_w(void *fdt, int offset)
+{
+	return (void *)(uintptr_t)_fdt_offset_ptr(fdt, offset);
+}
+
+/*
+ * Address of entry @n of the memory reservation map, which starts
+ * fdt_off_mem_rsvmap() bytes into the blob.  @n is not range-checked.
+ */
+static inline const struct fdt_reserve_entry *_fdt_mem_rsv(const void *fdt, int n)
+{
+	const struct fdt_reserve_entry *rsv_table =
+		(const struct fdt_reserve_entry *)
+		((const char *)fdt + fdt_off_mem_rsvmap(fdt));
+
+	return rsv_table + n;
+}
+
+/* Writable variant of _fdt_mem_rsv(): same address, const cast away. */
+static inline struct fdt_reserve_entry *_fdt_mem_rsv_w(void *fdt, int n)
+{
+	return (void *)(uintptr_t)_fdt_mem_rsv(fdt, n);
+}
+
+/*
+ * Bitwise complement of FDT_MAGIC.
+ * NOTE(review): presumably marks a blob that is still being built by the
+ * sequential-write code -- its users are not visible here, please confirm.
+ */
+#define FDT_SW_MAGIC		(~FDT_MAGIC)
+
+#endif /* _LIBFDT_INTERNAL_H */
diff --git a/kexec/lzma.c b/kexec/lzma.c
new file mode
100644
index 0000000..2fc07e6
--- /dev/null
+++ b/kexec/lzma.c
@@ -0,0 +1,300 @@
+#include <unistd.h>
+#include <sys/types.h>
+
+#include "kexec-lzma.h"
+#include "config.h"
+#include "kexec.h"
+
+#ifdef HAVE_LIBLZMA
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <lzma.h>
+
+#define kBufferSize (1 << 15)
+
+typedef struct lzfile {
+	uint8_t buf[kBufferSize];
+	lzma_stream strm;
+	FILE *file;
+	int encoding;
+	int eof;
+} LZFILE;
+
+LZFILE *lzopen(const char *path, const char *mode);
+int lzclose(LZFILE *lzfile);
+ssize_t lzread(LZFILE *lzfile, void *buf, size_t len);
+
+/*
+ * Open @path (or wrap the descriptor @fd when it is not -1) and set up an
+ * LZMA coder on top of it.  @mode is scanned character by character: 'w'
+ * selects encoding, 'r' decoding, and a digit '1'..'9' the preset level.
+ * Returns NULL on any failure, after releasing the FILE and the handle.
+ */
+static LZFILE *lzopen_internal(const char *path, const char *mode, int fd)
+{
+	int level = 5;
+	int encoding = 0;
+	FILE *fp;
+	LZFILE *lzfile;
+	lzma_ret ret;
+	lzma_stream lzma_strm_tmp = LZMA_STREAM_INIT;
+
+	for (; *mode; mode++) {
+		if (*mode == 'w')
+			encoding = 1;
+		else if (*mode == 'r')
+			encoding = 0;
+		else if (*mode >= '1' && *mode <= '9')
+			level = *mode - '0';
+	}
+	if (fd != -1)
+		fp = fdopen(fd, encoding ? "w" : "r");
+	else
+		fp = fopen(path, encoding ? "w" : "r");
+	if (!fp)
+		return NULL;
+
+	lzfile = calloc(1, sizeof(*lzfile));
+
+	if (!lzfile) {
+		fclose(fp);
+		return NULL;
+	}
+
+	lzfile->file = fp;
+	lzfile->encoding = encoding;
+	lzfile->eof = 0;
+	lzfile->strm = lzma_strm_tmp;
+	if (encoding) {
+		lzma_options_lzma opt_lzma;
+		if (lzma_lzma_preset(&opt_lzma, level - 1)) {
+			/* Don't leak the FILE and the handle on a bad preset. */
+			fclose(fp);
+			free(lzfile);
+			return NULL;
+		}
+		ret = lzma_alone_encoder(&lzfile->strm, &opt_lzma);
+	} else {
+		ret = lzma_auto_decoder(&lzfile->strm,
+					UINT64_C(64) * 1024 * 1024, 0);
+	}
+	if (ret != LZMA_OK) {
+		fclose(fp);
+		free(lzfile);
+		return NULL;
+	}
+	return lzfile;
+}
+
+/* Open @path for LZMA I/O; see lzopen_internal() for the @mode letters. */
+LZFILE *lzopen(const char *path, const char *mode)
+{
+	return lzopen_internal(path, mode, -1);
+}
+
+/*
+ * Finish the stream (flushing pending output when encoding), then release
+ * the coder, the underlying FILE and the handle itself.  The handle is
+ * always freed, including on error.  Returns -1 if @lzfile is NULL or
+ * flushing failed, otherwise the fclose() result (0 on success).
+ */
+int lzclose(LZFILE *lzfile)
+{
+	lzma_ret ret;
+	size_t n;
+	int status = 0;
+	int rc;
+
+	if (!lzfile)
+		return -1;
+
+	if (lzfile->encoding) {
+		for (;;) {
+			lzfile->strm.avail_out = kBufferSize;
+			lzfile->strm.next_out = lzfile->buf;
+			ret = lzma_code(&lzfile->strm, LZMA_FINISH);
+			if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
+				status = -1;
+				break;
+			}
+			n = kBufferSize - lzfile->strm.avail_out;
+			if (n && fwrite(lzfile->buf, 1, n, lzfile->file) != n) {
+				status = -1;
+				break;
+			}
+			if (ret == LZMA_STREAM_END)
+				break;
+		}
+	}
+	lzma_end(&lzfile->strm);
+
+	/* fclose() must run before free(): it reads lzfile->file. */
+	rc = fclose(lzfile->file);
+	free(lzfile);
+
+	return status ? status : rc;
+}
+
+/*
+ * Decode up to @len bytes into @buf.  Returns the number of bytes produced
+ * (less than @len only when the stream ended), 0 once the end of the stream
+ * has already been reported, or -1 on a decoder error, on a handle opened
+ * for writing, or when the input file ends before the LZMA stream does.
+ */
+ssize_t lzread(LZFILE *lzfile, void *buf, size_t len)
+{
+	lzma_ret ret;
+	int eof = 0;
+
+	if (!lzfile || lzfile->encoding)
+		return -1;
+
+	if (lzfile->eof)
+		return 0;
+
+	lzfile->strm.next_out = buf;
+	lzfile->strm.avail_out = len;
+
+	for (;;) {
+		if (!lzfile->strm.avail_in) {
+			lzfile->strm.next_in = lzfile->buf;
+			lzfile->strm.avail_in = fread(lzfile->buf, 1, kBufferSize, lzfile->file);
+			if (!lzfile->strm.avail_in)
+				eof = 1;
+		}
+
+		ret = lzma_code(&lzfile->strm, LZMA_RUN);
+		if (ret == LZMA_STREAM_END) {
+			lzfile->eof = 1;
+			return len - lzfile->strm.avail_out;
+		}
+
+		if (ret != LZMA_OK)
+			return -1;
+
+		if (!lzfile->strm.avail_out)
+			return len;
+
+		if (eof)
+			return -1;
+	}
+}
+
+/*
+ * Cheap probe: read the first 13 bytes of @filename and check whether the
+ * first 5 decode as valid LZMA1 filter properties.  Returns 1 if they do,
+ * 0 otherwise (including when the file cannot be opened or is too short).
+ */
+int is_lzma_file(const char *filename)
+{
+	FILE *fp;
+	int ret = 0;
+	uint8_t buf[13];
+
+	if (!filename)
+		return 0;
+
+	fp = fopen(filename, "r");
+	if (fp == NULL)
+		return 0;
+
+	const size_t size = fread(buf, 1, sizeof(buf), fp);
+
+	if (size != 13) {
+		/* file is too small to be a lzma file. */
+		fclose(fp);
+		return 0;
+	}
+
+	lzma_filter filter = { .id = LZMA_FILTER_LZMA1 };
+
+	switch (lzma_properties_decode(&filter, NULL, buf, 5)) {
+	case LZMA_OK:
+		ret = 1;
+		/* On success liblzma malloc()s filter.options for us. */
+		free(filter.options);
+		break;
+	default:
+		/* It's not a lzma file */
+		ret = 0;
+	}
+
+	fclose(fp);
+	return ret;
+}
+
+/*
+ * Decompress @filename into a freshly allocated buffer.  On success the
+ * buffer is returned and *@r_size holds its length; on failure, or when
+ * the file does not look like LZMA, NULL is returned and *@r_size is 0.
+ */
+char *lzma_decompress_file(const char *filename, off_t *r_size)
+{
+	LZFILE *fp;
+	char *buf;
+	off_t size, allocated;
+	ssize_t result;
+
+	dbgprintf("Try LZMA decompression.\n");
+
+	*r_size = 0;
+	if (!filename)
+		return NULL;
+
+	if (!is_lzma_file(filename))
+		return NULL;
+
+	fp = lzopen(filename, "rb");
+	if (fp == 0) {
+		dbgprintf("Cannot open `%s'\n", filename);
+		return NULL;
+	}
+	size = 0;
+	allocated = 65536;
+	buf = xmalloc(allocated);
+	do {
+		if (size == allocated) {
+			allocated <<= 1;
+			buf = xrealloc(buf, allocated);
+		}
+		result = lzread(fp, buf + size, allocated - size);
+		if (result < 0) {
+			/*
+			 * lzread() only returns -1 for a decoder error or
+			 * truncated input, so there is nothing to retry: the
+			 * old EINTR/EAGAIN "continue" merely jumped to the
+			 * loop condition and left the loop anyway.
+			 */
+			dbgprintf("%s: read on %s of %lu bytes failed\n",
+				__func__, filename,
+				(unsigned long)(allocated - size));
+			break;
+		}
+		size += result;
+	} while (result > 0);
+
+	if (lzclose(fp) != 0) {
+		dbgprintf("%s: Close of %s failed\n", __func__, filename);
+		goto fail;
+	}
+	if (result < 0)
+		goto fail;
+
+	*r_size = size;
+	return buf;
+fail:
+	free(buf);
+	return NULL;
+}
+#else
+char *lzma_decompress_file(const char *UNUSED(filename), off_t *UNUSED(r_size))
+{
+	return NULL;
+}
+#endif /* HAVE_LIBLZMA */
diff --git a/kexec/mem_regions.c b/kexec/mem_regions.c
new file mode 100644
index 0000000..ad7d3f1
--- /dev/null
+++ b/kexec/mem_regions.c
@@ -0,0 +1,205 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "kexec.h"
+#include "mem_regions.h"
+
+/* qsort() comparator: order two memory ranges by ascending start address. */
+static int mem_range_cmp(const void *a1, const void *a2)
+{
+	const struct memory_range *r1 = a1;
+	const struct memory_range *r2 = a2;
+
+	if (r1->start > r2->start)
+		return 1;
+	if (r1->start < r2->start)
+		return -1;
+
+	return 0;
+}
+
+/**
+ * mem_regions_sort() - sort ranges into ascending address order
+ * @ranges: ranges to sort
+ *
+ * Sort the memory regions into ascending address order.
+ */
+void mem_regions_sort(struct memory_ranges *ranges)
+{
+	qsort(ranges->ranges, ranges->size, sizeof(*ranges->ranges),
+	      mem_range_cmp);
+}
+
+/**
+ * mem_regions_add() - add a memory region to a set of ranges
+ * @ranges: ranges to add the memory region to
+ * @base: base address of memory region
+ * @length: length of memory region in bytes
+ * @type: type of memory region
+ *
+ * Add the memory region to the set of ranges, and return %0 if successful,
+ * or %-1 if we ran out of space.
+ */
+int mem_regions_add(struct memory_ranges *ranges, unsigned long long base,
+		    unsigned long long length, int type)
+{
+	struct memory_range *range;
+
+	if (ranges->size >= ranges->max_size)
+		return -1;
+
+	range = ranges->ranges + ranges->size++;
+	range->start = base;
+	range->end = base + length - 1;
+	range->type = type;
+
+	return 0;
+}
+
+/*
+ * Drop entry @index from @ranges: move the following entries down one slot
+ * and zero the slot that becomes free at the end of the array.
+ */
+static void mem_regions_remove(struct memory_ranges *ranges, int index)
+{
+	int tail_entries;
+
+	/* we are assured to have at least one entry */
+	ranges->size -= 1;
+
+	/* if we have following entries, shuffle them down one place */
+	tail_entries = ranges->size - index;
+	if (tail_entries)
+		memmove(ranges->ranges + index, ranges->ranges + index + 1,
+			tail_entries * sizeof(*ranges->ranges));
+
+	/* zero the new tail entry */
+	memset(ranges->ranges + ranges->size, 0, sizeof(*ranges->ranges));
+}
+
+/**
+ * mem_regions_exclude() - excludes a memory region from a set of memory ranges
+ * @ranges: memory ranges to exclude the region from
+ * @range: memory range to exclude
+ *
+ * Exclude a memory region from a set of memory ranges.  We assume that
+ * the region to be excluded is either wholly located within one of the
+ * memory ranges, or not at all.
+ *
+ * Returns %0 on success, or a negative value if a range had to be split in
+ * two and mem_regions_add() had no room for the extra entry.
+ */
+int mem_regions_exclude(struct memory_ranges *ranges,
+			const struct memory_range *range)
+{
+	int i, ret;
+
+	for (i = 0; i < ranges->size; i++) {
+		struct memory_range *r = ranges->ranges + i;
+
+		/*
+		 * We assume that crash area is fully contained in
+		 * some larger memory area.
+		 */
+		if (r->start <= range->start && r->end >= range->end) {
+			if (r->start == range->start) {
+				if (r->end == range->end) {
+					/*
+					 * Remove this entry.  The entries
+					 * after it moved down one slot, so
+					 * revisit index i on the next pass.
+					 */
+					mem_regions_remove(ranges, i);
+					i--;
+				} else {
+					/* Shrink the start of this memory range */
+					r->start = range->end + 1;
+				}
+			} else if (r->end == range->end) {
+				/* Shrink the end of this memory range */
+				r->end = range->start - 1;
+			} else {
+				/*
+				 * Split this area into 2 smaller ones and
+				 * remove excluded range from between. First
+				 * create new entry for the remaining area.
+				 */
+				ret = mem_regions_add(ranges, range->end + 1,
+						      r->end - range->end, 0);
+				if (ret < 0)
+					return ret;
+
+				/*
+				 * Update this area to end before excluded
+				 * range.
+				 */
+				r->end = range->start - 1;
+				break;
+			}
+		}
+	}
+	return 0;
+}
+
+#define KEXEC_MEMORY_RANGES 16
+
+/**
+ * mem_regions_alloc_and_add() - add a memory region, growing the array if full
+ * @ranges: ranges to add the memory region to
+ * @base: base address of memory region
+ * @length: length of memory region in bytes
+ * @type: type of memory region
+ *
+ * When the array is full it is first enlarged by %KEXEC_MEMORY_RANGES
+ * entries with realloc().  Returns %0 if successful, or %-1 if that
+ * reallocation failed.
+ */
+int mem_regions_alloc_and_add(struct memory_ranges *ranges,
+			      unsigned long long base,
+			      unsigned long long length, int type)
+{
+	void *new_ranges;
+
+	if (ranges->size >= ranges->max_size) {
+		new_ranges = realloc(ranges->ranges,
+				sizeof(struct memory_range) *
+				(ranges->max_size + KEXEC_MEMORY_RANGES));
+		if (!new_ranges)
+			return -1;
+
+		ranges->ranges = new_ranges;
+		ranges->max_size += KEXEC_MEMORY_RANGES;
+	}
+
+	return mem_regions_add(ranges, base, length, type);
+}
+
+/**
+ * mem_regions_alloc_and_exclude() - exclude a region, growing the array if full
+ * @ranges: memory ranges to exclude the region from
+ * @range: memory range to exclude
+ *
+ * Guarantees one free entry before calling mem_regions_exclude(), which needs
+ * it when the excluded region splits an existing range in two.  Returns %-1
+ * if the array could not be enlarged, otherwise the mem_regions_exclude()
+ * result.
+ */
+int mem_regions_alloc_and_exclude(struct memory_ranges *ranges,
+				  const struct memory_range *range)
+{
+	void *new_ranges;
+
+	/* for safety, we should have at least one free entry in ranges */
+	if (ranges->size >= ranges->max_size) {
+		new_ranges = realloc(ranges->ranges,
+				sizeof(struct memory_range) *
+				(ranges->max_size + KEXEC_MEMORY_RANGES));
+		if (!new_ranges)
+			return -1;
+
+		ranges->ranges = new_ranges;
+		ranges->max_size += KEXEC_MEMORY_RANGES;
+	}
+
+	return mem_regions_exclude(ranges, range);
+}
diff --git a/kexec/mem_regions.h b/kexec/mem_regions.h
new file mode 100644
index 0000000..e306d67
--- /dev/null
+++ b/kexec/mem_regions.h
@@ -0,0 +1,22 @@
+#ifndef MEM_REGIONS_H
+#define MEM_REGIONS_H
+
+struct memory_ranges;
+struct memory_range;
+
+void mem_regions_sort(struct memory_ranges *ranges);
+
+int mem_regions_exclude(struct memory_ranges *ranges,
+			const struct memory_range *range);
+
+int mem_regions_add(struct memory_ranges *ranges, unsigned long long base,
+		    unsigned long long length, int type);
+
+int mem_regions_alloc_and_exclude(struct memory_ranges *ranges,
+				  const struct memory_range *range);
+
+int mem_regions_alloc_and_add(struct memory_ranges *ranges,
+			      unsigned long long base,
+			      unsigned long long length, int type);
+
+#endif
diff --git a/kexec/phys_arch.c b/kexec/phys_arch.c
new file mode 100644
index 0000000..1571a0f
--- /dev/null
+++ b/kexec/phys_arch.c
@@ -0,0 +1,33 @@
+#include "kexec.h"
+#include <errno.h>
+#include <string.h>
+#include <sys/utsname.h>
+
+/*
+ * Map the running machine, as reported by uname(), to an arch code from the
+ * arches[] table.  An "arm" table entry also matches any machine name that
+ * merely starts with "arm".  Returns -1 if uname() fails or nothing matches.
+ */
+long physical_arch(void)
+{
+	struct utsname utsname;
+	int i, result = uname(&utsname);
+	if (result < 0) {
+		fprintf(stderr, "uname failed: %s\n",
+			strerror(errno));
+		return -1;
+	}
+
+	for (i = 0; arches[i].machine; ++i) {
+		if (strcmp(utsname.machine, arches[i].machine) == 0)
+			return arches[i].arch;
+		if ((strcmp(arches[i].machine, "arm") == 0) &&
+		    (strncmp(utsname.machine, arches[i].machine,
+		     strlen(arches[i].machine)) == 0))
+			return arches[i].arch;
+	}
+
+	fprintf(stderr, "Unsupported machine type: %s\n",
+		utsname.machine);
+	return -1;
+}
diff --git a/kexec/phys_to_virt.c b/kexec/phys_to_virt.c
new file mode 100644
index 0000000..5e8c4e3
--- /dev/null
+++ b/kexec/phys_to_virt.c
@@ -0,0 +1,17 @@
+#include "kexec.h"
+#include "crashdump.h"
+
+/**
+ * phys_to_virt() - translate physical address to virtual address
+ * @elf_info: supplies the page_offset that is added to @paddr
+ * @paddr: physical address to translate
+ *
+ * For most architectures physical address is simply virtual address minus
+ * PAGE_OFFSET. Architectures that don't follow this convention should provide
+ * their own implementation.
+ */
+unsigned long
+phys_to_virt(struct crash_elf_info *elf_info, unsigned long long paddr)
+{
+	return paddr + elf_info->page_offset;
+}
diff --git a/kexec/proc_iomem.c b/kexec/proc_iomem.c
new file mode 100644
index 0000000..85daa85
--- /dev/null
+++ b/kexec/proc_iomem.c
@@ -0,0 +1,13 @@
+#include "kexec.h"
+
+static const char proc_iomem_str[] = "/proc/iomem";
+
+/*
+ * Allow an architecture specific implementation of this
+ * function to override the location of a file looking a lot
+ * like /proc/iomem
+ */
+const char *proc_iomem(void)
+{
+	return proc_iomem_str;
+}
diff --git a/kexec/symbols.c b/kexec/symbols.c
new file mode 100644
index 0000000..04377ca
--- /dev/null
+++ b/kexec/symbols.c
@@ -0,0 +1,36 @@
+#include <stdio.h>
+#include <string.h>
+#include "kexec.h"
+
+/* Retrieve kernel symbol virtual address from /proc/kallsyms */
+unsigned long long get_kernel_sym(const char *symbol)
+{
+	const char *kallsyms = "/proc/kallsyms";
+	char sym[128];
+	char line[128];
+	FILE *fp;
+	unsigned long long vaddr;
+	char type;
+
+	fp = fopen(kallsyms, "r");
+	if (!fp) {
+		fprintf(stderr, "Cannot open %s\n", kallsyms);
+		return 0;
+	}
+
+	while (fgets(line, sizeof(line), fp) != NULL) {
+		if (sscanf(line, "%llx %c %s", &vaddr, &type, sym) != 3)
+			continue;
+		if (strcmp(sym, symbol) == 0) {
+			dbgprintf("kernel symbol %s vaddr = %16llx\n",
+				  symbol, vaddr);
+			fclose(fp);
+			return vaddr;
+		}
+	}
+
+	dbgprintf("Cannot get kernel %s symbol address\n", symbol);
+
+	fclose(fp);
+	return 0;
+}
diff --git a/kexec/virt_to_phys.c b/kexec/virt_to_phys.c
new file mode 100644
index 0000000..a746917
--- /dev/null
+++ b/kexec/virt_to_phys.c
@@ -0,0 +1,7 @@
+#include "kexec.h"
+#include <stdlib.h>
+
+unsigned long virt_to_phys(unsigned long UNUSED(addr))
+{
+	abort();
+}
diff --git a/kexec/zlib.c b/kexec/zlib.c
new file mode 100644
index 0000000..3ed6bd6
--- /dev/null
+++ b/kexec/zlib.c
@@ -0,0 +1,165 @@
+#include "kexec-zlib.h"
+#include "kexec.h"
+
+#ifdef HAVE_LIBZ
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <zlib.h>
+
+/*
+ * Fetch a printable description of the last error on @fp.  A NULL @fp (a
+ * failed gzopen()) and Z_ERRNO both mean the real cause is in errno, so
+ * strerror(errno) is reported for them.  Must not be called on a handle
+ * that has already been passed to gzclose().
+ */
+static void _gzerror(gzFile fp, int *errnum, const char **errmsg)
+{
+	if (!fp) {
+		*errnum = Z_ERRNO;
+		*errmsg = strerror(errno);
+		return;
+	}
+
+	*errmsg = gzerror(fp, errnum);
+	if (*errnum == Z_ERRNO) {
+		*errmsg = strerror(errno);
+	}
+}
+
+/*
+ * Describe a failed gzclose().  The handle is gone by then, so only the
+ * return code @ret (and errno, for Z_ERRNO) may be consulted.
+ */
+static const char *_gzclose_errmsg(int ret)
+{
+	return (ret == Z_ERRNO) ? strerror(errno) : "zlib error";
+}
+
+/*
+ * Return 1 if @filename can be opened and is in gzip format, 0 otherwise.
+ * The size argument is not used.
+ */
+int is_zlib_file(const char *filename, off_t *UNUSED(r_size))
+{
+	gzFile fp;
+	int errnum;
+	int is_zlib_file = 0; /* default: It's not in gzip format */
+	const char *msg;
+	int result;
+
+	if (!filename)
+		goto out;
+
+	fp = gzopen(filename, "rb");
+	if (fp == 0) {
+		_gzerror(fp, &errnum, &msg);
+		dbgprintf("Cannot open `%s': %s\n", filename, msg);
+		goto out;
+	}
+
+	if (!gzdirect(fp))
+		/* It's in gzip format */
+		is_zlib_file = 1;
+
+	result = gzclose(fp);
+	if (result != Z_OK) {
+		/* fp is invalid after gzclose(); don't hand it to gzerror(). */
+		dbgprintf(" Close of %s failed: %s\n", filename,
+			  _gzclose_errmsg(result));
+	}
+
+out:
+	return is_zlib_file;
+}
+
+/*
+ * Decompress the gzip file @filename into a freshly allocated buffer.
+ * Returns the buffer with its length in *@r_size, or NULL (and *@r_size
+ * left at 0) if the file cannot be opened, is not gzip, fails to
+ * decompress, or decompresses to nothing.
+ */
+char *zlib_decompress_file(const char *filename, off_t *r_size)
+{
+	gzFile fp;
+	int errnum;
+	const char *msg;
+	char *buf = NULL;
+	off_t size = 0, allocated;
+	int result;
+
+	dbgprintf("Try gzip decompression.\n");
+
+	*r_size = 0;
+	if (!filename) {
+		return NULL;
+	}
+	fp = gzopen(filename, "rb");
+	if (fp == 0) {
+		_gzerror(fp, &errnum, &msg);
+		dbgprintf("Cannot open `%s': %s\n", filename, msg);
+		return NULL;
+	}
+	if (gzdirect(fp)) {
+		/* It's not in gzip format */
+		goto fail;
+	}
+	allocated = 65536;
+	buf = xmalloc(allocated);
+	do {
+		if (size == allocated) {
+			allocated <<= 1;
+			buf = xrealloc(buf, allocated);
+		}
+		result = gzread(fp, buf + size, allocated - size);
+		if (result < 0) {
+			/*
+			 * Every read error is fatal.  The old EINTR/EAGAIN
+			 * "continue" jumped to the loop condition, left the
+			 * loop and returned the partial buffer as a success.
+			 */
+			_gzerror(fp, &errnum, &msg);
+			dbgprintf("Read on %s of %d bytes failed: %s\n",
+				filename, (int)(allocated - size), msg);
+			size = 0;
+			goto fail;
+		}
+		size += result;
+	} while(result > 0);
+
+fail:
+	result = gzclose(fp);
+	if (result != Z_OK) {
+		/* fp is invalid after gzclose(); don't hand it to gzerror(). */
+		dbgprintf(" Close of %s failed: %s\n", filename,
+			  _gzclose_errmsg(result));
+	}
+
+	if (size > 0) {
+		*r_size = size;
+	} else {
+		free(buf);
+		buf = NULL;
+	}
+	return buf;
+}
+#else
+
+int is_zlib_file(const char *UNUSED(filename), off_t *UNUSED(r_size))
+{
+	return 0;
+}
+
+char *zlib_decompress_file(const char *UNUSED(filename), off_t *UNUSED(r_size))
+{
+	return NULL;
+}
+#endif /* HAVE_LIBZ */
diff --git a/kexec_test/Makefile b/kexec_test/Makefile
new file mode 100644
index 0000000..fec6210
--- /dev/null
+++ b/kexec_test/Makefile
@@ -0,0 +1,40 @@
+#
+# kexec_test Debugging payload to be certain the infrastructure works
+#
+RELOC:=0x10000
+KEXEC_TEST_SRCS:= kexec_test/kexec_test16.S kexec_test/kexec_test.S
+
+dist += kexec_test/Makefile $(KEXEC_TEST_SRCS) \
+	kexec_test/x86-setup-legacy-pic.S
+
+BUILD_KEXEC_TEST = no
+ifeq ($(ARCH),i386)
+BUILD_KEXEC_TEST = yes
+endif
+ifeq ($(ARCH),x86_64)
+BUILD_KEXEC_TEST = yes
+endif
+
+ifeq ($(BUILD_KEXEC_TEST),yes)
+
+KEXEC_TEST_OBJS = $(call objify, $(KEXEC_TEST_SRCS))
+KEXEC_TEST_DEPS = $(call depify, $(KEXEC_TEST_OBJS))
+
+KEXEC_TEST = $(PKGLIBDIR)/kexec_test
+
+clean += $(KEXEC_TEST_OBJS) $(KEXEC_TEST_DEPS) $(KEXEC_TEST)
+
+-include $(KEXEC_TEST_DEPS)
+
+$(KEXEC_TEST): CC=$(TARGET_CC)
+$(KEXEC_TEST): CPPFLAGS+=-DRELOC=$(RELOC)
+$(KEXEC_TEST): ASFLAGS+=-m32
+#$(KEXEC_TEST): LDFLAGS=-m32 -Wl,-e -Wl,_start -Wl,-Ttext -Wl,$(RELOC) \
+#		-nostartfiles
+$(KEXEC_TEST): LDFLAGS=-melf_i386 -e _start -Ttext $(RELOC)
+
+$(KEXEC_TEST): $(KEXEC_TEST_OBJS)
+	mkdir -p $(@D)
+	$(TARGET_LD) $(LDFLAGS) -o $@ $^
+
+endif
diff --git a/kexec_test/kexec_test.S b/kexec_test/kexec_test.S
new file mode 100644
index 0000000..ad081bc
--- /dev/null
+++ b/kexec_test/kexec_test.S
@@ -0,0 +1,477 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the
terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "config.h" + + .equ PROT_CODE_SEG, pmcs - gdt + .equ REAL_CODE_SEG, rmcs - gdt + .equ PROT_DATA_SEG, pmds - gdt + .equ REAL_DATA_SEG, rmds - gdt + .equ CR0_PE, 1 + /* Gas thinks the .equs for these are non-absolute so use a define */ +#define PROT_CODE_SEG 0x08 +#define REAL_CODE_SEG 0x18 +#undef i386 + + .text + .arch i386 + .globl _start +_start: + .code32 + # Disable interrupts + cli + + # Save the initial registers + movl %eax, orig_eax + movl %ebx, orig_ebx + movl %ecx, orig_ecx + movl %edx, orig_edx + movl %esi, orig_esi + movl %edi, orig_edi + movl %esp, orig_esp + movl %ebp, orig_ebp + + # Setup a stack + movl $stack_end, %esp + + # Display a message to say everything is working so far + pushl $s_hello + call print_string + addl $4, %esp + + # Save the idt and gdt + sidt orig_idtp + sgdt orig_gdtp + + # Display the initial register contents + call print_orig_regs + + pushl $s_switching_descriptors + call print_string + addl $4, %esp + + # Load descriptor pointers + lgdt gdtp + lidt idtp + # Reload the data segments + movl $PROT_DATA_SEG, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + movl %eax, %fs + movl %eax, %gs + + # Reload %cs + ljmp $PROT_CODE_SEG, $_start.1 +_start.1: + + pushl $s_descriptors_changed + call print_string + addl $4, %esp + + call setup_legacy_pic + pushl $s_legacy_pic_setup + call print_string + addl $4, %esp + + call prot_to_real + .code16 + + 
callw test16 + + /* Return to 32bit mode */ + data32 call real_to_prot + .code32 + pushl $s_in_protected_mode + call print_string + addl $4, %esp + + pushl $s_halting + call print_string + addl $4, %esp + jmp halt + + + /* Go from protected to real mode */ +prot_to_real: + .code32 + /* Load the 16bit idt */ + lidt idtp_real + + popl %eax + subl $RELOC, %eax /* Adjust return address */ + pushl %eax + subl $RELOC, %esp /* Adjust stack pointer */ + ljmp $REAL_CODE_SEG, $1f - RELOC +1: + .code16 + /* Reload the segment registers to force a 16bit limit */ + movw $REAL_DATA_SEG, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + movw %ax, %fs + movw %ax, %gs + + /* Clear the PE bit of CR0 */ + movl %cr0, %eax + andl $0!CR0_PE, %eax + movl %eax, %cr0 + + /* make intersegment jmp to flush the processor pipeline + * and reload %cs:%eip (to clear upper 16 bits of %eip). + */ + data32 ljmp $(RELOC)>>4,$2f- RELOC +2: + /* we are in real mode now + * set up the real mode segment registers + */ + movw %cs,%ax + movw %ax,%ds + movw %ax,%es + movw %ax,%ss + movw %ax,%fs + movw %ax,%gs + data32 ret + +real_to_prot: + .code16 + pushl %ebx + + /* Compute the address of gdtp */ + movw %cs, %ax + shlw $4, %ax + movl $gdtp, %ebx + subw %ax, %bx + + data32 lgdt %cs:(%bx) + movl %cr0, %eax + orl $CR0_PE, %eax + movl %eax, %cr0 + + /* flush prefetch queue and reload %cs:%eip */ + data32 ljmp $PROT_CODE_SEG, $1f +1: + .code32 + /* reload other segment registers */ + movl $PROT_DATA_SEG, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + movl %eax, %fs + movl %eax, %gs + + popl %ebx /* Restore %ebx */ + + addl $RELOC, %esp /* Fix up stack pointer */ + + popl %eax /* Fix up return address */ + addl $RELOC, %eax + pushl %eax + + lidt idtp /* Load a dummy idt */ + ret + + +halt: + .code32 + hlt + jmp halt + +print_orig_regs: + .code32 + # Display the initial register contents + pushl $s_eax + call print_string + pushl orig_eax + call print_hex + pushl $space + call print_string + addl 
$12, %esp + + pushl $s_ebx + call print_string + pushl orig_ebx + call print_hex + pushl $space + call print_string + addl $12, %esp + + + pushl $s_ecx + call print_string + pushl orig_ecx + call print_hex + pushl $space + call print_string + addl $12, %esp + + + pushl $s_edx + call print_string + pushl orig_edx + call print_hex + pushl $crlf + call print_string + addl $12, %esp + + + pushl $s_esi + call print_string + pushl orig_esi + call print_hex + pushl $space + call print_string + addl $12, %esp + + pushl $s_edi + call print_string + pushl orig_edi + call print_hex + pushl $space + call print_string + addl $12, %esp + + + pushl $s_esp + call print_string + pushl orig_esp + call print_hex + pushl $space + call print_string + addl $12, %esp + + + pushl $s_ebp + call print_string + pushl orig_ebp + call print_hex + pushl $crlf + call print_string + addl $12, %esp + + # display the interrupt descritor table pointer + pushl $s_idtp + call print_string + movzwl orig_idtp, %eax + pushl %eax + call print_hex + pushl $space + call print_string + pushl orig_idt_base + call print_hex + pushl $crlf + call print_string + addl $20, %esp + + # display the global descritor table pointer + pushl $s_gdtp + call print_string + movzwl orig_gdtp, %eax + pushl %eax + call print_hex + pushl $space + call print_string + pushl orig_gdt_base + call print_hex + pushl $crlf + call print_string + addl $20, %esp + + ret + + +print_string: + .code32 + pushl %ebp + movl %esp, %ebp + pushl %esi + movl 8(%ebp), %esi + xorl %eax, %eax +print_string.1: + lodsb %ds:(%esi), %al + testb $0xff, %al + jz print_string.2 + call print_char + jmp print_string.1 +print_string.2: + popl %esi + popl %ebp + ret + + +print_hex: + .code32 + pushl %ebp + movl %esp, %ebp + movb $32, %cl +print_hex.1: + movl 8(%ebp), %eax + subb $4, %cl + shrl %cl, %eax + andb $0x0f, %al + cmpb $9, %al + ja print_hex.2 + addb $'0', %al + jmp print_hex.3 +print_hex.2: + addb $'A' - 10, %al +print_hex.3: + pushl %ecx + call 
print_char + popl %ecx + testb %cl, %cl + jnz print_hex.1 + + popl %ebp + ret + +print_char: + .code32 + # The character to print is in al + call serial_print_char + retl + + +#define TTYS0_BASE 0x3f8 +#define TTYS0_RBR (TTYS0_BASE + 0x00) +#define TTYS0_TBR (TTYS0_BASE + 0x00) +#define TTYS0_LSR (TTYS0_BASE + 0x05) +serial_print_char: + .code32 + # The character to print is in al; %eax is preserved, %edx is clobbered + pushl %eax + + # Wait until the serial port is ready to receive characters +serial_print_char.1: + movl $TTYS0_LSR, %edx + inb %dx, %al + testb $0x20, %al + jz serial_print_char.1 + + # Output the character (reloaded from the copy saved on the stack) + movl $TTYS0_TBR, %edx + movb 0(%esp), %al + outb %al, %dx + + # Wait until the serial port has transmitted the character +serial_print_char.2: + movl $TTYS0_LSR, %edx + inb %dx, %al + testb $0x40, %al + jz serial_print_char.2 + + # Restore %eax + popl %eax + # Return to caller + ret + + .code32 + +idtp_real: + .word 0x400 # idt limit = 0x400 (256 vectors * 4 bytes) + .word 0, 0 # idt base = 0L +idtp: + .word 0 # idt limit = 0 + .word 0, 0 # idt base = 0L + +gdt: +gdtp: + .word gdt_end - gdt - 1 # gdt limit + .long gdt # gdt base + .word 0 # dummy + +pmcs: + # the 32 bit protected mode code segment + .word 0xffff,0 + .byte 0,0x9f,0xcf,0 + +pmds: + # the 32 bit protected mode data segment + .word 0xffff,0 + .byte 0,0x93,0xcf,0 + +rmcs: + # the 16 bit real mode code segment + .word 0xffff,(RELOC&0xffff) + .byte (RELOC>>16),0x9b,0x00,(RELOC>>24) + +rmds: + # the 16 bit real mode data segment + .word 0xffff,(RELOC&0xffff) + .byte (RELOC>>16),0x93,0x00,(RELOC>>24) +gdt_end: + + +s_hello: + .ascii "kexec_test " + .ascii PACKAGE_VERSION + .asciz " starting...\r\n" +s_switching_descriptors: + .asciz "Switching descriptors.\r\n" +s_descriptors_changed: + .asciz "Descriptors changed.\r\n" +s_legacy_pic_setup: + .asciz "Legacy pic setup.\r\n" +s_in_protected_mode: + .asciz "In protected mode.\r\n" +s_halting: + .asciz "Halting.\r\n" + + +space: .asciz " " +crlf: .asciz "\r\n" +s_eax: .asciz "eax: " +s_ebx: .asciz "ebx: " +s_ecx:
.asciz "ecx: " +s_edx: .asciz "edx: " +s_esi: .asciz "esi: " +s_edi: .asciz "edi: " +s_esp: .asciz "esp: " +s_ebp: .asciz "ebp: " + + +s_idtp: .asciz "idt: " +s_gdtp: .asciz "gdt: " + +#include "x86-setup-legacy-pic.S" + + .bss + .balign 4096 +stack: + .skip 4096 +stack_end: + + .bss + .balign 4 +orig_eax: .long 0 +orig_ebx: .long 0 +orig_ecx: .long 0 +orig_edx: .long 0 +orig_esi: .long 0 +orig_edi: .long 0 +orig_esp: .long 0 +orig_ebp: .long 0 + + .balign 4 +orig_idtp: .short 0 +orig_idt_base: .long 0 +orig_gdtp: .short 0 +orig_gdt_base: .long 0 + diff --git a/kexec_test/kexec_test16.S b/kexec_test/kexec_test16.S new file mode 100644 index 0000000..4d37915 --- /dev/null +++ b/kexec_test/kexec_test16.S @@ -0,0 +1,1004 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + + .text + .code16 + + .globl test16 + .balign 16 + .globl _start16 +_start16: +test16: + pushw $s_in_real_mode - _start16 + call print_string16 + addw $2, %sp + +#if 0 + /* Mask every interrupt line on both PICs (0xff to ports 0x21 and 0xa1) */ + movb $0xff, %al + outb %al, $0x21 + outb %al, $0xa1 +#endif + /* Enable interrupts, BIOS calls may fail if we don't */ + sti + pushw $s_interrupts_enabled - _start16 + call print_string16 + addw $2, %sp + + /* Get the base memory size, via a bios call */ + /* This is to test BIOS calls more than to achieve anything practical */ + xorw %ax, %ax + int $0x12 + pushw %ax + pushw $s_base_memory_size - _start16 + call print_string16 + addw $2, %sp + call print_hex16 + addw $2, %sp + pushw $s_crlf - _start16 + call print_string16 + addw $2, %sp + + + /* Some things do not like a20 being enabled so disable it */ + call disable_a20 + + /* Here we test various BIOS calls to determine how much of the system is working */ + call get_meme820 + call print_meme820 + call print_meme801 + call print_mem88 + call disable_apm + call print_equipment_list + call print_sysdesc + call print_video + call print_cursor + call print_video_mode + call set_auto_repeat_rate + call print_dasd_type + call print_edd + + /* Enable a20 */ + call enable_a20 + pushw $s_a20_enabled - _start16 + call print_string16 + addw $2, %sp + + /* Disable interrupts */ + cli + pushw $s_interrupts_disabled - _start16 + call print_string16 + addw $2, %sp + + /* 16-bit near return, matching the callw that entered test16 */ + retw + +# +# Enable A20. This is at the very best an annoying procedure. +# A20 code ported from SYSLINUX 1.52-1.63 by H. Peter Anvin. +# + +A20_TEST_LOOPS = 32 # Iterations per wait +A20_ENABLE_LOOPS = 255 # Total loops to try +A20_DISABLE_LOOPS = 255 # Total loops to try + +enable_a20: + .code16 + movb $A20_ENABLE_LOOPS, a20_tries - _start16 +a20_try_loop: + + # First, see if we are on a system with no A20 gate.
+a20_none: + call a20_test + jnz a20_done + + # Next, try the BIOS (INT 0x15, AX=0x2401) +a20_bios: + movw $0x2401, %ax + pushfl # Be paranoid about flags + int $0x15 + popfl + + call a20_test + jnz a20_done + + # Try enabling A20 through the keyboard controller +a20_kbc: + call empty_8042 + + call a20_test # Just in case the BIOS worked + jnz a20_done # but had a delayed reaction. + + movb $0xD1, %al # command write + outb %al, $0x64 + call empty_8042 + + movb $0xDF, %al # A20 on + outb %al, $0x60 + call empty_8042 + + # Wait until a20 really *is* enabled; it can take a fair amount of + # time on certain systems; Toshiba Tecras are known to have this + # problem. +a20_kbc_wait: + xorw %cx, %cx +a20_kbc_wait_loop: + call a20_test + jnz a20_done + loop a20_kbc_wait_loop + + # Final attempt: use "configuration port A" +a20_fast: + inb $0x92, %al # Configuration Port A + orb $0x02, %al # "fast A20" version + andb $0xFE, %al # don't accidentally reset + outb %al, $0x92 + + # Wait for configuration port A to take effect +a20_fast_wait: + xorw %cx, %cx +a20_fast_wait_loop: + call a20_test + jnz a20_done + loop a20_fast_wait_loop + + # A20 is still not responding. Try frobbing it again. + # + decb (a20_tries - _start16) + jnz a20_try_loop + jmp a20_die + + # Out of retries: report the failure and stop. The pushed argument + # is never popped because control does not come back from halt16. +a20_die: + pushw $s_a20_err_msg - _start16 + call print_string16 + jmp halt16 + + # If we get here, all is good +a20_done: + ret + + + +# This routine tests whether or not A20 is enabled. If so, it +# exits with zf = 0. +# +# The memory address used, 0x200, is the int $0x80 vector, which +# should be safe.
+ +A20_TEST_ADDR = 4*0x80 + +a20_test: + .code16 + pushw %cx + pushw %ax + xorw %cx, %cx + movw %cx, %fs # Low memory + decw %cx + movw %cx, %gs # High memory area + # With %fs = 0 and %gs = 0xffff, %gs:(A20_TEST_ADDR+0x10) is 1MB above + # %fs:(A20_TEST_ADDR); the two alias each other only while A20 is masked. + movw $A20_TEST_LOOPS, %cx + movw %fs:(A20_TEST_ADDR), %ax + pushw %ax +a20_test_wait: + incw %ax + movw %ax, %fs:(A20_TEST_ADDR) + call delay # Serialize and make delay constant + cmpw %gs:(A20_TEST_ADDR+0x10), %ax + loope a20_test_wait + + popw %fs:(A20_TEST_ADDR) + popw %ax + popw %cx + + ret + +# +# Disable A20 +# + +disable_a20: + .code16 + movb $A20_DISABLE_LOOPS, a20_disable_tries - _start16 +a20_disable_loop: + + # First see if gate A20 is already disabled + call a20_test + jz a20_disabled + + + # Next, try the BIOS (INT 0x15, AX= 0x2400) + movw $0x2400, %ax + pushfl # Be paranoid about flags + int $0x15 + popfl + + call a20_test + jz a20_disabled + + # Try disabling A20 through the keyboard controller + call empty_8042 + + call a20_test # Just in case the BIOS worked + jz a20_disabled # but had a delayed reaction. + + movb $0xD1, %al # command write + outb %al, $0x64 + call empty_8042 + + movb $0xDD, %al # A20 off + outb %al, $0x60 + call empty_8042 + + # Wait until a20 really *is* disabled + xorw %cx, %cx +a20_kbc_disable_loop: + call a20_test + jz a20_disabled + loop a20_kbc_disable_loop + + # Final attempt: use "configuration port A" + inb $0x92, %al # Configuration Port A + andb $0xFD, %al # "fast A20" version + andb $0xFE, %al # don't accidentally reset + outb %al, $0x92 + + # Wait for configuration port A to take effect + xorw %cx, %cx +a20_fast_disable_loop: + call a20_test + jz a20_disabled + loop a20_fast_disable_loop + + # A20 is still not responding.
Try it again + decb (a20_disable_tries - _start16) + jnz a20_disable_loop + + pushw $s_a20_cant_disable - _start16 + call print_string16 + addw $2, %sp + retw + + # If we get here, all is good +a20_disabled: + pushw $s_a20_disabled - _start16 + call print_string16 + addw $2, %sp + retw + + +# This routine checks that the keyboard command queue is empty +# (after emptying the output buffers) +# +# Some machines have delusions that the keyboard buffer is always full +# with no keyboard attached... +# +# If there is no keyboard controller, we will usually get 0xff +# to all the reads. With each IO taking a microsecond and +# a timeout of 100,000 iterations, this can take about half a +# second ("delay" == outb to port 0x80). That should be ok, +# and should also be plenty of time for a real keyboard controller +# to empty. +# +# Clobbers %al; %ecx is saved and restored. +# + +empty_8042: + .code16 + pushl %ecx + movl $100000, %ecx + +empty_8042_loop: + decl %ecx + jz empty_8042_end_loop + + call delay + + inb $0x64, %al # 8042 status port + testb $1, %al # output buffer? + jz no_output + + call delay + inb $0x60, %al # read it + jmp empty_8042_loop + +no_output: + testb $2, %al # is input buffer full? + jnz empty_8042_loop # yes - loop +empty_8042_end_loop: + popl %ecx + ret + + + + +# method E820H: +# the memory map from hell. e820h returns memory classified into +# a whole bunch of different types, and allows memory holes and +# everything. We scan through this memory map and build a list +# of the first 32 memory areas, which we store in e820_map (with the +# number of entries in e820nr). +# This is documented at http://www.teleport.com/~acpi/acpihtml/topic245.htm + +#define SMAP 0x534d4150 +#define E820_MAX 32 +#define E820_SIZE 20 + +get_meme820: + .code16 + pushw %bp + movw %sp, %bp + pushw %ds + pushw %es + pushl %esi + pushl %edi + pushl %ebx + + xorl %eax, %eax + movb %al, e820nr - _start16 + xorl %ebx, %ebx # continuation counter + movw $e820_map - _start16, %di # point into the whitelist + # so we can have the bios + # directly write into it.
+ +jmpe820: + movl $0x0000e820, %eax # e820, upper word zeroed + movl $SMAP, %edx # ascii SMAP + movl $E820_SIZE, %ecx # size of the e820rec + pushw %ds # data record. + popw %es + int $0x15 # make the call + jc bail820 # fall to e801 if it fails + + cmpl $SMAP, %eax # check the return is SMAP + jne bail820 # fall to e801 if it fails + +# cmpl $1, 16(%di) # is this usable memory? +# jne again820 + + # If this is usable memory, we save it by simply advancing %di by + # sizeof(e820rec). + # +good820: + movb e820nr - _start16, %al # up to 32 entries + cmpb $E820_MAX, %al + jnl bail820 + + incb e820nr - _start16 + movw %di, %ax + addw $20, %ax + movw %ax, %di +again820: + cmpl $0, %ebx # check to see if + jne jmpe820 # %ebx is set to EOF +bail820: + popl %ebx + popl %edi + popl %esi + popw %es + popw %ds + popw %bp + retw + + +# Print the e820nr records stored in e820_map, one per line, as +# <bytes 8-15> @ <bytes 0-7> type: <bytes 16-19> +# with each field printed most significant word first. +# %si is preserved; %ax, %cx and %dx are clobbered by the print helpers. +print_meme820: + .code16 + pushw %si + xorw %cx, %cx + movb (e820nr - _start16), %cl + movw $e820_map - _start16, %si + + pushw $s_meme820 - _start16 + call print_string16 + addw $2, %sp + + # Nothing to print when get_meme820 stored no entries. Without this + # check the loop below would run once, wrap %cx from 0 to 0xffff and + # walk far past the end of e820_map. + testw %cx, %cx + jz print_meme820.2 + +print_meme820.1: + pushw %cx + + pushw 8(%si) + pushw 10(%si) + pushw 12(%si) + pushw 14(%si) + call print_hex16 + addw $2, %sp + call print_hex16 + addw $2, %sp + call print_hex16 + addw $2, %sp + call print_hex16 + addw $2, %sp + + pushw $s_at - _start16 + call print_string16 + addw $2, %sp + + pushw 0(%si) + pushw 2(%si) + pushw 4(%si) + pushw 6(%si) + call print_hex16 + addw $2, %sp + call print_hex16 + addw $2, %sp + call print_hex16 + addw $2, %sp + call print_hex16 + addw $2, %sp + + pushw $s_type - _start16 + call print_string16 + addw $2, %sp + + pushw 16(%si) + pushw 18(%si) + call print_hex16 + addw $2, %sp + call print_hex16 + addw $2, %sp + + pushw $s_crlf - _start16 + call print_string16 + addw $2, %sp + + popw %cx + addw $E820_SIZE, %si + subw $1, %cx + jnz print_meme820.1 + +print_meme820.2: + popw %si + retw + + + +print_meme801: + .code16 + pushw %bp + movw %sp, %bp + pushw %bx + pushl $0 + +# method E801H: +# memory size is in 1k chunksizes + + stc # fix to
work around buggy + xorw %cx,%cx # BIOSes which don't clear/set + xorw %dx,%dx # carry on pass/error of + # e801h memory size call + # or merely pass cx,dx through + # without changing them. + movw $0xe801, %ax + int $0x15 + jc print_meme801.2 + + cmpw $0x0, %cx # Kludge to handle BIOSes + jne e801usecxdx # which report their extended + cmpw $0x0, %dx # memory in AX/BX rather than + jne e801usecxdx # CX/DX. The spec I have read + movw %ax, %cx # seems to indicate AX/BX + movw %bx, %dx # are more reasonable anyway... + +e801usecxdx: + andl $0xffff, %edx # clear sign extend + shll $6, %edx # and go from 64k to 1k chunks + movl %edx, -6(%bp) # store extended memory size + andl $0xffff, %ecx # clear sign extend + addl %ecx, -6(%bp) # and add lower memory into the total + + pushw $s_meme801 - _start16 + call print_string16 + addw $2, %sp + + # Print the 32-bit total: the high word is pushed last so it prints first + pushw -6(%bp) + pushw -4(%bp) + call print_hex16 + addw $2, %sp + call print_hex16 + addw $2, %sp + + pushw $s_crlf - _start16 + call print_string16 + addw $2, %sp + +print_meme801.2: + addw $4, %sp # drop the 32-bit local pushed on entry + popw %bx + popw %bp + retw + +print_mem88: + .code16 +# Ye Olde Traditional Methode. Returns the memory size (up to 16mb or +# 64mb, depending on the bios) in ax.
+ movb $0x88, %ah + int $0x15 + + pushw %ax + pushw $s_mem88 - _start16 + call print_string16 + addw $2, %sp + call print_hex16 + addw $2, %sp + pushw $s_crlf - _start16 + call print_string16 + addw $2, %sp + + retw + +print_dasd_type: + .code16 + pushw $s_dasd_type - _start16 + call print_string16 + addw $2, %sp + + /* int 0x13, AH=0x15 for drive 0x81; carry set means no answer */ + movw $0x1500, %ax + movb $0x81, %dl + int $0x13 + jc print_dasd_type.1 + + /* Pushed in reverse so the output reads: AX, space, CX, DX */ + pushw %dx + pushw %cx + pushw $s_space - _start16 + pushw %ax + + call print_hex16 + addw $2, %sp + call print_string16 + addw $2, %sp + call print_hex16 + addw $2, %sp + call print_hex16 + addw $2, %sp + jmp print_dasd_type.2 +print_dasd_type.1: + pushw $s_none - _start16 + call print_string16 + addw $2, %sp + +print_dasd_type.2: + pushw $s_crlf - _start16 + call print_string16 + addw $2, %sp + + retw + +print_equipment_list: + .code16 + pushw $s_equipment_list - _start16 + call print_string16 + addw $2, %sp + + int $0x11 + pushw %ax + call print_hex16 + addw $2, %sp + + pushw $s_crlf - _start16 + call print_string16 + addw $2, %sp + + retw + +print_sysdesc: + .code16 + pushw $s_sysdesc - _start16 + call print_string16 + addw $2, %sp + + pushw %es + movb $0xc0, %ah + stc + int $0x15 + /* mov and pop leave the flags alone, so CF from int 0x15 reaches the jc */ + movw %es, %ax + popw %es + jc print_sysdesc.1 + + /* Prints the returned %es:%bx pointer as "segment:offset" */ + pushw %bx + pushw $s_colon - _start16 + pushw %ax + call print_hex16 + addw $2, %sp + call print_string16 + addw $2, %sp + call print_hex16 + addw $2, %sp + jmp print_sysdesc.2 + +print_sysdesc.1: + pushw $s_none - _start16 + call print_string16 + addw $2, %sp + +print_sysdesc.2: + pushw $s_crlf - _start16 + call print_string16 + addw $2, %sp + + retw + +print_edd: + .code16 + pushw $s_edd - _start16 + call print_string16 + add $2, %sp + + movb $0x80, %dl + movb $0x41, %ah # Function 41 + movw $0x55aa, %bx # magic + int $0x13 # make the call + jc print_edd.1 # no more BIOS devices + + cmpw $0xAA55, %bx # is magic right?
+ jne print_edd.1 # nope + + pushw $s_ok - _start16 + call print_string16 + add $2, %sp + jmp print_edd.2 + +print_edd.1: + pushw $s_none - _start16 + call print_string16 + add $2, %sp + +print_edd.2: + pushw $s_crlf - _start16 + call print_string16 + addw $2, %sp + + retw + +set_auto_repeat_rate: + .code16 + pushw $s_auto_repeat_rate - _start16 + call print_string16 + add $2, %sp + +# Set the keyboard repeat rate to the max + movw $0x0305, %ax + xorw %bx, %bx + int $0x16 + + pushw $s_done - _start16 + call print_string16 + add $2, %sp + + retw + +print_video: + .code16 + pushw $s_video_type - _start16 + call print_string16 + add $2, %sp + + movb $0x12, %ah # Check EGA/VGA + movb $0x10, %bl + int $0x10 + movw $s_video_pre_ega - _start16, %cx + cmpb $0x10, %bl # %bl unchanged -> call not supported + je print_video.1 + + movw $0x1a00, %ax # Check EGA or VGA? + int $0x10 + movw $s_video_vga - _start16, %cx + cmpb $0x1a, %al # 1a means VGA... + je print_video.1 # anything else is EGA. + + movw $s_video_ega - _start16, %cx + +print_video.1: + pushw %cx + call print_string16 + addw $2, %sp + + pushw $s_crlf - _start16 + call print_string16 + addw $2, %sp + + retw + +print_cursor: + .code16 + pushw $s_cursor - _start16 + call print_string16 + add $2, %sp + + movb $0x03, %ah # Read cursor position + xorb %bh, %bh + int $0x10 + + # %dh is pushed last so it is printed first, followed by %dl + xorw %ax, %ax + movb %dl, %al + pushw %ax + pushw $s_space - _start16 + movb %dh, %al + pushw %ax + + call print_hex16 + add $2, %sp + call print_string16 + add $2, %sp + call print_hex16 + add $2, %sp + + pushw $s_crlf - _start16 + call print_string16 + add $2, %sp + + retw + +print_video_mode: + .code16 + pushw $s_video_mode - _start16 + call print_string16 + add $2, %sp + + movb $0x0f, %ah # Get current video mode + int $0x10 + + xorb %ah, %ah # keep only %al for printing + pushw %ax + call print_hex16 + add $2, %sp + + pushw $s_crlf - _start16 + call print_string16 + add $2, %sp + + retw + + +disable_apm: + push %bp + movw %sp, %bp + pushw %bx + + pushw $s_testing_for_apm - _start16 + call print_string16 + add
$2, %sp + + # check for APM BIOS + movw $0x5300, %ax # APM BIOS installation check + xorw %bx, %bx + int $0x15 + jc done_apm_bios # error -> no APM BIOS + + cmpw $0x504d, %bx # check for "PM" signature + jne done_apm_bios # no signature -> no APM BIOS + + pushw $s_apm_found_disconnecting - _start16 + call print_string16 + add $2, %sp + + movw $0x5304, %ax # Disconnect first just in case + xorw %bx, %bx + int $0x15 # ignore return code + + pushw $s_apm_connecting - _start16 + call print_string16 + add $2, %sp + + movw $0x5301, %ax # Real Mode connect + xorw %bx, %bx + int $0x15 + jc done_apm_bios # error + + pushw $s_apm_disabling - _start16 + call print_string16 + add $2, %sp + + movw $0x5308, %ax # Disable APM + mov $0xffff, %bx + xorw %cx, %cx + int $0x15 + + pushw $s_apm_disconnecting - _start16 + call print_string16 + add $2, %sp + + movw $0x5304, %ax # Do a final disconnect + xorw %bx, %bx + int $0x15 + +done_apm_bios: + pushw $s_apm_test_done - _start16 + call print_string16 + add $2, %sp + + popw %bx + popw %bp + retw + + +# Delay is needed after doing I/O +# (a single write of %al to port 0x80) +delay: + .code16 + outb %al,$0x80 + retw + +# Stop here forever: go back to hlt whenever execution resumes. +halt16: + .code16 + hlt + jmp halt16 + + +# Print the NUL-terminated string whose 16-bit offset (relative to %ds) +# is the stack argument. The caller pops the argument. +print_string16: + .code16 + pushw %bp + movw %sp, %bp + pushw %si + movw 4(%bp), %si + xorw %ax, %ax +print_string16.1: + lodsb %ds:(%si), %al + testb $0xff, %al + jz print_string16.2 + call print_char16 + jmp print_string16.1 +print_string16.2: + popw %si + popw %bp + ret + +# Print the 16-bit stack argument as four hex digits, most significant +# nibble first. Clobbers %ax and %cx; the caller pops the argument. +print_hex16: + .code16 + pushw %bp + movw %sp, %bp + movw $16, %cx +print_hex16.1: + movw 4(%bp), %ax + subb $4, %cl + shrw %cl, %ax + andb $0x0f, %al + cmpb $9, %al + ja print_hex16.2 + addb $'0', %al + jmp print_hex16.3 +print_hex16.2: + addb $'A' - 10, %al +print_hex16.3: + pushw %cx + call print_char16 + popw %cx + testb %cl, %cl + jnz print_hex16.1 + + popw %bp + ret + +print_char16: + .code16 + # The character to print is in al + call serial_print_char16 + retw + + +#define TTYS0_BASE 0x3f8 +#define TTYS0_RBR (TTYS0_BASE + 0x00) +#define
TTYS0_TBR (TTYS0_BASE + 0x00) +#define TTYS0_LSR (TTYS0_BASE + 0x05) + +serial_print_char16: + .code16 + pushw %bp + movw %sp, %bp + # The character to print is in al; %ax is preserved, %dx is clobbered + pushw %ax + + # Wait until the serial port is ready to receive characters +serial_print_char16.1: + movw $TTYS0_LSR, %dx + inb %dx, %al + testb $0x20, %al + jz serial_print_char16.1 + + # Output the character (reloaded from the copy saved at -2(%bp)) + movw $TTYS0_TBR, %dx + movb -2(%bp), %al + outb %al, %dx + + # Wait until the serial port has transmitted the character +serial_print_char16.2: + movw $TTYS0_LSR, %dx + inb %dx, %al + testb $0x40, %al + jz serial_print_char16.2 + + # Restore %ax + popw %ax + # Return to caller + popw %bp + retw + + +s_a20_err_msg: + .asciz "A20 gate not responding!\r\n" + +s_in_real_mode: + .asciz "In real mode.\r\n" +s_base_memory_size: + .asciz "Base memory size: " +s_interrupts_enabled: + .asciz "Interrupts enabled.\r\n" +s_a20_disabled: + .asciz "A20 disabled.\r\n" +s_a20_cant_disable: + .asciz "Can not disable A20 line.\r\n" +s_a20_enabled: + .asciz "A20 enabled\r\n" +s_interrupts_disabled: + .asciz "Interrupts disabled.\r\n" + +s_meme820: .asciz "E820 Memory Map.\r\n" +s_at: .asciz " @ " +s_type: .asciz " type: " +s_space: .asciz " " +s_colon: .asciz ":" +s_none: .asciz " none " +s_ok: .asciz " ok " +s_done: .asciz " done\r\n" + +s_meme801: + .asciz "E801 Memory size: " +s_mem88: + .asciz "Mem88 Memory size: " + +s_dasd_type: + .asciz "DASD type: " +s_equipment_list: + .asciz "Equiptment list: " +s_sysdesc: + .asciz "Sysdesc: " +s_edd: + .asciz "EDD: " +s_auto_repeat_rate: + .asciz "Setting auto repeat rate " + + +s_video_type: + .asciz "Video type: " +s_video_pre_ega: + .asciz "CGA/MDA/HGA" +s_video_ega: + .asciz "EGA" +s_video_vga: + .asciz "VGA" + +s_cursor: + .asciz "Cursor Position(Row,Column): " + +s_video_mode: + .asciz "Video Mode: " + +s_testing_for_apm: + .asciz "Testing for APM.\r\n" +s_apm_found_disconnecting: + .asciz "APM Found disconnecting.\r\n" +s_apm_connecting: + .asciz "APM connecting.\r\n"
+s_apm_disabling: + .asciz "APM disabling.\r\n" +s_apm_disconnecting: + .asciz "APM disconnecting.\r\n" +s_apm_test_done: + .asciz "APM test done.\r\n" + +s_crlf: .asciz "\r\n" + + + +a20_tries: .byte A20_ENABLE_LOOPS +a20_disable_tries: .byte A20_DISABLE_LOOPS + + +e820nr: .byte 0 +e820_map: .fill E820_MAX * E820_SIZE, 1, 0 diff --git a/kexec_test/x86-setup-legacy-pic.S b/kexec_test/x86-setup-legacy-pic.S new file mode 100644 index 0000000..32ef42f --- /dev/null +++ b/kexec_test/x86-setup-legacy-pic.S @@ -0,0 +1,53 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + + .text + .code32 +setup_legacy_pic: + /* Load the legacy dos settings into the 8259A pic: + * master vectors at 0x08-0x0f, slave vectors at 0x70-0x77, + * slave cascaded on the master's IR2, 8086 mode with normal EOI, + * all lines unmasked. Clobbers %al. + * + * ICW1 = 0x11 selects cascade mode with ICW4, so each chip expects + * ICW2, ICW3 and ICW4 in that order. ICW3 must not be skipped, or + * the ICW4 byte is taken as ICW3 and the next write as ICW4. + */ + movb $0xff, %al + outb %al, $0x21 /* mask all of 8259A-1 */ + outb %al, $0xa1 /* mask all of 8259A-2 */ + + movb $0x11, %al + outb %al, $0x20 /* ICW1: select 8259A-1 init */ + outb %al, $0x80 /* A short delay */ + movb $0x08, %al + outb %al, $0x21 /* ICW2: 8259A-1 IR0-7 mapped to 0x8-0xf */ + outb %al, $0x80 /* A short delay */ + movb $0x04, %al + outb %al, $0x21 /* ICW3: 8259A-1 has a slave on IR2 */ + outb %al, $0x80 /* A short delay */ + movb $01, %al + outb %al, $0x21 /* ICW4: 8086 mode, normal EOI */ + outb %al, $0x80 /* A short delay */ + + + movb $0x11, %al + outb %al, $0xA0 /* ICW1: select 8259A-2 init */ + outb %al, $0x80 /* A short delay */ + movb $0x70, %al + outb %al, $0xA1 /* ICW2: 8259A-2 IR0-7 mapped to 0x70-0x77 */ + outb %al, $0x80 /* A short delay */ + movb $0x02, %al + outb %al, $0xA1 /* ICW3: 8259A-2 is the slave on the master's IR2 */ + outb %al, $0x80 /* A short delay */ + movb $01, %al + outb %al, $0xA1 /* ICW4: 8086 mode, normal EOI */ + outb %al, $0x80 /* A short delay */ + + movb $0, %al + outb %al, $0x21 /* Unmask all of 8259A-1 */ + outb %al, $0xa1 /* Unmask all of 8259A-2 */ + + ret |