diff options
Diffstat (limited to '')
31 files changed, 7687 insertions, 0 deletions
diff --git a/kexec/arch/ppc/Makefile b/kexec/arch/ppc/Makefile new file mode 100644 index 0000000..71871f1 --- /dev/null +++ b/kexec/arch/ppc/Makefile @@ -0,0 +1,34 @@ +# +# kexec ppc (linux booting linux) +# +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +ppc_KEXEC_SRCS = kexec/arch/ppc/kexec-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-elf-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-elf-rel-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-dol-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/kexec-uImage-ppc.c +ppc_KEXEC_SRCS += kexec/arch/ppc/ppc-setup-simple.S +ppc_KEXEC_SRCS += kexec/arch/ppc/ppc-setup-dol.S +ppc_KEXEC_SRCS += kexec/arch/ppc/fixup_dtb.c +ppc_KEXEC_SRCS += kexec/arch/ppc/fs2dt.c +ppc_KEXEC_SRCS += kexec/arch/ppc/crashdump-powerpc.c + +ppc_UIMAGE = kexec/kexec-uImage.c + +ppc_libfdt_SRCS = kexec/arch/ppc/libfdt-wrapper.c +libfdt_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) +ppc_ARCH_REUSE_INITRD = + +ppc_CPPFLAGS = -I$(srcdir)/kexec/libfdt + +ppc_KEXEC_SRCS += $(libfdt_SRCS) $(ppc_libfdt_SRCS) + +ppc_ASFLAGS = -Wa,--noexecstack + +dist += kexec/arch/ppc/Makefile $(ppc_KEXEC_SRCS) \ + kexec/arch/ppc/crashdump-powerpc.h kexec/arch/ppc/fixup_dtb.h \ + kexec/arch/ppc/kexec-ppc.h kexec/arch/ppc/ops.h \ + kexec/arch/ppc/ppc_asm.h \ + kexec/arch/ppc/include/page.h kexec/arch/ppc/include/types.h \ + kexec/arch/ppc/include/arch/options.h diff --git a/kexec/arch/ppc/crashdump-powerpc.c b/kexec/arch/ppc/crashdump-powerpc.c new file mode 100644 index 0000000..15e8531 --- /dev/null +++ b/kexec/arch/ppc/crashdump-powerpc.c @@ -0,0 +1,433 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include <dirent.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../crashdump.h" +#include "kexec-ppc.h" +#include "crashdump-powerpc.h" + +#define DEVTREE_CRASHKERNEL_BASE "/proc/device-tree/chosen/linux,crashkernel-base" +#define DEVTREE_CRASHKERNEL_SIZE "/proc/device-tree/chosen/linux,crashkernel-size" + +#ifdef CONFIG_PPC64 +static struct crash_elf_info elf_info64 = { +class: ELFCLASS64, +data: ELFDATA2MSB, +machine: EM_PPC64, +page_offset: PAGE_OFFSET, +lowmem_limit: MAXMEM, +}; +#endif +static struct crash_elf_info elf_info32 = { +class: ELFCLASS32, +data: ELFDATA2MSB, +#ifdef CONFIG_PPC64 +machine: EM_PPC64, +#else +machine: EM_PPC, +#endif +page_offset: PAGE_OFFSET, +lowmem_limit: MAXMEM, +}; + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region + */ +static struct memory_range *crash_memory_range; +static int crash_nr_memory_ranges; + +/* Define a variable to replace the CRASH_MAX_MEMORY_RANGES macro */ +static int crash_max_memory_ranges; + +/* + * Used to save various memory ranges/regions needed for the captured + * kernel to boot. (lime memmap= option in other archs) + */ +mem_rgns_t usablemem_rgns = {0, NULL}; + +/* Append a segment to crash_memory_range, splitting it into two if + * it contains both lowmem and highmem */ +static void add_crash_memory_range(unsigned long long start, + unsigned long long end) +{ +#ifndef CONFIG_PPC64 + if (start < elf_info32.lowmem_limit && end > elf_info32.lowmem_limit) { + add_crash_memory_range(start, elf_info32.lowmem_limit); + add_crash_memory_range(elf_info32.lowmem_limit, end); + return; + } +#endif + + if (crash_nr_memory_ranges < crash_max_memory_ranges) { + crash_memory_range[crash_nr_memory_ranges].start = start; + crash_memory_range[crash_nr_memory_ranges].end = end; + crash_memory_range[crash_nr_memory_ranges].type = RANGE_RAM; + } + + crash_nr_memory_ranges++; +} + + +/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to + * create Elf headers. Keeping it separate from get_memory_ranges() as + * requirements are different in the case of normal kexec and crashdumps. + * + * Normal kexec needs to look at all of available physical memory irrespective + * of the fact how much of it is being used by currently running kernel. + * Crashdumps need to have access to memory regions actually being used by + * running kernel. Expecting a different file/data structure than /proc/iomem + * to look into down the line. May be something like /proc/kernelmem or may + * be zone data structures exported from kernel. + */ +static int get_crash_memory_ranges(struct memory_range **range, int *ranges) +{ + + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + DIR *dir, *dmem; + int fd; + struct dirent *dentry, *mentry; + int n, crash_rng_len = 0; + unsigned long long start, end, cstart, cend; + + crash_max_memory_ranges = max_memory_ranges + 6; + crash_rng_len = sizeof(struct memory_range) * crash_max_memory_ranges; + + crash_memory_range = (struct memory_range *) malloc(crash_rng_len); + if (!crash_memory_range) { + fprintf(stderr, "Allocation for crash memory range failed\n"); + return -1; + } + memset(crash_memory_range, 0, crash_rng_len); + crash_nr_memory_ranges = 0; + +#ifndef CONFIG_BOOKE + /* create a separate program header for the backup region */ + add_crash_memory_range(BACKUP_SRC_START, BACKUP_SRC_END + 1); +#endif + + dir = opendir(device_tree); + if (!dir) { + perror(device_tree); + goto err; + } + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "memory@", 7) + && strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + dmem = opendir(fname); + if (!dmem) { + perror(fname); + closedir(dir); + goto err; + } + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + fd = open(fname, O_RDONLY); + if (fd < 0) { + perror(fname); + closedir(dmem); + closedir(dir); + goto err; + } + n = read_memory_region_limits(fd, &start, &end); + /* We are done with fd, close it. */ + close(fd); + if (n != 0) { + closedir(dmem); + closedir(dir); + goto err; + } +#ifndef CONFIG_BOOKE + if (start == 0 && end >= (BACKUP_SRC_END + 1)) + start = BACKUP_SRC_END + 1; +#endif + + /* + * Exclude the region that lies within crashkernel. + * If memory limit is set then exclude memory region + * above it. + */ + + if (memory_limit) { + if (start >= memory_limit) + continue; + if (end > memory_limit) + end = memory_limit; + } + + /* + * Exclure region used by crash kernel + */ + cstart = crash_base; + cend = crash_base + crash_size; + + if (cstart >= end || cend <= start) + add_crash_memory_range(start, end); + else { + if (start < cstart) + add_crash_memory_range(start, cstart); + if (cend < end) + add_crash_memory_range(cend, end); + } + } + closedir(dmem); + } + closedir(dir); + + /* + * If RTAS region is overlapped with crashkernel, need to create ELF + * Program header for the overlapped memory. + */ + if (crash_base < rtas_base + rtas_size && + rtas_base < crash_base + crash_size) { + cstart = rtas_base; + cend = rtas_base + rtas_size; + if (cstart < crash_base) + cstart = crash_base; + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + add_crash_memory_range(cstart, cend); + } + + if (crash_nr_memory_ranges >= crash_max_memory_ranges) { + fprintf(stderr, + "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + goto err; + } + + *range = crash_memory_range; + *ranges = crash_nr_memory_ranges; + + int j; + dbgprintf("CRASH MEMORY RANGES\n"); + for (j = 0; j < *ranges; j++) { + start = crash_memory_range[j].start; + end = crash_memory_range[j].end; + dbgprintf("%016Lx-%016Lx\n", start, end); + } + + return 0; + +err: + if (crash_memory_range) + free(crash_memory_range); + return -1; +} + +/* Converts unsigned long to ascii string. */ +static void ulltoa(unsigned long long i, char *str) +{ + int j = 0, k; + char tmp; + + do { + str[j++] = i % 10 + '0'; + } while ((i /= 10) > 0); + str[j] = '\0'; + + /* Reverse the string. */ + for (j = 0, k = strlen(str) - 1; j < k; j++, k--) { + tmp = str[k]; + str[k] = str[j]; + str[j] = tmp; + } +} + +/* Append str to cmdline */ +static void add_cmdline(char *cmdline, char *str) +{ + int cmdline_size; + int cmdlen = strlen(cmdline) + strlen(str); + + cmdline_size = COMMAND_LINE_SIZE; + if (cmdlen > (cmdline_size - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); +} + +static int add_cmdline_param(char *cmdline, unsigned long long addr, + char *cmdstr, char *byte) +{ + int align = 1024; + char str[COMMAND_LINE_SIZE], *ptr; + + /* Passing in =xxxK / =xxxM format. Saves space required in cmdline.*/ + switch (byte[0]) { + case 'K': + if (addr%align) + return -1; + addr = addr/align; + break; + case 'M': + addr = addr/(align *align); + break; + } + ptr = str; + strcpy(str, cmdstr); + ptr += strlen(str); + ulltoa(addr, ptr); + strcat(str, byte); + + add_cmdline(cmdline, str); + + dbgprintf("Command line after adding elfcorehdr: %s\n", cmdline); + + return 0; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for backup region, another segment for storing elf headers + * for crash memory image. + */ +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + unsigned long max_addr, unsigned long min_base) +{ + void *tmp; + unsigned long sz, elfcorehdr; + int nr_ranges, align = 1024, i; + unsigned long long end; + struct memory_range *mem_range; + + if (get_crash_memory_ranges(&mem_range, &nr_ranges) < 0) + return -1; + + info->backup_src_start = BACKUP_SRC_START; + info->backup_src_size = BACKUP_SRC_SIZE; +#ifndef CONFIG_BOOKE + /* Create a backup region segment to store backup data*/ + sz = _ALIGN(BACKUP_SRC_SIZE, align); + tmp = xmalloc(sz); + memset(tmp, 0, sz); + info->backup_start = add_buffer(info, tmp, sz, sz, align, + 0, max_addr, 1); + reserve(info->backup_start, sz); +#endif + + /* On powerpc memory ranges in device-tree is denoted as start + * and size rather than start and end, as is the case with + * other architectures like i386 . Because of this when loading + * the memory ranges in crashdump-elf.c the filesz calculation + * [ end - start + 1 ] goes for a toss. + * + * To be in sync with other archs adjust the end value for + * every crash memory range before calling the generic function + */ + + for (i = 0; i < nr_ranges; i++) { + end = crash_memory_range[i].end - 1; + crash_memory_range[i].end = end; + } + + +#ifdef CONFIG_PPC64 + /* Create elf header segment and store crash image data. */ + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + if (crash_create_elf64_headers(info, &elf_info64, + crash_memory_range, nr_ranges, &tmp, + &sz, ELF_CORE_HEADER_ALIGN) < 0) + return -1; + } else if (crash_create_elf32_headers(info, &elf_info32, + crash_memory_range, nr_ranges, &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) + return -1; +#else + if (crash_create_elf32_headers(info, &elf_info32, crash_memory_range, + nr_ranges, &tmp, &sz, ELF_CORE_HEADER_ALIGN) + < 0) + return -1; +#endif + + elfcorehdr = add_buffer(info, tmp, sz, sz, align, + min_base, max_addr, 1); + reserve(elfcorehdr, sz); + /* modify and store the cmdline in a global array. This is later + * read by flatten_device_tree and modified if required + */ + add_cmdline_param(mod_cmdline, elfcorehdr, " elfcorehdr=", "K"); + add_cmdline(mod_cmdline, " maxcpus=1"); + return 0; +} + +/* + * Used to save various memory regions needed for the captured kernel. + */ + +void add_usable_mem_rgns(unsigned long long base, unsigned long long size) +{ + int i; + unsigned long long end = base + size; + unsigned long long ustart, uend; + + base = _ALIGN_DOWN(base, getpagesize()); + end = _ALIGN_UP(end, getpagesize()); + + for (i = 0; i < usablemem_rgns.size; i++) { + ustart = usablemem_rgns.ranges[i].start; + uend = usablemem_rgns.ranges[i].end; + if (base < uend && end > ustart) { + if ((base >= ustart) && (end <= uend)) + return; + if (base < ustart && end > uend) { + usablemem_rgns.ranges[i].start = base; + usablemem_rgns.ranges[i].end = end; + return; + } else if (base < ustart) { + usablemem_rgns.ranges[i].start = base; + return; + } else if (end > uend) { + usablemem_rgns.ranges[i].end = end; + return; + } + } + } + usablemem_rgns.ranges[usablemem_rgns.size].start = base; + usablemem_rgns.ranges[usablemem_rgns.size++].end = end; + + dbgprintf("usable memory rgns size:%u base:%llx size:%llx\n", + usablemem_rgns.size, base, size); +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + unsigned long long value; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_BASE, &value)) + *start = value; + else + return -1; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_SIZE, &value)) + *end = *start + value - 1; + else + return -1; + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + int fd; + + fd = open(DEVTREE_CRASHKERNEL_BASE, O_RDONLY); + if (fd < 0) + return 0; + close(fd); + return 1; +} + diff --git a/kexec/arch/ppc/crashdump-powerpc.h b/kexec/arch/ppc/crashdump-powerpc.h new file mode 100644 index 0000000..97b5095 --- /dev/null +++ b/kexec/arch/ppc/crashdump-powerpc.h @@ -0,0 +1,45 @@ +#ifndef CRASHDUMP_POWERPC_H +#define CRASHDUMP_POWERPC_H + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + unsigned long max_addr, unsigned long min_base); +void add_usable_mem_rgns(unsigned long long base, unsigned long long size); + +extern struct arch_options_t arch_options; + +#ifdef CONFIG_PPC64 +#define PAGE_OFFSET 0xC000000000000000UL +#define VMALLOCBASE 0xD000000000000000UL +#define MAXMEM (-KERNELBASE-VMALLOCBASE) +#else +#define PAGE_OFFSET 0xC0000000 +#define MAXMEM 0x30000000 /* Use CONFIG_LOWMEM_SIZE from kernel */ +#endif + +#define KERNELBASE PAGE_OFFSET +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) + +#ifdef CONFIG_BOOKE +/* We don't need backup region in Book E */ +#define BACKUP_SRC_START 0x0000 +#define BACKUP_SRC_END 0x0000 +#define BACKUP_SRC_SIZE 0x0000 +#else +/* Backup Region, First 64K of System RAM. */ +#define BACKUP_SRC_START 0x0000 +#define BACKUP_SRC_END 0xffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) +#endif + +#define KDUMP_BACKUP_LIMIT BACKUP_SRC_SIZE + +extern unsigned long long crash_base; +extern unsigned long long crash_size; +extern unsigned int rtas_base; +extern unsigned int rtas_size; +extern uint64_t opal_base; +extern uint64_t opal_size; +extern uint64_t memory_limit; + +#endif /* CRASHDUMP_POWERPC_H */ diff --git a/kexec/arch/ppc/fixup_dtb.c b/kexec/arch/ppc/fixup_dtb.c new file mode 100644 index 0000000..92a0bfd --- /dev/null +++ b/kexec/arch/ppc/fixup_dtb.c @@ -0,0 +1,408 @@ +#define _GNU_SOURCE +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include <libfdt.h> +#include "ops.h" +#include "page.h" +#include "fixup_dtb.h" +#include "kexec-ppc.h" + +const char proc_dts[] = "/proc/device-tree"; + +static void print_fdt_reserve_regions(char *blob_buf) +{ + int i, num; + + if (!kexec_debug) + return; + /* Print out a summary of the final reserve regions */ + num = fdt_num_mem_rsv(blob_buf); + dbgprintf ("reserve regions: %d\n", num); + for (i = 0; i < num; i++) { + uint64_t offset, size; + + if (fdt_get_mem_rsv(blob_buf, i, &offset, &size) == 0) { + dbgprintf("%d: offset: %llx, size: %llx\n", i, offset, size); + } else { + dbgprintf("Error retreiving reserved region\n"); + } + } +} + + +static void fixup_nodes(char *nodes[]) +{ + int index = 0; + char *fname; + char *prop_name; + char *node_name; + void *node; + int len; + char *content; + off_t content_size; + int ret; + + while (nodes[index]) { + + len = asprintf(&fname, "%s%s", proc_dts, nodes[index]); + if (len < 0) + die("asprintf() failed\n"); + + content = slurp_file(fname, &content_size); + if (!content) { + die("Can't open %s: %s\n", fname, strerror(errno)); + } + + prop_name = fname + len; + while (*prop_name != '/') + prop_name--; + + *prop_name = '\0'; + prop_name++; + + node_name = fname + sizeof(proc_dts) - 1; + + node = finddevice(node_name); + if (!node) + node = create_node(NULL, node_name + 1); + + ret = setprop(node, prop_name, content, content_size); + if (ret < 0) + die("setprop of %s/%s size: %ld failed: %s\n", + node_name, prop_name, content_size, + fdt_strerror(ret)); + + free(content); + free(fname); + index++; + }; +} + +/* + * command line priority: + * - use the supplied command line + * - if none available use the command line from .dtb + * - if not available use the current command line + */ +static void fixup_cmdline(const char *cmdline) +{ + void *chosen; + char *fixup_cmd_node[] = { + "/chosen/bootargs", + NULL, + }; + + chosen = finddevice("/chosen"); + + if (!cmdline) { + if (!chosen) + fixup_nodes(fixup_cmd_node); + } else { + if (!chosen) + chosen = create_node(NULL, "chosen"); + setprop_str(chosen, "bootargs", cmdline); + } + return; +} + +#define EXPAND_GRANULARITY 1024 + +static char *expand_buf(int minexpand, char *blob_buf, off_t *blob_size) +{ + int size = fdt_totalsize(blob_buf); + int rc; + + size = _ALIGN(size + minexpand, EXPAND_GRANULARITY); + blob_buf = realloc(blob_buf, size); + if (!blob_buf) + die("Couldn't find %d bytes to expand device tree\n\r", size); + rc = fdt_open_into(blob_buf, blob_buf, size); + if (rc != 0) + die("Couldn't expand fdt into new buffer: %s\n\r", + fdt_strerror(rc)); + + *blob_size = fdt_totalsize(blob_buf); + + return blob_buf; +} + +static void fixup_reserve_regions(struct kexec_info *info, char *blob_buf) +{ + int ret, i; + int nodeoffset; + u64 val = 0; + + /* If this is a KEXEC kernel we add all regions since they will + * all need to be saved */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + for (i = 0; i < info->nr_segments; i++) { + uint64_t address = (unsigned long)info->segment[i].mem; + uint64_t size = info->segment[i].memsz; + + while ((i+1) < info->nr_segments && + (address + size == (unsigned long)info->segment[i+1].mem)) { + size += info->segment[++i].memsz; + } + + ret = fdt_add_mem_rsv(blob_buf, address, size); + if (ret) { + printf("%s: Error adding memory range to memreserve!\n", + fdt_strerror(ret)); + goto out; + } + } + } else if (ramdisk || reuse_initrd) { + /* Otherwise we just add back the ramdisk and the device tree + * is already in the list */ + ret = fdt_add_mem_rsv(blob_buf, ramdisk_base, ramdisk_size); + if (ret) { + printf("%s: Unable to add new reserved memory for initrd flat device tree\n", + fdt_strerror(ret)); + goto out; + } + } + +#if 0 + /* XXX: Do not reserve spin-table for CPUs. */ + + /* Add reserve regions for cpu-release-addr */ + nodeoffset = fdt_node_offset_by_prop_value(blob_buf, -1, "device_type", "cpu", 4); + while (nodeoffset != -FDT_ERR_NOTFOUND) { + const void *buf; + int sz, ret; + u64 tmp; + + buf = fdt_getprop(blob_buf, nodeoffset, "cpu-release-addr", &sz); + + if (buf) { + if (sz == 4) { + tmp = *(u32 *)buf; + } else if (sz == 8) { + tmp = *(u64 *)buf; + } + + /* crude check to see if last value is repeated */ + if (_ALIGN_DOWN(tmp, PAGE_SIZE) != _ALIGN_DOWN(val, PAGE_SIZE)) { + val = tmp; + ret = fdt_add_mem_rsv(blob_buf, _ALIGN_DOWN(val, PAGE_SIZE), PAGE_SIZE); + if (ret) + printf("%s: Unable to add reserve for cpu-release-addr!\n", + fdt_strerror(ret)); + } + } + + nodeoffset = fdt_node_offset_by_prop_value(blob_buf, nodeoffset, + "device_type", "cpu", 4); + } +#endif + +out: + print_fdt_reserve_regions(blob_buf); +} + +static void fixup_memory(struct kexec_info *info, char *blob_buf) +{ + if (info->kexec_flags & KEXEC_ON_CRASH) { + int nodeoffset, len = 0; + u8 tmp[16]; + const unsigned long *addrcell, *sizecell; + + nodeoffset = fdt_path_offset(blob_buf, "/memory"); + + if (nodeoffset < 0) { + printf("Error searching for memory node!\n"); + return; + } + + addrcell = fdt_getprop(blob_buf, 0, "#address-cells", NULL); + /* use shifts and mask to ensure endianness */ + if ((addrcell) && (*addrcell == 2)) { + tmp[0] = (crash_base >> 56) & 0xff; + tmp[1] = (crash_base >> 48) & 0xff; + tmp[2] = (crash_base >> 40) & 0xff; + tmp[3] = (crash_base >> 32) & 0xff; + tmp[4] = (crash_base >> 24) & 0xff; + tmp[5] = (crash_base >> 16) & 0xff; + tmp[6] = (crash_base >> 8) & 0xff; + tmp[7] = (crash_base ) & 0xff; + len = 8; + } else { + tmp[0] = (crash_base >> 24) & 0xff; + tmp[1] = (crash_base >> 16) & 0xff; + tmp[2] = (crash_base >> 8) & 0xff; + tmp[3] = (crash_base ) & 0xff; + len = 4; + } + + sizecell = fdt_getprop(blob_buf, 0, "#size-cells", NULL); + /* use shifts and mask to ensure endianness */ + if ((sizecell) && (*sizecell == 2)) { + tmp[0+len] = (crash_size >> 56) & 0xff; + tmp[1+len] = (crash_size >> 48) & 0xff; + tmp[2+len] = (crash_size >> 40) & 0xff; + tmp[3+len] = (crash_size >> 32) & 0xff; + tmp[4+len] = (crash_size >> 24) & 0xff; + tmp[5+len] = (crash_size >> 16) & 0xff; + tmp[6+len] = (crash_size >> 8) & 0xff; + tmp[7+len] = (crash_size ) & 0xff; + len += 8; + } else { + tmp[0+len] = (crash_size >> 24) & 0xff; + tmp[1+len] = (crash_size >> 16) & 0xff; + tmp[2+len] = (crash_size >> 8) & 0xff; + tmp[3+len] = (crash_size ) & 0xff; + len += 4; + } + + if (fdt_setprop(blob_buf, nodeoffset, "reg", tmp, len) != 0) { + printf ("Error setting memory node!\n"); + } + + fdt_delprop(blob_buf, nodeoffset, "linux,usable-memory"); + } +} + +/* removes crashkernel nodes if they exist and we are *rebooting* + * into a crashkernel. These nodes should not exist after we + * crash and reboot into a new kernel + */ +static void fixup_crashkernel(struct kexec_info *info, char *blob_buf) +{ + int nodeoffset; + + nodeoffset = fdt_path_offset(blob_buf, "/chosen"); + + if (info->kexec_flags & KEXEC_ON_CRASH) { + if (nodeoffset < 0) { + printf("fdt_crashkernel: %s\n", fdt_strerror(nodeoffset)); + return; + } + + fdt_delprop(blob_buf, nodeoffset, "linux,crashkernel-base"); + fdt_delprop(blob_buf, nodeoffset, "linux,crashkernel-size"); + } +} +/* remove the old chosen nodes if they exist and add correct chosen + * nodes if we have an initd + */ +static void fixup_initrd(char *blob_buf) +{ + int err, nodeoffset; + unsigned long tmp; + + nodeoffset = fdt_path_offset(blob_buf, "/chosen"); + + if (nodeoffset < 0) { + printf("fdt_initrd: %s\n", fdt_strerror(nodeoffset)); + return; + } + + fdt_delprop(blob_buf, nodeoffset, "linux,initrd-start"); + fdt_delprop(blob_buf, nodeoffset, "linux,initrd-end"); + + if ((reuse_initrd || ramdisk) && + ((ramdisk_base != 0) && (ramdisk_size != 0))) { + tmp = ramdisk_base; + err = fdt_setprop(blob_buf, nodeoffset, + "linux,initrd-start", &tmp, sizeof(tmp)); + if (err < 0) { + printf("WARNING: " + "could not set linux,initrd-start %s.\n", + fdt_strerror(err)); + return; + } + + tmp = ramdisk_base + ramdisk_size; + err = fdt_setprop(blob_buf, nodeoffset, + "linux,initrd-end", &tmp, sizeof(tmp)); + if (err < 0) { + printf("WARNING: could not set linux,initrd-end %s.\n", + fdt_strerror(err)); + return; + } + } +} + +char *fixup_dtb_init(struct kexec_info *info, char *blob_buf, off_t *blob_size, + unsigned long hole_addr, unsigned long *dtb_addr) +{ + int ret, i, num = fdt_num_mem_rsv(blob_buf); + + fdt_init(blob_buf); + + /* Remove the existing reserve regions as they will no longer + * be valid after we reboot */ + for (i = num - 1; i >= 0; i--) { + ret = fdt_del_mem_rsv(blob_buf, i); + if (ret) { + printf("%s: Error deleting memory reserve region %d from device tree!\n", + fdt_strerror(ret), i); + } + } + + /* Pack the FDT first, so we don't grow excessively if there is already free space */ + ret = fdt_pack(blob_buf); + if (ret) + printf("%s: Unable to pack flat device tree\n", fdt_strerror(ret)); + + /* info->nr_segments just a guide, will grow by at least EXPAND_GRANULARITY */ + blob_buf = expand_buf(info->nr_segments * sizeof(struct fdt_reserve_entry), + blob_buf, blob_size); + + /* add reserve region for *THIS* fdt */ + *dtb_addr = locate_hole(info, *blob_size, 0, + hole_addr, hole_addr+KERNEL_ACCESS_TOP, -1); + ret = fdt_add_mem_rsv(blob_buf, *dtb_addr, PAGE_ALIGN(*blob_size)); + if (ret) { + printf("%s: Unable to add new reserved memory for the flat device tree\n", + fdt_strerror(ret)); + } + + return blob_buf; +} + +static void save_fixed_up_dtb(char *blob_buf, off_t blob_size) +{ + FILE *fp; + + if (!kexec_debug) + return; + fp = fopen("debug.dtb", "w"); + if (fp) { + if ( blob_size == fwrite(blob_buf, sizeof(char), blob_size, fp)) { + dbgprintf("debug.dtb written\n"); + } else { + dbgprintf("Unable to write debug.dtb\n"); + } + + fclose(fp); + } else { + dbgprintf("Unable to dump flat device tree to debug.dtb\n"); + } +} + +char *fixup_dtb_finalize(struct kexec_info *info, char *blob_buf, off_t *blob_size, + char *nodes[], char *cmdline) +{ + fixup_nodes(nodes); + fixup_cmdline(cmdline); + fixup_reserve_regions(info, blob_buf); + fixup_memory(info, blob_buf); + fixup_initrd(blob_buf); + fixup_crashkernel(info, blob_buf); + + blob_buf = (char *)dt_ops.finalize(); + *blob_size = fdt_totalsize(blob_buf); + + save_fixed_up_dtb(blob_buf, *blob_size); + + return blob_buf; +} diff --git a/kexec/arch/ppc/fixup_dtb.h b/kexec/arch/ppc/fixup_dtb.h new file mode 100644 index 0000000..b706a5a --- /dev/null +++ b/kexec/arch/ppc/fixup_dtb.h @@ -0,0 +1,10 @@ +#ifndef __FIXUP_DTB_H +#define __FIXUP_DTB_H + +char *fixup_dtb_init(struct kexec_info *info, char *blob_buf, off_t *blob_size, + unsigned long hole_addr, unsigned long *dtb_addr); + +char *fixup_dtb_finalize(struct kexec_info *info, char *blob_buf, off_t *blob_size, + char *nodes[], char *cmdline); + +#endif diff --git a/kexec/arch/ppc/fs2dt.c b/kexec/arch/ppc/fs2dt.c new file mode 100644 index 0000000..fed499b --- /dev/null +++ b/kexec/arch/ppc/fs2dt.c @@ -0,0 +1,471 @@ +/* + * fs2dt: creates a flattened device-tree + * + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com), IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <sys/types.h> +#include <sys/stat.h> + +#include <fcntl.h> +#include <dirent.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include "../../kexec.h" +#include "kexec-ppc.h" +#include "types.h" + +#define MAXPATH 1024 /* max path name length */ +#define NAMESPACE 16384 /* max bytes for property names */ +#define TREEWORDS 65536 /* max 32 bit words for properties */ +#define MEMRESERVE 256 /* max number of reserved memory blks */ +#define MAX_MEMORY_RANGES 1024 + +static char pathname[MAXPATH]; +static char propnames[NAMESPACE] = { 0 }; +static unsigned dtstruct[TREEWORDS], *dt; +static unsigned long long mem_rsrv[2*MEMRESERVE] = { 0, 0 }; + +static int crash_param; +static char local_cmdline[COMMAND_LINE_SIZE] = { "" }; +static unsigned *dt_len; /* changed len of modified cmdline + in flat device-tree */ +static struct bootblock bb[1]; + +void reserve(unsigned long long where, unsigned long long length) +{ + size_t offset; + + for (offset = 0; mem_rsrv[offset + 1]; offset += 2) + ; + + if (offset + 4 >= 2 * MEMRESERVE) + die("unrecoverable error: exhasuted reservation meta data\n"); + + mem_rsrv[offset] = where; + mem_rsrv[offset + 1] = length; + mem_rsrv[offset + 3] = 0; /* N.B: don't care about offset + 2 */ +} + +/* look for properties we need to reserve memory space for */ +static void checkprop(char *name, unsigned *data, int len) +{ + static unsigned long long base, size, end; + + if ((data == NULL) && (base || size || end)) + die("unrecoverable error: no property data"); + else if (!strcmp(name, "linux,rtas-base")) + base = *data; + else if (!strcmp(name, "linux,tce-base")) + base = *(unsigned long long *) data; + else if (!strcmp(name, "rtas-size") || + !strcmp(name, "linux,tce-size")) + size = *data; + else if (reuse_initrd && !strcmp(name, "linux,initrd-start")) + if (len == 8) + base = *(unsigned long long *) data; + else + base = *data; + else if (reuse_initrd && !strcmp(name, "linux,initrd-end")) + end = *(unsigned long long *) data; + + if (size && end) + die("unrecoverable error: size and end set at same time\n"); + if (base && size) { + reserve(base, size); + base = 0; + size = 0; + } + if (base && end) { + reserve(base, end-base); + base = 0; + end = 0; + } +} + +/* + * return the property index for a property name, creating a new one + * if needed. + */ +static unsigned propnum(const char *name) +{ + unsigned offset = 0; + + while (propnames[offset]) + if (strcmp(name, propnames+offset)) + offset += strlen(propnames+offset)+1; + else + return offset; + + if (NAMESPACE - offset < strlen(name) + 1) + die("unrecoverable error: propnames overrun\n"); + + strcpy(propnames+offset, name); + + return offset; +} + +static void add_usable_mem_property(int fd, int len) +{ + char fname[MAXPATH], *bname; + unsigned long buf[2]; + unsigned long ranges[2*MAX_MEMORY_RANGES]; + unsigned long long base, end, loc_base, loc_end; + int range, rlen = 0; + + strcpy(fname, pathname); + bname = strrchr(fname, '/'); + bname[0] = '\0'; + bname = strrchr(fname, '/'); + if (strncmp(bname, "/memory@", 8) && strcmp(bname, "/memory")) + return; + + if (lseek(fd, 0, SEEK_SET) < 0) + die("unrecoverable error: error seeking in \"%s\": %s\n", + pathname, strerror(errno)); + if (read_memory_region_limits(fd, &base, &end) != 0) + die("unrecoverable error: error parsing memory/reg limits\n"); + + for (range = 0; range < usablemem_rgns.size; range++) { + loc_base = usablemem_rgns.ranges[range].start; + loc_end = usablemem_rgns.ranges[range].end; + if (loc_base >= base && loc_end <= end) { + ranges[rlen++] = loc_base; + ranges[rlen++] = loc_end - loc_base; + } else if (base < loc_end && end > loc_base) { + if (loc_base < base) + loc_base = base; + if (loc_end > end) + loc_end = end; + ranges[rlen++] = loc_base; + ranges[rlen++] = loc_end - loc_base; + } + } + + if (!rlen) { + /* + * User did not pass any ranges for thsi region. Hence, write + * (0,0) duple in linux,usable-memory property such that + * this region will be ignored. + */ + ranges[rlen++] = 0; + ranges[rlen++] = 0; + } + + rlen = rlen * sizeof(unsigned long); + /* + * No add linux,usable-memory property. + */ + *dt++ = 3; + *dt++ = rlen; + *dt++ = propnum("linux,usable-memory"); + memcpy(dt, &ranges, rlen); + dt += (rlen + 3)/4; +} + +/* put all properties (files) in the property structure */ +static void putprops(char *fn, struct dirent **nlist, int numlist) +{ + struct dirent *dp; + int i = 0, fd, len; + struct stat statbuf; + + for (i = 0; i < numlist; i++) { + dp = nlist[i]; + strcpy(fn, dp->d_name); + + if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) + continue; + + if (lstat(pathname, &statbuf)) + die("unrecoverable error: could not stat \"%s\": %s\n", + pathname, strerror(errno)); + + if (!crash_param && !strcmp(fn, "linux,crashkernel-base")) + continue; + + if (!crash_param && !strcmp(fn, "linux,crashkernel-size")) + continue; + + /* + * This property will be created for each node during kexec + * boot. So, ignore it. + */ + if (!strcmp(dp->d_name, "linux,pci-domain") || + !strcmp(dp->d_name, "linux,htab-base") || + !strcmp(dp->d_name, "linux,htab-size") || + !strcmp(dp->d_name, "linux,kernel-end") || + !strcmp(dp->d_name, "linux,usable-memory")) + continue; + + /* This property will be created/modified later in putnode() + * So ignore it, unless we are reusing the initrd. + */ + if ((!strcmp(dp->d_name, "linux,initrd-start") || + !strcmp(dp->d_name, "linux,initrd-end")) && + !reuse_initrd) + continue; + + if (!S_ISREG(statbuf.st_mode)) + continue; + + len = statbuf.st_size; + + *dt++ = 3; + dt_len = dt; + *dt++ = len; + *dt++ = propnum(fn); + + fd = open(pathname, O_RDONLY); + if (fd == -1) + die("unrecoverable error: could not open \"%s\": %s\n", + pathname, strerror(errno)); + + if (read(fd, dt, len) != len) + die("unrecoverable error: could not read \"%s\": %s\n", + pathname, strerror(errno)); + + checkprop(fn, dt, len); + + /* Get the cmdline from the device-tree and modify it */ + if (!strcmp(dp->d_name, "bootargs")) { + int cmd_len; + char temp_cmdline[COMMAND_LINE_SIZE] = { "" }; + char *param = NULL; + cmd_len = strlen(local_cmdline); + if (cmd_len != 0) { + param = strstr(local_cmdline, "crashkernel="); + if (param) + crash_param = 1; + param = strstr(local_cmdline, "root="); + } + if (!param) { + char *old_param; + memcpy(temp_cmdline, dt, len); + param = strstr(temp_cmdline, "root="); + if (param) { + old_param = strtok(param, " "); + if (cmd_len != 0) + strcat(local_cmdline, " "); + strcat(local_cmdline, old_param); + } + } + strcat(local_cmdline, " "); + cmd_len = strlen(local_cmdline); + cmd_len = cmd_len + 1; + memcpy(dt, local_cmdline, cmd_len); + len = cmd_len; + *dt_len = cmd_len; + + dbgprintf("Modified cmdline:%s\n", local_cmdline); + + } + + dt += (len + 3)/4; + if (!strcmp(dp->d_name, "reg") && usablemem_rgns.size) + add_usable_mem_property(fd, len); + close(fd); + } + + fn[0] = '\0'; + checkprop(pathname, NULL, 0); +} + +/* + * Compare function used to sort the device-tree directories + * This function will be passed to scandir. + */ +static int comparefunc(const void *dentry1, const void *dentry2) +{ + char *str1 = (*(struct dirent **)dentry1)->d_name; + char *str2 = (*(struct dirent **)dentry2)->d_name; + char *p1, *p2; + int res = 0, max_len; + + /* + * strcmp scans from left to right and fails to idetify for some + * strings such as memory@10000000 and memory@f000000. + * Therefore, we get the wrong sorted order like memory@10000000 and + * memory@f000000. + */ + if ((p1 = strchr(str1, '@')) && (p2 = strchr(str2, '@'))) { + max_len = max(p1 - str1, p2 - str2); + if ((res = strncmp(str1, str2, max_len)) == 0) { + /* prefix is equal - compare part after '@' by length */ + p1++; p2++; + res = strlen(p1) - strlen(p2); + if (res == 0) + /* equal length, compare by strcmp() */ + res = strcmp(p1,p2); + } + } else { + res = strcmp(str1, str2); + } + + return res; +} + +/* + * put a node (directory) in the property structure. first properties + * then children. + */ +static void putnode(void) +{ + char *dn; + struct dirent *dp; + char *basename; + struct dirent **namelist; + int numlist, i; + struct stat statbuf; + + numlist = scandir(pathname, &namelist, 0, comparefunc); + if (numlist < 0) + die("unrecoverable error: could not scan \"%s\": %s\n", + pathname, strerror(errno)); + if (numlist == 0) + die("unrecoverable error: no directory entries in \"%s\"", + pathname); + + basename = strrchr(pathname, '/') + 1; + + *dt++ = 1; + strcpy((void *)dt, *basename ? basename : ""); + dt += strlen((void *)dt) / sizeof(unsigned) + 1; + + strcat(pathname, "/"); + dn = pathname + strlen(pathname); + + putprops(dn, namelist, numlist); + + /* + * Add initrd entries to the second kernel + * if + * a) a ramdisk is specified in cmdline + * OR + * b) reuseinitrd is specified and a initrd is + * used by the kernel. + * + */ + if ((ramdisk || (initrd_base && reuse_initrd)) + && !strcmp(basename, "chosen/")) { + int len = 8; + unsigned long long initrd_end; + *dt++ = 3; + *dt++ = len; + *dt++ = propnum("linux,initrd-start"); + + memcpy(dt, &initrd_base, len); + dt += (len + 3)/4; + + len = 8; + *dt++ = 3; + *dt++ = len; + *dt++ = propnum("linux,initrd-end"); + + initrd_end = initrd_base + initrd_size; + + memcpy(dt, &initrd_end, len); + dt += (len + 3)/4; + /* reserve the existing initrd image in case of reuse_initrd */ + if (initrd_base && initrd_size && reuse_initrd) + reserve(initrd_base, initrd_size); + } + + for (i = 0; i < numlist; i++) { + dp = namelist[i]; + strcpy(dn, dp->d_name); + free(namelist[i]); + + if (!strcmp(dn, ".") || !strcmp(dn, "..")) + continue; + + if (lstat(pathname, &statbuf)) + die("unrecoverable error: could not stat \"%s\": %s\n", + pathname, strerror(errno)); + + if (S_ISDIR(statbuf.st_mode)) + putnode(); + } + + *dt++ = 2; + dn[-1] = '\0'; + free(namelist); +} + +int create_flatten_tree(struct kexec_info *info, unsigned char **bufp, + unsigned long *sizep, char *cmdline) +{ + unsigned long len; + unsigned long tlen; + unsigned char *buf; + unsigned long me; + + me = 0; + + strcpy(pathname, "/proc/device-tree/"); + + dt = dtstruct; + + if (cmdline) + strcpy(local_cmdline, cmdline); + + putnode(); + *dt++ = 9; + + len = _ALIGN(sizeof(bb[0]), 8); + + bb->off_mem_rsvmap = len; + + for (len = 1; mem_rsrv[len]; len += 2) + ; + len += 3; + len *= sizeof(mem_rsrv[0]); + + bb->off_dt_struct = bb->off_mem_rsvmap + len; + + len = dt - dtstruct; + len *= sizeof(unsigned); + bb->dt_struct_size = len; + bb->off_dt_strings = bb->off_dt_struct + len; + + len = propnum(""); + bb->dt_strings_size = len; + len = _ALIGN(len, 4); + bb->totalsize = bb->off_dt_strings + len; + + bb->magic = 0xd00dfeed; + bb->version = 17; + bb->last_comp_version = 16; + + reserve(me, bb->totalsize); /* patched later in kexec_load */ + + buf = (unsigned char *) malloc(bb->totalsize); + *bufp = buf; + memcpy(buf, bb, bb->off_mem_rsvmap); + tlen = bb->off_mem_rsvmap; + memcpy(buf+tlen, mem_rsrv, bb->off_dt_struct - bb->off_mem_rsvmap); + tlen = tlen + (bb->off_dt_struct - bb->off_mem_rsvmap); + memcpy(buf+tlen, dtstruct, bb->off_dt_strings - bb->off_dt_struct); + tlen = tlen + (bb->off_dt_strings - bb->off_dt_struct); + memcpy(buf+tlen, propnames, bb->totalsize - bb->off_dt_strings); + tlen = tlen + bb->totalsize - bb->off_dt_strings; + *sizep = tlen; + return 0; +} diff --git a/kexec/arch/ppc/include/arch/options.h b/kexec/arch/ppc/include/arch/options.h new file mode 100644 index 0000000..b2176ab --- /dev/null +++ b/kexec/arch/ppc/include/arch/options.h @@ -0,0 +1,47 @@ +#ifndef KEXEC_ARCH_PPC_OPTIONS_H +#define KEXEC_ARCH_PPC_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) + +/* All 'local' loader options: */ +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_GAMECUBE (OPT_ARCH_MAX+1) +#define OPT_DTB (OPT_ARCH_MAX+2) +#define OPT_NODES (OPT_ARCH_MAX+3) +#define OPT_RAMDISK (OPT_ARCH_MAX+4) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + {"command-line", 1, 0, OPT_APPEND},\ + {"append", 1, 0, OPT_APPEND},\ + {"ramdisk", 1, 0, OPT_APPEND},\ + {"initrd", 1, 0, OPT_APPEND},\ + {"gamecube", 1, 0, OPT_GAMECUBE},\ + {"dtb", 1, 0, OPT_DTB},\ + {"reuse-node", 1, 0, OPT_NODES}, + +#define KEXEC_ALL_OPT_STR KEXEC_OPT_STR + +#endif /* KEXEC_ARCH_PPC_OPTIONS_H */ diff --git a/kexec/arch/ppc/include/page.h b/kexec/arch/ppc/include/page.h new file mode 100644 index 0000000..65877bc --- /dev/null +++ b/kexec/arch/ppc/include/page.h @@ -0,0 +1,27 @@ +#ifndef _PPC_BOOT_PAGE_H +#define _PPC_BOOT_PAGE_H +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifdef __ASSEMBLY__ +#define ASM_CONST(x) x +#else +#define __ASM_CONST(x) x##UL +#define ASM_CONST(x) __ASM_CONST(x) +#endif + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1))) + +#endif /* _PPC_BOOT_PAGE_H */ diff --git a/kexec/arch/ppc/include/types.h b/kexec/arch/ppc/include/types.h new file mode 100644 index 0000000..31393d1 --- /dev/null +++ b/kexec/arch/ppc/include/types.h @@ -0,0 +1,27 @@ +#ifndef _TYPES_H_ +#define _TYPES_H_ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +typedef signed char s8; +typedef short s16; +typedef int s32; +typedef long long s64; + +#define min(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#define max(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) + +#endif /* _TYPES_H_ */ diff --git a/kexec/arch/ppc/kexec-dol-ppc.c b/kexec/arch/ppc/kexec-dol-ppc.c new file mode 100644 index 0000000..800c072 --- /dev/null +++ b/kexec/arch/ppc/kexec-dol-ppc.c @@ -0,0 +1,468 @@ +/* + * kexec-dol-ppc.c - kexec DOL executable loader for the PowerPC + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "kexec-ppc.h" +#include <arch/options.h> + +static int debug = 0; + +/* + * I've found out there DOLs with unaligned and/or overlapping sections. + * I assume that sizes of sections can be wrong on these DOLs so I trust + * better start of sections. + * In order to load DOLs, I first extend sections to page aligned boundaries + * and then merge overlapping sections starting from lower addresses. + * -- Albert Herranz + */ + +/* DOL related stuff */ + +#define DOL_HEADER_SIZE 0x100 + +#define DOL_SECT_MAX_TEXT 7 /* text sections */ +#define DOL_SECT_MAX_DATA 11 /* data sections */ +#define DOL_MAX_SECT (DOL_SECT_MAX_TEXT+DOL_SECT_MAX_DATA) + +/* this is the DOL executable header */ +typedef struct { + uint32_t offset_text[DOL_SECT_MAX_TEXT]; /* in the file */ + uint32_t offset_data[DOL_SECT_MAX_DATA]; + uint32_t address_text[DOL_SECT_MAX_TEXT]; /* in memory */ + uint32_t address_data[DOL_SECT_MAX_DATA]; + uint32_t size_text[DOL_SECT_MAX_TEXT]; + uint32_t size_data[DOL_SECT_MAX_DATA]; + uint32_t address_bss; + uint32_t size_bss; + uint32_t entry_point; +} dol_header; + +#define dol_sect_offset(hptr, index) \ + ((index >= DOL_SECT_MAX_TEXT)? \ + hptr->offset_data[index - DOL_SECT_MAX_TEXT] \ + :hptr->offset_text[index]) +#define dol_sect_address(hptr, index) \ + ((index >= DOL_SECT_MAX_TEXT)? \ + hptr->address_data[index - DOL_SECT_MAX_TEXT] \ + :hptr->address_text[index]) +#define dol_sect_size(hptr, index) \ + ((index >= DOL_SECT_MAX_TEXT)? \ + hptr->size_data[index - DOL_SECT_MAX_TEXT] \ + :hptr->size_text[index]) +#define dol_sect_type(index) \ + ((index >= DOL_SECT_MAX_TEXT) ? "data" : "text") + +typedef struct { + uint32_t sects_bitmap; + uint32_t start; + uint32_t size; +} dol_segment; + +#define dol_seg_end(s1) \ + (s1->start + s1->size) +#define dol_seg_after_sect(s1, s2) \ + (s1->start >= dol_seg_end(s2)) +#define dol_seg_overlaps(s1, s2) \ + (!(dol_seg_after_sect(s1,s2) || dol_seg_after_sect(s2,s1))) + +/* same as in asm/page.h */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#define MAX_COMMAND_LINE 256 + +#define UPSZ(X) _ALIGN_UP(sizeof(X), 4) +static struct boot_notes { + Elf_Bhdr hdr; + Elf_Nhdr bl_hdr; + unsigned char bl_desc[UPSZ(BOOTLOADER)]; + Elf_Nhdr blv_hdr; + unsigned char blv_desc[UPSZ(BOOTLOADER_VERSION)]; + Elf_Nhdr cmd_hdr; + unsigned char command_line[0]; +} elf_boot_notes = { + .hdr = { + .b_signature = 0x0E1FB007, + .b_size = sizeof(elf_boot_notes), + .b_checksum = 0, + .b_records = 3, + }, + .bl_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER), + .n_type = EBN_BOOTLOADER_NAME, + }, + .bl_desc = BOOTLOADER, + .blv_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER_VERSION), + .n_type = EBN_BOOTLOADER_VERSION, + }, + .blv_desc = BOOTLOADER_VERSION, + .cmd_hdr = { + .n_namesz = 0, + .n_descsz = 0, + .n_type = EBN_COMMAND_LINE, + }, +}; + +void print_sects_bitmap(dol_segment * seg) +{ + int i, first_seen; + + printf("\t" "sects_bitmap"); + first_seen = 0; + for (i = 0; i < DOL_MAX_SECT; i++) { + if ((seg->sects_bitmap & (1 << i)) == 0) + continue; + printf("%c%d", (first_seen ? ',' : '='), i); + first_seen = 1; + } + printf("\n"); +} + +void print_dol_segment(dol_segment * seg) +{ + printf("dol segment:\n"); + printf("\t" "start=%08lx, size=%ld (%08lx)\n", + (unsigned long)seg->start, (unsigned long)seg->size, + (unsigned long)seg->size); + printf("\t" "end=%08lx\n", (unsigned long)dol_seg_end(seg)); + print_sects_bitmap(seg); +} + +int load_dol_segments(dol_segment * seg, int max_segs, dol_header * h) +{ + int i, n, remaining; + unsigned int start, size; + unsigned long adj1, adj2, end1; + + n = 0; + remaining = max_segs; + for (i = 0; i < DOL_MAX_SECT && remaining > 0; i++) { + /* zero here means the section is not in use */ + if (dol_sect_size(h, i) == 0) + continue; + + /* we initially map 1 seg to 1 sect */ + seg->sects_bitmap = (1 << i); + + start = dol_sect_address(h, i); + size = dol_sect_size(h, i); + + /* page align the segment */ + seg->start = start & PAGE_MASK; + end1 = start + size; + adj1 = start - seg->start; + adj2 = PAGE_ALIGN(end1) - end1; + seg->size = adj1 + size + adj2; + + //print_dol_segment(seg); + + seg++; + remaining--; + n++; + } + return n; +} + +void fix_dol_segments_overlaps(dol_segment * seg, int max_segs) +{ + int i, j; + dol_segment *p, *pp; + long extra_length; + + /* look for overlapping segments and fix them */ + for (i = 0; i < max_segs; i++) { + p = seg + i; /* segment p */ + + /* not really a segment */ + if (p->size == 0) + continue; + + /* check if overlaps any previous segments */ + for (j = 0; j < i; j++) { + pp = seg + j; /* segment pp */ + + /* not a segment or no overlap */ + if (pp->size == 0 || !dol_seg_overlaps(p, pp)) + continue; + + /* merge the two segments */ + if (pp->start < p->start) { + /* extend pp to include p and delete p */ + extra_length = dol_seg_end(p) - dol_seg_end(pp); + if (extra_length > 0) { + pp->size += extra_length; + } + pp->sects_bitmap |= p->sects_bitmap; + p->size = p->start = p->sects_bitmap = 0; + + /* restart the loop because p was deleted */ + i = 0; + break; + } else { + /* extend p to include pp and delete pp */ + extra_length = dol_seg_end(pp) - dol_seg_end(p); + if (extra_length > 0) { + p->size += extra_length; + } + p->sects_bitmap |= pp->sects_bitmap; + pp->size = pp->start = pp->sects_bitmap = 0; + } + } + } +} + +int dol_ppc_probe(const char *buf, off_t dol_length) +{ + dol_header header, *h; + int i, valid = 0; + + /* the DOL file should be at least as long as the DOL header */ + if (dol_length < DOL_HEADER_SIZE) { + if (debug) { + fprintf(stderr, "Not a DOL file, too short.\n"); + } + return -1; + } + + /* read the DOL header */ + memcpy(&header, buf, sizeof(header)); + h = &header; + + /* now perform some sanity checks */ + for (i = 0; i < DOL_MAX_SECT; i++) { + /* DOL segment MAY NOT be physically stored in the header */ + if ((dol_sect_offset(h, i) != 0) + && (dol_sect_offset(h, i) < DOL_HEADER_SIZE)) { + if (debug) { + fprintf(stderr, + "%s segment offset within DOL header\n", + dol_sect_type(i)); + } + return -1; + } + + /* end of physical storage must be within file */ + if ((uintmax_t)(dol_sect_offset(h, i) + dol_sect_size(h, i)) > + (uintmax_t)dol_length) { + if (debug) { + fprintf(stderr, + "%s segment past DOL file size\n", + dol_sect_type(i)); + } + return -1; + } + + /* we only should accept DOLs with segments above 2GB */ + if (dol_sect_address(h, i) != 0 + && !(dol_sect_address(h, i) & 0x80000000)) { + fprintf(stderr, "warning, %s segment below 2GB\n", + dol_sect_type(i)); + } + + if (i < DOL_SECT_MAX_TEXT) { + /* remember that entrypoint was in a code segment */ + if (h->entry_point >= dol_sect_address(h, i) + && h->entry_point < dol_sect_address(h, i) + + dol_sect_size(h, i)) + valid = 1; + } + } + + /* if there is a BSS segment it must^H^H^H^Hshould be above 2GB, too */ + if (h->address_bss != 0 && !(h->address_bss & 0x80000000)) { + fprintf(stderr, "warning, BSS segment below 2GB\n"); + } + + /* if entrypoint is not within a code segment reject this file */ + if (!valid) { + if (debug) { + fprintf(stderr, "Entry point out of text segment\n"); + } + return -1; + } + + /* I've got a dol */ + return 0; +} + +void dol_ppc_usage(void) +{ + printf + (" --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n"); + +} + +int dol_ppc_load(int argc, char **argv, const char *buf, off_t UNUSED(len), + struct kexec_info *info) +{ + dol_header header, *h; + unsigned long entry; + char *arg_buf; + size_t arg_bytes; + unsigned long arg_base; + struct boot_notes *notes; + size_t note_bytes; + const char *command_line; + int command_line_len; + unsigned long mstart; + dol_segment dol_segs[DOL_MAX_SECT]; + unsigned int sects_bitmap; + unsigned long lowest_start; + int i, j, k; + int opt; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"command-line", 1, 0, OPT_APPEND}, + {"append", 1, 0, OPT_APPEND}, + {0, 0, 0, 0}, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR; + + /* + * Parse the command line arguments + */ + command_line = 0; + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + command_line = optarg; + break; + } + } + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) + 1; + } + + /* read the DOL header */ + memcpy(&header, buf, sizeof(header)); + h = &header; + + /* set entry point */ + entry = h->entry_point; + + /* convert the DOL sections into suitable page aligned segments */ + memset(dol_segs, 0, sizeof(dol_segs)); + + load_dol_segments(dol_segs, DOL_MAX_SECT, h); + fix_dol_segments_overlaps(dol_segs, DOL_MAX_SECT); + + /* load rest of segments */ + for (i = 0; i < DOL_MAX_SECT; i++) { + unsigned char *seg_buf; + /* not a segment */ + if (dol_segs[i].size == 0) + continue; + + //print_dol_segment(&dol_segs[i]); + + /* prepare segment */ + seg_buf = xmalloc(dol_segs[i].size); + mstart = dol_segs[i].start; + if (mstart & 0xf0000000) { + /* + * GameCube DOLs expect memory mapped this way: + * + * 80000000 - 817fffff 24MB RAM, cached + * c0000000 - c17fffff 24MB RAM, not cached + * + * kexec, instead, needs physical memory layout, so + * we clear the upper bits of the address. + * (2 bits should be enough, indeed) + */ + mstart &= ~0xf0000000; /* clear bits 0-3, ibm syntax */ + } + add_segment(info, + seg_buf, dol_segs[i].size, + mstart, dol_segs[i].size); + + + /* load sections into segment memory, according to bitmap */ + sects_bitmap = 0; + while (sects_bitmap != dol_segs[i].sects_bitmap) { + unsigned char *sec_buf; + /* find lowest start address for section */ + lowest_start = 0xffffffff; + for (j = -1, k = 0; k < DOL_MAX_SECT; k++) { + /* continue if section is already done */ + if ((sects_bitmap & (1 << k)) != 0) + continue; + /* do nothing for non sections */ + if ((dol_segs[i].sects_bitmap & (1 << k)) == 0) + continue; + /* found new candidate */ + if (dol_sect_address(h, k) < lowest_start) { + lowest_start = dol_sect_address(h, k); + j = k; + } + } + /* mark section as being loaded */ + sects_bitmap |= (1 << j); + + /* read it from file to the right place */ + sec_buf = seg_buf + + (dol_sect_address(h, j) - dol_segs[i].start); + memcpy(sec_buf, buf + dol_sect_offset(h, j), + dol_sect_size(h, j)); + } + } + + /* build the setup glue and argument segment (segment 0) */ + note_bytes = sizeof(elf_boot_notes) + _ALIGN(command_line_len, 4); + arg_bytes = note_bytes + _ALIGN(setup_dol_size, 4); + + arg_buf = xmalloc(arg_bytes); + arg_base = add_buffer(info, + arg_buf, arg_bytes, arg_bytes, 4, 0, 0xFFFFFFFFUL, 1); + + notes = (struct boot_notes *)(arg_buf + _ALIGN(setup_dol_size, 4)); + + notes->hdr.b_size = note_bytes; + notes->cmd_hdr.n_descsz = command_line_len; + notes->hdr.b_checksum = compute_ip_checksum(notes, note_bytes); + + setup_dol_regs.spr8 = entry; /* Link Register */ + + memcpy(arg_buf, setup_dol_start, setup_dol_size); + memcpy(notes, &elf_boot_notes, sizeof(elf_boot_notes)); + memcpy(notes->command_line, command_line, command_line_len); + + if (debug) { + fprintf(stdout, "entry = %p\n", (void *)arg_base); + print_segments(stdout, info); + } + + info->entry = (void *)arg_base; + return 0; +} diff --git a/kexec/arch/ppc/kexec-elf-ppc.c b/kexec/arch/ppc/kexec-elf-ppc.c new file mode 100644 index 0000000..4a4886e --- /dev/null +++ b/kexec/arch/ppc/kexec-elf-ppc.c @@ -0,0 +1,458 @@ +/* + * kexec-elf-ppc.c - kexec Elf loader for the PowerPC + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-ppc.h" +#include <arch/options.h> +#include "../../kexec-syscall.h" +#include "crashdump-powerpc.h" + +#include "config.h" +#include "fixup_dtb.h" + +static const int probe_debug = 0; + +unsigned char reuse_initrd; +int create_flatten_tree(struct kexec_info *, unsigned char **, unsigned long *, + char *); + +#define UPSZ(X) _ALIGN_UP(sizeof(X), 4); +#ifdef WITH_GAMECUBE +static struct boot_notes { + Elf_Bhdr hdr; + Elf_Nhdr bl_hdr; + unsigned char bl_desc[UPSZ(BOOTLOADER)]; + Elf_Nhdr blv_hdr; + unsigned char blv_desc[UPSZ(BOOTLOADER_VERSION)]; + Elf_Nhdr cmd_hdr; + unsigned char command_line[0]; +} elf_boot_notes = { + .hdr = { + .b_signature = 0x0E1FB007, + .b_size = sizeof(elf_boot_notes), + .b_checksum = 0, + .b_records = 3, + }, + .bl_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER), + .n_type = EBN_BOOTLOADER_NAME, + }, + .bl_desc = BOOTLOADER, + .blv_hdr = { + .n_namesz = 0, + .n_descsz = sizeof(BOOTLOADER_VERSION), + .n_type = EBN_BOOTLOADER_VERSION, + }, + .blv_desc = BOOTLOADER_VERSION, + .cmd_hdr = { + .n_namesz = 0, + .n_descsz = 0, + .n_type = EBN_COMMAND_LINE, + }, +}; +#endif + +int elf_ppc_probe(const char *buf, off_t len) +{ + + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + goto out; + } + + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_PPC) { + /* for a different architecture */ + if (probe_debug) { + fprintf(stderr, "Not for this architecture.\n"); + } + result = -1; + goto out; + } + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +#ifdef WITH_GAMECUBE +static void gamecube_hack_addresses(struct mem_ehdr *ehdr) +{ + struct mem_phdr *phdr, *phdr_end; + phdr_end = ehdr->e_phdr + ehdr->e_phnum; + for(phdr = ehdr->e_phdr; phdr != phdr_end; phdr++) { + /* + * GameCube ELF kernel is linked with memory mapped + * this way (to easily transform it into a DOL + * suitable for being loaded with psoload): + * + * 80000000 - 817fffff 24MB RAM, cached + * c0000000 - c17fffff 24MB RAM, not cached + * + * kexec, instead, needs physical memory layout, so + * we clear the upper bits of the address. + * (2 bits should be enough, indeed) + */ + phdr->p_paddr &= ~0xf0000000; /* clear bits 0-3, ibm syntax */ + } +} +#endif + +/* See options.h -- add any more there, too. */ +static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"command-line", 1, 0, OPT_APPEND}, + {"append", 1, 0, OPT_APPEND}, + {"ramdisk", 1, 0, OPT_RAMDISK}, + {"initrd", 1, 0, OPT_RAMDISK}, + {"gamecube", 1, 0, OPT_GAMECUBE}, + {"dtb", 1, 0, OPT_DTB}, + {"reuse-node", 1, 0, OPT_NODES}, + {0, 0, 0, 0}, +}; +static const char short_options[] = KEXEC_ARCH_OPT_STR; + +void elf_ppc_usage(void) +{ + printf( + " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --ramdisk=<filename> Initial RAM disk.\n" + " --initrd=<filename> same as --ramdisk\n" + " --gamecube=1|0 Enable/disable support for ELFs with changed\n" + " addresses suitable for the GameCube.\n" + " --dtb=<filename> Specify device tree blob file.\n" + " --reuse-node=node Specify nodes which should be taken from /proc/device-tree.\n" + " Can be set multiple times.\n" + ); +} + +int elf_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *command_line, *crash_cmdline, *cmdline_buf; + char *tmp_cmdline; + int command_line_len, crash_cmdline_len; + char *dtb; + int result; + char *error_msg; + unsigned long max_addr, hole_addr; + struct mem_phdr *phdr; + size_t size; +#ifdef CONFIG_PPC64 + unsigned long toc_addr; +#endif +#ifdef WITH_GAMECUBE + int target_is_gamecube = 1; + char *arg_buf; + size_t arg_bytes; + unsigned long arg_base; + struct boot_notes *notes; + size_t note_bytes; + unsigned char *setup_start; + uint32_t setup_size; +#else + char *seg_buf = NULL; + off_t seg_size = 0; + int target_is_gamecube = 0; + unsigned int addr; + unsigned long dtb_addr; + unsigned long dtb_addr_actual; +#endif + unsigned long kernel_addr; +#define FIXUP_ENTRYS (20) + char *fixup_nodes[FIXUP_ENTRYS + 1]; + int cur_fixup = 0; + int opt; + char *blob_buf = NULL; + off_t blob_size = 0; + + command_line = tmp_cmdline = NULL; + dtb = NULL; + max_addr = LONG_MAX; + hole_addr = 0; + kernel_addr = 0; + ramdisk = 0; + result = 0; + error_msg = NULL; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + tmp_cmdline = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_GAMECUBE: + target_is_gamecube = atoi(optarg); + break; + + case OPT_DTB: + dtb = optarg; + break; + + case OPT_NODES: + if (cur_fixup >= FIXUP_ENTRYS) { + die("The number of entries for the fixup is too large\n"); + } + fixup_nodes[cur_fixup] = optarg; + cur_fixup++; + break; + } + } + + if (ramdisk && reuse_initrd) + die("Can't specify --ramdisk or --initrd with --reuseinitrd\n"); + + command_line_len = 0; + if (tmp_cmdline) { + command_line = tmp_cmdline; + } else { + command_line = get_command_line(); + } + command_line_len = strlen(command_line); + + fixup_nodes[cur_fixup] = NULL; + + /* Parse the Elf file */ + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + goto out; + } + +#ifdef WITH_GAMECUBE + if (target_is_gamecube) { + gamecube_hack_addresses(&ehdr); + } +#endif + + /* Load the Elf data. Physical load addresses in elf64 header do not + * show up correctly. Use user supplied address for now to patch the + * elf header + */ + + phdr = &ehdr.e_phdr[0]; + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kernel_addr = locate_hole(info, size, 0, 0, max_addr, 1); +#ifdef CONFIG_PPC64 + ehdr.e_phdr[0].p_paddr = (Elf64_Addr)kernel_addr; +#else + ehdr.e_phdr[0].p_paddr = kernel_addr; +#endif + + /* Load the Elf data */ + result = elf_exec_load(&ehdr, info); + if (result < 0) { + goto out; + } + + /* + * Need to append some command line parameters internally in case of + * taking crash dumps. Additional segments need to be created. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + crash_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)crash_cmdline, 0, COMMAND_LINE_SIZE); + result = load_crashdump_segments(info, crash_cmdline, + max_addr, 0); + if (result < 0) { + result = -1; + goto out; + } + crash_cmdline_len = strlen(crash_cmdline); + } else { + crash_cmdline = NULL; + crash_cmdline_len = 0; + } + + /* + * In case of a toy we take the hardcoded things and an easy setup via + * one of the assembly startups. Every thing else should be grown up + * and go through the purgatory. + */ +#ifdef WITH_GAMECUBE + if (target_is_gamecube) { + setup_start = setup_dol_start; + setup_size = setup_dol_size; + setup_dol_regs.spr8 = ehdr.e_entry; /* Link Register */ + } else { + setup_start = setup_simple_start; + setup_size = setup_simple_size; + setup_simple_regs.spr8 = ehdr.e_entry; /* Link Register */ + } + note_bytes = sizeof(elf_boot_notes) + _ALIGN(command_line_len, 4); + arg_bytes = note_bytes + _ALIGN(setup_size, 4); + + arg_buf = xmalloc(arg_bytes); + arg_base = add_buffer(info, + arg_buf, arg_bytes, arg_bytes, 4, 0, elf_max_addr(&ehdr), 1); + + notes = (struct boot_notes *)(arg_buf + _ALIGN(setup_size, 4)); + + memcpy(arg_buf, setup_start, setup_size); + memcpy(notes, &elf_boot_notes, sizeof(elf_boot_notes)); + memcpy(notes->command_line, command_line, command_line_len); + notes->hdr.b_size = note_bytes; + notes->cmd_hdr.n_descsz = command_line_len; + notes->hdr.b_checksum = compute_ip_checksum(notes, note_bytes); + + info->entry = (void *)arg_base; +#else + if (crash_cmdline_len + command_line_len + 1 > COMMAND_LINE_SIZE) { + printf("Kernel command line exceeds size\n"); + return -1; + } + + cmdline_buf = xmalloc(COMMAND_LINE_SIZE); + memset((void *)cmdline_buf, 0, COMMAND_LINE_SIZE); + if (command_line) + strncat(cmdline_buf, command_line, command_line_len); + if (crash_cmdline) + strncat(cmdline_buf, crash_cmdline, + sizeof(crash_cmdline) - + strlen(crash_cmdline) - 1); + + elf_rel_build_load(info, &info->rhdr, (const char *)purgatory, + purgatory_size, 0, elf_max_addr(&ehdr), 1, 0); + + /* Here we need to initialize the device tree, and find out where + * it is going to live so we can place it directly after the + * kernel image */ + if (dtb) { + /* Grab device tree from buffer */ + blob_buf = slurp_file(dtb, &blob_size); + } else { + create_flatten_tree(info, (unsigned char **)&blob_buf, + (unsigned long *)&blob_size, cmdline_buf); + } + if (!blob_buf || !blob_size) { + error_msg = "Device tree seems to be an empty file.\n"; + goto out2; + } + + /* initial fixup for device tree */ + blob_buf = fixup_dtb_init(info, blob_buf, &blob_size, kernel_addr, &dtb_addr); + + if (ramdisk) { + seg_buf = slurp_ramdisk_ppc(ramdisk, &seg_size); + /* load the ramdisk *above* the device tree */ + hole_addr = add_buffer(info, seg_buf, seg_size, seg_size, + 0, dtb_addr + blob_size + 1, max_addr, -1); + ramdisk_base = hole_addr; + ramdisk_size = seg_size; + } + if (reuse_initrd) { + ramdisk_base = initrd_base; + ramdisk_size = initrd_size; + } + + if (info->kexec_flags & KEXEC_ON_CRASH && ramdisk_base != 0) { + if ( (ramdisk_base < crash_base) || + (ramdisk_base > crash_base + crash_size) ) { + printf("WARNING: ramdisk is above crashkernel region!\n"); + } + else if (ramdisk_base + ramdisk_size > crash_base + crash_size) { + printf("WARNING: ramdisk overflows crashkernel region!\n"); + } + } + + /* Perform final fixup on devie tree, i.e. everything beside what + * was done above */ + fixup_dtb_finalize(info, blob_buf, &blob_size, fixup_nodes, + cmdline_buf); + dtb_addr_actual = add_buffer(info, blob_buf, blob_size, blob_size, 0, dtb_addr, + kernel_addr + KERNEL_ACCESS_TOP, 1); + if (dtb_addr_actual != dtb_addr) { + error_msg = "Error device tree not loadded to address it was expecting to be loaded too!\n"; + goto out2; + } + + /* + * set various variables for the purgatory. + * ehdr.e_entry is a virtual address. we know physical start + * address of the kernel (kernel_addr). Find the offset of + * e_entry from the virtual start address(e_phdr[0].p_vaddr) + * and calculate the actual physical address of the 'kernel entry'. + */ + addr = kernel_addr + (ehdr.e_entry - ehdr.e_phdr[0].p_vaddr); + elf_rel_set_symbol(&info->rhdr, "kernel", &addr, sizeof(addr)); + + addr = dtb_addr; + elf_rel_set_symbol(&info->rhdr, "dt_offset", + &addr, sizeof(addr)); + +#define PUL_STACK_SIZE (16 * 1024) + addr = locate_hole(info, PUL_STACK_SIZE, 0, 0, + elf_max_addr(&ehdr), 1); + addr += PUL_STACK_SIZE; + elf_rel_set_symbol(&info->rhdr, "stack", &addr, sizeof(addr)); +#undef PUL_STACK_SIZE + + /* + * Fixup ThreadPointer(r2) for purgatory. + * PPC32 ELF ABI expects : + * ThreadPointer (TP) = TCB + 0x7000 + * We manually allocate a TCB space and set the TP + * accordingly. + */ +#define TCB_SIZE 1024 +#define TCB_TP_OFFSET 0x7000 /* PPC32 ELF ABI */ + + addr = locate_hole(info, TCB_SIZE, 0, 0, + ((unsigned long)elf_max_addr(&ehdr) - TCB_TP_OFFSET), + 1); + addr += TCB_SIZE + TCB_TP_OFFSET; + elf_rel_set_symbol(&info->rhdr, "my_thread_ptr", &addr, sizeof(addr)); + +#undef TCB_SIZE +#undef TCB_TP_OFFSET + + addr = elf_rel_get_addr(&info->rhdr, "purgatory_start"); + info->entry = (void *)addr; + +out2: + free(cmdline_buf); +#endif +out: + free_elf_info(&ehdr); + free(crash_cmdline); + if (!tmp_cmdline) + free(command_line); + if (error_msg) + die("%s", error_msg); + + return result; +} diff --git a/kexec/arch/ppc/kexec-elf-rel-ppc.c b/kexec/arch/ppc/kexec-elf-rel-ppc.c new file mode 100644 index 0000000..1acbd86 --- /dev/null +++ b/kexec/arch/ppc/kexec-elf-rel-ppc.c @@ -0,0 +1,69 @@ +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) { + return 0; + } + if (ehdr->ei_class != ELFCLASS32) { + return 0; + } + if (ehdr->e_machine != EM_PPC) { + return 0; + } + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), + struct mem_sym *UNUSED(sym), unsigned long r_type, void *location, + unsigned long address, unsigned long value) +{ + switch(r_type) { + case R_PPC_ADDR32: + /* Simply set it */ + *(uint32_t *)location = value; + break; + + case R_PPC_ADDR16_LO: + /* Low half of the symbol */ + *(uint16_t *)location = value; + break; + + case R_PPC_ADDR16_HI: + *(uint16_t *)location = (value>>16) & 0xffff; + break; + + case R_PPC_ADDR16_HA: + /* Sign-adjusted lower 16 bits: PPC ELF ABI says: + (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF. + This is the same, only sane. + */ + *(uint16_t *)location = (value + 0x8000) >> 16; + break; + + case R_PPC_REL24: + if ((int)(value - address) < -0x02000000 + || (int)(value - address) >= 0x02000000) + { + die("Symbol more than 16MiB away"); + } + /* Only replace bits 2 through 26 */ + *(uint32_t *)location + = (*(uint32_t *)location & ~0x03fffffc) + | ((value - address) + & 0x03fffffc); + break; + + case R_PPC_REL32: + /* 32-bit relative jump. */ + *(uint32_t *)location = value - address; + break; + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } + return; +} diff --git a/kexec/arch/ppc/kexec-ppc.c b/kexec/arch/ppc/kexec-ppc.c new file mode 100644 index 0000000..03bec36 --- /dev/null +++ b/kexec/arch/ppc/kexec-ppc.c @@ -0,0 +1,968 @@ +/* + * kexec-ppc.c - kexec for the PowerPC + * Copyright (C) 2004, 2005 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <sys/types.h> +#include <dirent.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc.h" +#include "crashdump-powerpc.h" +#include <arch/options.h> + +#include "config.h" + +unsigned long dt_address_cells = 0, dt_size_cells = 0; +uint64_t rmo_top; +uint64_t memory_limit; +unsigned long long crash_base = 0, crash_size = 0; +unsigned long long initrd_base = 0, initrd_size = 0; +unsigned long long ramdisk_base = 0, ramdisk_size = 0; +unsigned int rtas_base, rtas_size; +int max_memory_ranges; +const char *ramdisk; + +/* + * Reads the #address-cells and #size-cells on this platform. + * This is used to parse the memory/reg info from the device-tree + */ +int init_memory_region_info() +{ + size_t res = 0; + int fd; + char *file; + + file = "/proc/device-tree/#address-cells"; + fd = open(file, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Unable to open %s\n", file); + return -1; + } + + res = read(fd, &dt_address_cells, sizeof(dt_address_cells)); + if (res != sizeof(dt_address_cells)) { + fprintf(stderr, "Error reading %s\n", file); + return -1; + } + close(fd); + + file = "/proc/device-tree/#size-cells"; + fd = open(file, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Unable to open %s\n", file); + return -1; + } + + res = read(fd, &dt_size_cells, sizeof(dt_size_cells)); + if (res != sizeof(dt_size_cells)) { + fprintf(stderr, "Error reading %s\n", file); + return -1; + } + close(fd); + + /* Convert the sizes into bytes */ + dt_size_cells *= sizeof(unsigned long); + dt_address_cells *= sizeof(unsigned long); + + return 0; +} + +#define MAXBYTES 128 +/* + * Reads the memory region info from the device-tree node pointed + * by @fd and fills the *start, *end with the boundaries of the region + */ +int read_memory_region_limits(int fd, unsigned long long *start, + unsigned long long *end) +{ + char buf[MAXBYTES]; + unsigned long *p; + unsigned long nbytes = dt_address_cells + dt_size_cells; + + if (lseek(fd, 0, SEEK_SET) == -1) { + fprintf(stderr, "Error in file seek\n"); + return -1; + } + if (read(fd, buf, nbytes) != nbytes) { + fprintf(stderr, "Error reading the memory region info\n"); + return -1; + } + + p = (unsigned long*)buf; + if (dt_address_cells == sizeof(unsigned long)) { + *start = p[0]; + p++; + } else if (dt_address_cells == sizeof(unsigned long long)) { + *start = ((unsigned long long *)p)[0]; + p = (unsigned long long *)p + 1; + } else { + fprintf(stderr, "Unsupported value for #address-cells : %ld\n", + dt_address_cells); + return -1; + } + + if (dt_size_cells == sizeof(unsigned long)) + *end = *start + p[0]; + else if (dt_size_cells == sizeof(unsigned long long)) + *end = *start + ((unsigned long long *)p)[0]; + else { + fprintf(stderr, "Unsupported value for #size-cells : %ld\n", + dt_size_cells); + return -1; + } + + return 0; +} + +void arch_reuse_initrd(void) +{ + reuse_initrd = 1; +} + +#ifdef WITH_GAMECUBE +#define MAX_MEMORY_RANGES 64 +static struct memory_range memory_range[MAX_MEMORY_RANGES]; + +static int get_memory_ranges_gc(struct memory_range **range, int *ranges, + unsigned long UNUSED(kexec_flags)) +{ + int memory_ranges = 0; + + /* RAM - lowmem used by DOLs - framebuffer */ + memory_range[memory_ranges].start = 0x00003000; + memory_range[memory_ranges].end = 0x0174bfff; + memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + *range = memory_range; + *ranges = memory_ranges; + return 0; +} +#else +static int use_new_dtb; +static int nr_memory_ranges, nr_exclude_ranges; +static struct memory_range *exclude_range; +static struct memory_range *memory_range; +static struct memory_range *base_memory_range; +static uint64_t memory_max; + +/* + * Count the memory nodes under /proc/device-tree and populate the + * max_memory_ranges variable. This variable replaces MAX_MEMORY_RANGES + * macro used earlier. + */ +static int count_memory_ranges(void) +{ + char device_tree[256] = "/proc/device-tree/"; + struct dirent *dentry; + DIR *dir; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + max_memory_ranges++; + } + + /* need to add extra region for retained initrd */ + if (use_new_dtb) { + max_memory_ranges++; + } + + closedir(dir); + return 0; + +} + + static void cleanup_memory_ranges(void) + { + free(memory_range); + free(base_memory_range); + free(exclude_range); + } + +/* + * Allocate memory for various data structures used to hold + * values of different memory ranges + */ +static int alloc_memory_ranges(void) +{ + int memory_range_len; + + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = malloc(memory_range_len); + if (!memory_range) + return -1; + + base_memory_range = malloc(memory_range_len); + if (!base_memory_range) + goto err1; + + exclude_range = malloc(memory_range_len); + if (!exclude_range) + goto err1; + + memset(memory_range, 0, memory_range_len); + memset(base_memory_range, 0, memory_range_len); + memset(exclude_range, 0, memory_range_len); + return 0; + +err1: + fprintf(stderr, "memory range structure allocation failure\n"); + cleanup_memory_ranges(); + return -1; +} + +/* Sort the exclude ranges in memory */ +static int sort_ranges(void) +{ + int i, j; + uint64_t tstart, tend; + for (i = 0; i < nr_exclude_ranges - 1; i++) { + for (j = 0; j < nr_exclude_ranges - i - 1; j++) { + if (exclude_range[j].start > exclude_range[j+1].start) { + tstart = exclude_range[j].start; + tend = exclude_range[j].end; + exclude_range[j].start = exclude_range[j+1].start; + exclude_range[j].end = exclude_range[j+1].end; + exclude_range[j+1].start = tstart; + exclude_range[j+1].end = tend; + } + } + } + return 0; +} + +/* Sort the base ranges in memory - this is useful for ensuring that our + * ranges are in ascending order, even if device-tree read of memory nodes + * is done differently. Also, could be used for other range coalescing later + */ +static int sort_base_ranges(void) +{ + int i, j; + unsigned long long tstart, tend; + + for (i = 0; i < nr_memory_ranges - 1; i++) { + for (j = 0; j < nr_memory_ranges - i - 1; j++) { + if (base_memory_range[j].start > base_memory_range[j+1].start) { + tstart = base_memory_range[j].start; + tend = base_memory_range[j].end; + base_memory_range[j].start = base_memory_range[j+1].start; + base_memory_range[j].end = base_memory_range[j+1].end; + base_memory_range[j+1].start = tstart; + base_memory_range[j+1].end = tend; + } + } + } + return 0; +} + +static int realloc_memory_ranges(void) +{ + size_t memory_range_len; + + max_memory_ranges++; + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = (struct memory_range *) realloc(memory_range, + memory_range_len); + if (!memory_range) + goto err; + + base_memory_range = (struct memory_range *) realloc(base_memory_range, + memory_range_len); + if (!base_memory_range) + goto err; + + exclude_range = (struct memory_range *) realloc(exclude_range, + memory_range_len); + if (!exclude_range) + goto err; + + usablemem_rgns.ranges = (struct memory_range *) + realloc(usablemem_rgns.ranges, + memory_range_len); + if (!(usablemem_rgns.ranges)) + goto err; + + return 0; + +err: + fprintf(stderr, "memory range structure re-allocation failure\n"); + return -1; +} + +/* Get base memory ranges */ +static int get_base_ranges(void) +{ + int local_memory_ranges = 0; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + char buf[MAXBYTES]; + DIR *dir, *dmem; + struct dirent *dentry, *mentry; + int n, fd; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((dmem = opendir(fname)) == NULL) { + perror(fname); + closedir(dir); + return -1; + } + while ((mentry = readdir(dmem)) != NULL) { + unsigned long long start, end; + + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + if ((fd = open(fname, O_RDONLY)) < 0) { + perror(fname); + closedir(dmem); + closedir(dir); + return -1; + } + if (read_memory_region_limits(fd, &start, &end) != 0) { + close(fd); + closedir(dmem); + closedir(dir); + return -1; + } + if (local_memory_ranges >= max_memory_ranges) { + if (realloc_memory_ranges() < 0){ + close(fd); + break; + } + } + + base_memory_range[local_memory_ranges].start = start; + base_memory_range[local_memory_ranges].end = end; + base_memory_range[local_memory_ranges].type = RANGE_RAM; + local_memory_ranges++; + dbgprintf("%016llx-%016llx : %x\n", + base_memory_range[local_memory_ranges-1].start, + base_memory_range[local_memory_ranges-1].end, + base_memory_range[local_memory_ranges-1].type); + close(fd); + } + closedir(dmem); + } + closedir(dir); + nr_memory_ranges = local_memory_ranges; + sort_base_ranges(); + memory_max = base_memory_range[nr_memory_ranges - 1].end; + + dbgprintf("get base memory ranges:%d\n", nr_memory_ranges); + + return 0; +} + +static int read_kernel_memory_limit(char *fname, char *buf) +{ + FILE *file; + int n; + + if (!fname || !buf) + return -1; + + file = fopen(fname, "r"); + if (file == NULL) { + if (errno != ENOENT) { + perror(fname); + return -1; + } + errno = 0; + /* + * fall through. On older kernel this file + * is not present. Hence return success. + */ + } else { + /* Memory limit property is of u64 type. */ + if ((n = fread(&memory_limit, 1, sizeof(uint64_t), file)) < 0) { + perror(fname); + goto err_out; + } + if (n != sizeof(uint64_t)) { + fprintf(stderr, "%s node has invalid size: %d\n", + fname, n); + goto err_out; + } + fclose(file); + } + return 0; +err_out: + fclose(file); + return -1; +} + +/* Return 0 if fname/value valid, -1 otherwise */ +int get_devtree_value(const char *fname, unsigned long long *value) +{ + FILE *file; + char buf[MAXBYTES]; + int n = -1; + + if ((file = fopen(fname, "r"))) { + n = fread(buf, 1, MAXBYTES, file); + fclose(file); + } + + if (n == sizeof(uint32_t)) + *value = ((uint32_t *)buf)[0]; + else if (n == sizeof(uint64_t)) + *value = ((uint64_t *)buf)[0]; + else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + return -1; + } + + return 0; +} + +/* Get devtree details and create exclude_range array + * Also create usablemem_ranges for KEXEC_ON_CRASH + */ +static int get_devtree_details(unsigned long kexec_flags) +{ + uint64_t rmo_base; + unsigned long long tce_base; + unsigned int tce_size; + unsigned long long htab_base, htab_size; + unsigned long long kernel_end; + unsigned long long initrd_start, initrd_end; + char buf[MAXBYTES]; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + DIR *dir, *cdir; + FILE *file; + struct dirent *dentry; + int n, i = 0; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "chosen", 6) && + strncmp(dentry->d_name, "memory@", 7) && + strncmp(dentry->d_name, "memory", 6) && + strncmp(dentry->d_name, "pci@", 4) && + strncmp(dentry->d_name, "rtas", 4)) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((cdir = opendir(fname)) == NULL) { + perror(fname); + goto error_opendir; + } + + if (strncmp(dentry->d_name, "chosen", 6) == 0) { + /* only reserve kernel region if we are doing a crash kernel */ + if (kexec_flags & KEXEC_ON_CRASH) { + strcat(fname, "/linux,kernel-end"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + kernel_end = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + kernel_end = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + + /* Add kernel memory to exclude_range */ + exclude_range[i].start = 0x0UL; + exclude_range[i].end = kernel_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,crashkernel-base"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + crash_base = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + crash_base = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,crashkernel-size"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + crash_size = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + crash_size = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + + if (crash_base > mem_min) + mem_min = crash_base; + if (crash_base + crash_size < mem_max) + mem_max = crash_base + crash_size; + +#ifndef CONFIG_BOOKE + add_usable_mem_rgns(0, crash_base + crash_size); + /* Reserve the region (KDUMP_BACKUP_LIMIT,crash_base) */ + reserve(KDUMP_BACKUP_LIMIT, + crash_base-KDUMP_BACKUP_LIMIT); +#else + add_usable_mem_rgns(crash_base, crash_size); +#endif + } + /* + * Read the first kernel's memory limit. + * If the first kernel is booted with mem= option then + * it would export "linux,memory-limit" file + * reflecting value for the same. + */ + memset(fname, 0, sizeof(fname)); + snprintf(fname, sizeof(fname), "%s%s%s", device_tree, + dentry->d_name, "/linux,memory-limit"); + if (read_kernel_memory_limit(fname, buf) < 0) + goto error_opencdir; + + /* reserve the initrd_start and end locations. */ + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,initrd-start"); + file = fopen(fname, "r"); + if (!file) { + errno = 0; + initrd_start = 0; + } else { + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + initrd_start = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + initrd_start = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + } + + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,initrd-end"); + file = fopen(fname, "r"); + if (!file) { + errno = 0; + initrd_end = 0; + } else { + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + if (n == sizeof(uint32_t)) { + initrd_end = ((uint32_t *)buf)[0]; + } else if (n == sizeof(uint64_t)) { + initrd_end = ((uint64_t *)buf)[0]; + } else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + goto error_openfile; + } + fclose(file); + } + + if ((initrd_end - initrd_start) != 0 ) { + initrd_base = initrd_start; + initrd_size = initrd_end - initrd_start; + } + + if (reuse_initrd) { + /* Add initrd address to exclude_range */ + exclude_range[i].start = initrd_start; + exclude_range[i].end = initrd_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + } + + /* HTAB */ + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,htab-base"); + file = fopen(fname, "r"); + if (!file) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&htab_base, sizeof(unsigned long), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,htab-size"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if (fread(&htab_size, sizeof(unsigned long), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + /* Add htab address to exclude_range - NON-LPAR only */ + exclude_range[i].start = htab_base; + exclude_range[i].end = htab_base + htab_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + } /* chosen */ + if (strncmp(dentry->d_name, "rtas", 4) == 0) { + strcat(fname, "/linux,rtas-base"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_base, sizeof(unsigned int), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,rtas-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_size, sizeof(unsigned int), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + closedir(cdir); + /* Add rtas to exclude_range */ + exclude_range[i].start = rtas_base; + exclude_range[i].end = rtas_base + rtas_size; + i++; + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(rtas_base, rtas_size); + } /* rtas */ + + if (!strncmp(dentry->d_name, "memory@", 7) || + !strcmp(dentry->d_name, "memory")) { + int fd; + strcat(fname, "/reg"); + if ((fd = open(fname, O_RDONLY)) < 0) { + perror(fname); + goto error_opencdir; + } + if (read_memory_region_limits(fd, &rmo_base, &rmo_top) != 0) + goto error_openfile; + + if (rmo_top > 0x30000000UL) + rmo_top = 0x30000000UL; + + close(fd); + closedir(cdir); + } /* memory */ + + if (strncmp(dentry->d_name, "pci@", 4) == 0) { + strcat(fname, "/linux,tce-base"); + file = fopen(fname, "r"); + if (!file) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&tce_base, sizeof(unsigned long), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + memset(fname, 0, sizeof(fname)); + sprintf(fname, "%s%s%s", + device_tree, dentry->d_name, + "/linux,tce-size"); + file = fopen(fname, "r"); + if (!file) { + perror(fname); + goto error_opencdir; + } + if (fread(&tce_size, sizeof(unsigned int), 1, file) + != 1) { + perror(fname); + goto error_openfile; + } + /* Add tce to exclude_range - NON-LPAR only */ + exclude_range[i].start = tce_base; + exclude_range[i].end = tce_base + tce_size; + i++; + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(tce_base, tce_size); + closedir(cdir); + } /* pci */ + } + closedir(dir); + + nr_exclude_ranges = i; + + sort_ranges(); + + + int k; + for (k = 0; k < i; k++) + dbgprintf("exclude_range sorted exclude_range[%d] " + "start:%llx, end:%llx\n", k, exclude_range[k].start, + exclude_range[k].end); + + return 0; + +error_openfile: + fclose(file); +error_opencdir: + closedir(cdir); +error_opendir: + closedir(dir); + return -1; +} + + +/* Setup a sorted list of memory ranges. */ +static int setup_memory_ranges(unsigned long kexec_flags) +{ + int i, j = 0; + + /* Get the base list of memory ranges from /proc/device-tree/memory + * nodes. Build list of ranges to be excluded from valid memory + */ + + if (get_base_ranges()) + goto out; + if (get_devtree_details(kexec_flags)) + goto out; + + for (i = 0; i < nr_exclude_ranges; i++) { + /* If first exclude range does not start with 0, include the + * first hole of valid memory from 0 - exclude_range[0].start + */ + if (i == 0) { + if (exclude_range[i].start != 0) { + memory_range[j].start = 0; + memory_range[j].end = exclude_range[i].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + } + } /* i == 0 */ + /* If the last exclude range does not end at memory_max, include + * the last hole of valid memory from exclude_range[last].end - + * memory_max + */ + if (i == nr_exclude_ranges - 1) { + if (exclude_range[i].end < memory_max) { + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = memory_max; + memory_range[j].type = RANGE_RAM; + j++; + /* Limit the end to rmo_top */ + if (memory_range[j-1].start >= rmo_top) { + j--; + break; + } + if ((memory_range[j-1].start < rmo_top) && + (memory_range[j-1].end >= rmo_top)) { + memory_range[j-1].end = rmo_top; + break; + } + continue; + } + } /* i == nr_exclude_ranges - 1 */ + /* contiguous exclude ranges - skip */ + if (exclude_range[i+1].start == exclude_range[i].end + 1) + continue; + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = exclude_range[i+1].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + /* Limit range to rmo_top */ + if (memory_range[j-1].start >= rmo_top) { + j--; + break; + } + if ((memory_range[j-1].start < rmo_top) && + (memory_range[j-1].end >= rmo_top)) { + memory_range[j-1].end = rmo_top; + break; + } + } + + /* fixup in case we have no exclude regions */ + if (!j) { + memory_range[0].start = base_memory_range[0].start; + memory_range[0].end = rmo_top; + memory_range[0].type = RANGE_RAM; + nr_memory_ranges = 1; + } else + nr_memory_ranges = j; + + + int k; + for (k = 0; k < j; k++) + dbgprintf("setup_memory_ranges memory_range[%d] " + "start:%llx, end:%llx\n", k, memory_range[k].start, + memory_range[k].end); + return 0; + +out: + cleanup_memory_ranges(); + return -1; +} + + +/* Return a list of valid memory ranges */ +int get_memory_ranges_dt(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + if (count_memory_ranges()) + return -1; + if (alloc_memory_ranges()) + return -1; + if (setup_memory_ranges(kexec_flags)) + return -1; + + *range = memory_range; + *ranges = nr_memory_ranges; + return 0; +} +#endif + +/* Return a sorted list of memory ranges. */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + int res = 0; + + res = init_memory_region_info(); + if (res != 0) + return res; +#ifdef WITH_GAMECUBE + return get_memory_ranges_gc(range, ranges, kexec_flags); +#else + return get_memory_ranges_dt(range, ranges, kexec_flags); +#endif +} + +struct file_type file_type[] = { + {"elf-ppc", elf_ppc_probe, elf_ppc_load, elf_ppc_usage}, + {"dol-ppc", dol_ppc_probe, dol_ppc_load, dol_ppc_usage}, + {"uImage-ppc", uImage_ppc_probe, uImage_ppc_load, uImage_ppc_usage }, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + return 0; +} + +const struct arch_map_entry arches[] = { + /* For compatibility with older patches + * use KEXEC_ARCH_DEFAULT instead of KEXEC_ARCH_PPC here. + */ + { "ppc", KEXEC_ARCH_DEFAULT }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + diff --git a/kexec/arch/ppc/kexec-ppc.h b/kexec/arch/ppc/kexec-ppc.h new file mode 100644 index 0000000..04e728e --- /dev/null +++ b/kexec/arch/ppc/kexec-ppc.h @@ -0,0 +1,86 @@ +#ifndef KEXEC_PPC_H +#define KEXEC_PPC_H + +#define MAXBYTES 128 +#define MAX_LINE 160 +#define CORE_TYPE_ELF32 1 +#define CORE_TYPE_ELF64 2 + +#define COMMAND_LINE_SIZE 2048 /* from kernel */ + +extern unsigned char setup_simple_start[]; +extern uint32_t setup_simple_size; + +extern struct { + uint32_t spr8; +} setup_simple_regs; + +extern unsigned char setup_dol_start[]; +extern uint32_t setup_dol_size; +extern uint64_t rmo_top; + +extern struct { + uint32_t spr8; +} setup_dol_regs; + +#define SIZE_16M (16*1024*1024UL) + +int elf_ppc_probe(const char *buf, off_t len); +int elf_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_ppc_usage(void); + +int uImage_ppc_probe(const char *buf, off_t len); +int uImage_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void uImage_ppc_usage(void); + +int dol_ppc_probe(const char *buf, off_t len); +int dol_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void dol_ppc_usage(void); + +/* + * During inital setup the kernel does not map the whole memory but a part of + * it. On Book-E that is 64MiB, 601 24MiB or 256MiB (if possible). + */ +#define KERNEL_ACCESS_TOP (24 * 1024 * 1024) + +/* boot block version 17 as defined by the linux kernel */ +struct bootblock { + unsigned magic, + totalsize, + off_dt_struct, + off_dt_strings, + off_mem_rsvmap, + version, + last_comp_version, + boot_physid, + dt_strings_size, + dt_struct_size; +}; + +typedef struct mem_rgns { + unsigned int size; + struct memory_range *ranges; +} mem_rgns_t; +extern mem_rgns_t usablemem_rgns; +extern int max_memory_ranges; +extern unsigned long long crash_base, crash_size; +extern unsigned long long initrd_base, initrd_size; +extern unsigned long long ramdisk_base, ramdisk_size; +extern unsigned char reuse_initrd; +extern const char *ramdisk; + +/* Method to parse the memory/reg nodes in device-tree */ +extern unsigned long dt_address_cells, dt_size_cells; +extern int init_memory_region_info(void); +extern int read_memory_region_limits(int fd, unsigned long long *start, + unsigned long long *end); +extern int get_devtree_value(const char *fname, unsigned long long *pvalue); +/*fs2dt*/ +void reserve(unsigned long long where, unsigned long long length); + +/* Defined kexec-uImage-ppc.c */ +extern char* slurp_ramdisk_ppc(const char *filename, off_t *r_size); +#endif /* KEXEC_PPC_H */ diff --git a/kexec/arch/ppc/kexec-uImage-ppc.c b/kexec/arch/ppc/kexec-uImage-ppc.c new file mode 100644 index 0000000..e8f7adc --- /dev/null +++ b/kexec/arch/ppc/kexec-uImage-ppc.c @@ -0,0 +1,325 @@ +/* + * uImage support for PowerPC + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <image.h> +#include <getopt.h> +#include <arch/options.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc.h" +#include "fixup_dtb.h" +#include <kexec-uImage.h> +#include "crashdump-powerpc.h" +#include <limits.h> + +int create_flatten_tree(struct kexec_info *, unsigned char **, unsigned long *, + char *); + +/* See options.h -- add any more there, too. */ +static const struct option options[] = { + KEXEC_ARCH_OPTIONS + {"command-line", 1, 0, OPT_APPEND}, + {"append", 1, 0, OPT_APPEND}, + {"ramdisk", 1, 0, OPT_RAMDISK}, + {"initrd", 1, 0, OPT_RAMDISK}, + {"dtb", 1, 0, OPT_DTB}, + {"reuse-node", 1, 0, OPT_NODES}, + {0, 0, 0, 0}, +}; +static const char short_options[] = KEXEC_ARCH_OPT_STR; + +void uImage_ppc_usage(void) +{ + printf( + " --command-line=STRING Set the kernel command line to STRING.\n" + " --append=STRING Set the kernel command line to STRING.\n" + " --ramdisk=<filename> Initial RAM disk.\n" + " --initrd=<filename> same as --ramdisk\n" + " --dtb=<filename> Specify device tree blob file.\n" + " --reuse-node=node Specify nodes which should be taken from /proc/device-tree.\n" + " Can be set multiple times.\n" + ); +} + +/* + * Load the ramdisk into buffer. + * If the supplied image is in uImage format use + * uImage_load() to read the payload from the image. + */ +char *slurp_ramdisk_ppc(const char *filename, off_t *r_size) +{ + struct Image_info img; + off_t size; + const char *buf = slurp_file(filename, &size); + int rc; + + /* Check if this is a uImage RAMDisk */ + if (!buf) + return buf; + rc = uImage_probe_ramdisk(buf, size, IH_ARCH_PPC); + if (rc < 0) + die("uImage: Corrupted ramdisk file %s\n", filename); + else if (rc == 0) { + if (uImage_load(buf, size, &img) != 0) + die("uImage: Reading %ld bytes from %s failed\n", + size, filename); + buf = img.buf; + size = img.len; + } + + *r_size = size; + return buf; +} + +int uImage_ppc_probe(const char *buf, off_t len) +{ + return uImage_probe_kernel(buf, len, IH_ARCH_PPC); +} + +static int ppc_load_bare_bits(int argc, char **argv, const char *buf, + off_t len, struct kexec_info *info, unsigned int load_addr, + unsigned int ep) +{ + char *command_line, *cmdline_buf, *crash_cmdline; + char *tmp_cmdline; + int command_line_len, crash_cmdline_len; + char *dtb; + unsigned int addr; + unsigned long dtb_addr; + unsigned long dtb_addr_actual; +#define FIXUP_ENTRYS (20) + char *fixup_nodes[FIXUP_ENTRYS + 1]; + int cur_fixup = 0; + int opt; + int ret = 0; + char *seg_buf = NULL; + off_t seg_size = 0; + unsigned long long hole_addr; + unsigned long max_addr; + char *blob_buf = NULL; + off_t blob_size = 0; + char *error_msg = NULL; + + cmdline_buf = NULL; + command_line = NULL; + tmp_cmdline = NULL; + dtb = NULL; + max_addr = LONG_MAX; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) { + break; + } + case OPT_APPEND: + tmp_cmdline = optarg; + break; + + case OPT_RAMDISK: + ramdisk = optarg; + break; + + case OPT_DTB: + dtb = optarg; + break; + + case OPT_NODES: + if (cur_fixup >= FIXUP_ENTRYS) { + die("The number of entries for the fixup is too large\n"); + } + fixup_nodes[cur_fixup] = optarg; + cur_fixup++; + break; + } + } + + if (ramdisk && reuse_initrd) + die("Can't specify --ramdisk or --initrd with --reuseinitrd\n"); + + command_line_len = 0; + if (tmp_cmdline) { + command_line = tmp_cmdline; + } else { + command_line = get_command_line(); + } + command_line_len = strlen(command_line) + 1; + + fixup_nodes[cur_fixup] = NULL; + + /* + * len contains the length of the whole kernel image except the bss + * section. The 1 MiB should cover it. The purgatory and the dtb are + * allocated from memtop down towards zero so we should never get too + * close to the bss :) + */ +#define _1MiB (1 * 1024 * 1024) + + /* + * If the provided load_addr cannot be allocated, find a new + * area. Rebase the entry point based on the new load_addr. + */ + if (!valid_memory_range(info, load_addr, load_addr + (len + _1MiB))) { + int ep_offset = ep - load_addr; + + load_addr = locate_hole(info, len + _1MiB, 0, 0, max_addr, 1); + if (load_addr == ULONG_MAX) { + printf("Can't allocate memory for kernel of len %ld\n", + len + _1MiB); + return -1; + } + + ep = load_addr + ep_offset; + } + + add_segment(info, buf, len, load_addr, len + _1MiB); + + + if (info->kexec_flags & KEXEC_ON_CRASH) { + crash_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)crash_cmdline, 0, COMMAND_LINE_SIZE); + ret = load_crashdump_segments(info, crash_cmdline, + max_addr, 0); + if (ret < 0) { + ret = -1; + goto out; + } + crash_cmdline_len = strlen(crash_cmdline); + } else { + crash_cmdline = NULL; + crash_cmdline_len = 0; + } + + if (crash_cmdline_len + command_line_len + 1 > COMMAND_LINE_SIZE) { + printf("Kernel command line exceeds maximum possible length\n"); + return -1; + } + + cmdline_buf = xmalloc(COMMAND_LINE_SIZE); + memset((void *)cmdline_buf, 0, COMMAND_LINE_SIZE); + + if (command_line) + strcpy(cmdline_buf, command_line); + if (crash_cmdline) + strncat(cmdline_buf, crash_cmdline, crash_cmdline_len); + + elf_rel_build_load(info, &info->rhdr, (const char *)purgatory, + purgatory_size, 0, -1, -1, 0); + + /* Here we need to initialize the device tree, and find out where + * it is going to live so we can place it directly after the + * kernel image */ + if (dtb) { + /* Grab device tree from buffer */ + blob_buf = slurp_file(dtb, &blob_size); + } else { + create_flatten_tree(info, (unsigned char **)&blob_buf, + (unsigned long *)&blob_size, cmdline_buf); + } + if (!blob_buf || !blob_size) { + error_msg = "Device tree seems to be an empty file.\n"; + goto out2; + } + + /* initial fixup for device tree */ + blob_buf = fixup_dtb_init(info, blob_buf, &blob_size, load_addr, &dtb_addr); + + if (ramdisk) { + seg_buf = slurp_ramdisk_ppc(ramdisk, &seg_size); + /* Load ramdisk at top of memory */ + hole_addr = add_buffer(info, seg_buf, seg_size, seg_size, + 0, dtb_addr + blob_size, max_addr, -1); + ramdisk_base = hole_addr; + ramdisk_size = seg_size; + } + if (reuse_initrd) { + ramdisk_base = initrd_base; + ramdisk_size = initrd_size; + } + + if (info->kexec_flags & KEXEC_ON_CRASH && ramdisk_base != 0) { + if ( (ramdisk_base < crash_base) || + (ramdisk_base > crash_base + crash_size) ) { + printf("WARNING: ramdisk is above crashkernel region!\n"); + } + else if (ramdisk_base + ramdisk_size > crash_base + crash_size) { + printf("WARNING: ramdisk overflows crashkernel region!\n"); + } + } + + /* Perform final fixup on devie tree, i.e. everything beside what + * was done above */ + fixup_dtb_finalize(info, blob_buf, &blob_size, fixup_nodes, + cmdline_buf); + dtb_addr_actual = add_buffer(info, blob_buf, blob_size, blob_size, 0, dtb_addr, + load_addr + KERNEL_ACCESS_TOP, 1); + if (dtb_addr_actual != dtb_addr) { + printf("dtb_addr_actual: %lx, dtb_addr: %lx\n", dtb_addr_actual, dtb_addr); + error_msg = "Error device tree not loadded to address it was expecting to be loaded too!\n"; + goto out2; + } + + /* set various variables for the purgatory */ + addr = ep; + elf_rel_set_symbol(&info->rhdr, "kernel", &addr, sizeof(addr)); + + addr = dtb_addr; + elf_rel_set_symbol(&info->rhdr, "dt_offset", &addr, sizeof(addr)); + +#define PUL_STACK_SIZE (16 * 1024) + addr = locate_hole(info, PUL_STACK_SIZE, 0, 0, -1, 1); + addr += PUL_STACK_SIZE; + elf_rel_set_symbol(&info->rhdr, "stack", &addr, sizeof(addr)); + /* No allocation past here in order not to overwrite the stack */ +#undef PUL_STACK_SIZE + + /* + * Fixup ThreadPointer(r2) for purgatory. + * PPC32 ELF ABI expects : + * ThreadPointer (TP) = TCB + 0x7000 + * We manually allocate a TCB space and set the TP + * accordingly. + */ +#define TCB_SIZE 1024 +#define TCB_TP_OFFSET 0x7000 /* PPC32 ELF ABI */ + addr = locate_hole(info, TCB_SIZE, 0, 0, + ((unsigned long)-1 - TCB_TP_OFFSET), + 1); + addr += TCB_SIZE + TCB_TP_OFFSET; + elf_rel_set_symbol(&info->rhdr, "my_thread_ptr", &addr, sizeof(addr)); +#undef TCB_TP_OFFSET +#undef TCB_SIZE + + addr = elf_rel_get_addr(&info->rhdr, "purgatory_start"); + info->entry = (void *)addr; + +out2: + free(cmdline_buf); +out: + free(crash_cmdline); + if (!tmp_cmdline) + free(command_line); + if (error_msg) + die("%s", error_msg); + return ret; +} + +int uImage_ppc_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct Image_info img; + int ret; + + ret = uImage_load(buf, len, &img); + if (ret) + return ret; + + return ppc_load_bare_bits(argc, argv, img.buf, img.len, info, + img.base, img.ep); +} diff --git a/kexec/arch/ppc/libfdt-wrapper.c b/kexec/arch/ppc/libfdt-wrapper.c new file mode 100644 index 0000000..ef355d0 --- /dev/null +++ b/kexec/arch/ppc/libfdt-wrapper.c @@ -0,0 +1,189 @@ +/* + * This file does the necessary interface mapping between the bootwrapper + * device tree operations and the interface provided by shared source + * files flatdevicetree.[ch]. + * + * Copyright 2007 David Gibson, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <stdlib.h> +#include <stddef.h> +#include <stdio.h> +#include <page.h> +#include <libfdt.h> +#include "ops.h" +#include "../../kexec.h" + +#define BAD_ERROR(err) (((err) < 0) \ + && ((err) != -FDT_ERR_NOTFOUND) \ + && ((err) != -FDT_ERR_EXISTS)) + +#define check_err(err) \ + ({ \ + if (BAD_ERROR(err) || ((err < 0) && kexec_debug)) \ + printf("%s():%d %s\n\r", __func__, __LINE__, \ + fdt_strerror(err)); \ + if (BAD_ERROR(err)) \ + exit(1); \ + (err < 0) ? -1 : 0; \ + }) + +#define offset_devp(off) \ + ({ \ + int _offset = (off); \ + check_err(_offset) ? NULL : (void *)(_offset+1); \ + }) + +#define devp_offset_find(devp) (((int)(devp))-1) +#define devp_offset(devp) (devp ? ((int)(devp))-1 : 0) + +static void *fdt; +static void *buf; /* = NULL */ +struct dt_ops dt_ops; + +#define EXPAND_GRANULARITY 1024 + +static void expand_buf(int minexpand) +{ + int size = fdt_totalsize(fdt); + int rc; + + size = _ALIGN(size + minexpand, EXPAND_GRANULARITY); + buf = realloc(buf, size); + if (!buf) + die("Couldn't find %d bytes to expand device tree\n\r", size); + rc = fdt_open_into(fdt, buf, size); + if (rc != 0) + die("Couldn't expand fdt into new buffer: %s\n\r", + fdt_strerror(rc)); + + fdt = buf; +} + +static void *fdt_wrapper_finddevice(const char *path) +{ + return offset_devp(fdt_path_offset(fdt, path)); +} + +static int fdt_wrapper_getprop(const void *devp, const char *name, + void *buf, const int buflen) +{ + const void *p; + int len; + + p = fdt_getprop(fdt, devp_offset(devp), name, &len); + if (!p) + return check_err(len); + memcpy(buf, p, min(len, buflen)); + return len; +} + +static int fdt_wrapper_setprop(const void *devp, const char *name, + const void *buf, const int len) +{ + int rc; + + rc = fdt_setprop(fdt, devp_offset(devp), name, buf, len); + if (rc == -FDT_ERR_NOSPACE) { + expand_buf(len + 16); + rc = fdt_setprop(fdt, devp_offset(devp), name, buf, len); + } + + return check_err(rc); +} + +static void *fdt_wrapper_get_parent(const void *devp) +{ + return offset_devp(fdt_parent_offset(fdt, devp_offset(devp))); +} + +static void *fdt_wrapper_create_node(const void *devp, const char *name) +{ + int offset; + + offset = fdt_add_subnode(fdt, devp_offset(devp), name); + if (offset == -FDT_ERR_NOSPACE) { + expand_buf(strlen(name) + 16); + offset = fdt_add_subnode(fdt, devp_offset(devp), name); + } + + return offset_devp(offset); +} + +static void *fdt_wrapper_find_node_by_prop_value(const void *prev, + const char *name, + const char *val, + int len) +{ + int offset = fdt_node_offset_by_prop_value(fdt, devp_offset_find(prev), + name, val, len); + return offset_devp(offset); +} + +static void *fdt_wrapper_find_node_by_compatible(const void *prev, + const char *val) +{ + int offset = fdt_node_offset_by_compatible(fdt, devp_offset_find(prev), + val); + return offset_devp(offset); +} + +static char *fdt_wrapper_get_path(const void *devp, char *buf, int len) +{ + int rc; + + rc = fdt_get_path(fdt, devp_offset(devp), buf, len); + if (check_err(rc)) + return NULL; + return buf; +} + +static unsigned long fdt_wrapper_finalize(void) +{ + int rc; + + rc = fdt_pack(fdt); + if (rc != 0) + die("Couldn't pack flat tree: %s\n\r", + fdt_strerror(rc)); + return (unsigned long)fdt; +} + +void fdt_init(void *blob) +{ + int err; + int bufsize; + + dt_ops.finddevice = fdt_wrapper_finddevice; + dt_ops.getprop = fdt_wrapper_getprop; + dt_ops.setprop = fdt_wrapper_setprop; + dt_ops.get_parent = fdt_wrapper_get_parent; + dt_ops.create_node = fdt_wrapper_create_node; + dt_ops.find_node_by_prop_value = fdt_wrapper_find_node_by_prop_value; + dt_ops.find_node_by_compatible = fdt_wrapper_find_node_by_compatible; + dt_ops.get_path = fdt_wrapper_get_path; + dt_ops.finalize = fdt_wrapper_finalize; + + /* Make sure the dt blob is the right version and so forth */ + fdt = blob; + bufsize = fdt_totalsize(fdt); + + err = fdt_open_into(fdt, fdt, bufsize); + if (err != 0) + die("fdt_init(): %s\n\r", fdt_strerror(err)); +} diff --git a/kexec/arch/ppc/ops.h b/kexec/arch/ppc/ops.h new file mode 100644 index 0000000..5e7a070 --- /dev/null +++ b/kexec/arch/ppc/ops.h @@ -0,0 +1,147 @@ +/* + * Global definition of all the bootwrapper operations. + * + * Author: Mark A. Greer <mgreer@mvista.com> + * + * 2006 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ +#ifndef _PPC_BOOT_OPS_H_ +#define _PPC_BOOT_OPS_H_ +#include "types.h" + +#define MAX_PATH_LEN 256 +#define MAX_PROP_LEN 256 /* What should this be? */ + +typedef void (*kernel_entry_t)(unsigned long r3, unsigned long r4, void *r5); + +/* Device Tree operations */ +struct dt_ops { + void * (*finddevice)(const char *name); + int (*getprop)(const void *phandle, const char *name, void *buf, + const int buflen); + int (*setprop)(const void *phandle, const char *name, + const void *buf, const int buflen); + void *(*get_parent)(const void *phandle); + /* The node must not already exist. */ + void *(*create_node)(const void *parent, const char *name); + void *(*find_node_by_prop_value)(const void *prev, + const char *propname, + const char *propval, int proplen); + void *(*find_node_by_compatible)(const void *prev, + const char *compat); + unsigned long (*finalize)(void); + char *(*get_path)(const void *phandle, char *buf, int len); +}; +extern struct dt_ops dt_ops; + +void fdt_init(void *blob); +extern void flush_cache(void *, unsigned long); +int dt_xlate_reg(void *node, int res, unsigned long *addr, unsigned long *size); +int dt_xlate_addr(void *node, u32 *buf, int buflen, unsigned long *xlated_addr); +int dt_is_compatible(void *node, const char *compat); +void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize); +int dt_get_virtual_reg(void *node, void **addr, int nres); + +static inline void *finddevice(const char *name) +{ + return (dt_ops.finddevice) ? dt_ops.finddevice(name) : NULL; +} + +static inline int getprop(void *devp, const char *name, void *buf, int buflen) +{ + return (dt_ops.getprop) ? dt_ops.getprop(devp, name, buf, buflen) : -1; +} + +static inline int setprop(void *devp, const char *name, + const void *buf, int buflen) +{ + return (dt_ops.setprop) ? dt_ops.setprop(devp, name, buf, buflen) : -1; +} +#define setprop_val(devp, name, val) \ + do { \ + typeof(val) x = (val); \ + setprop((devp), (name), &x, sizeof(x)); \ + } while (0) + +static inline int setprop_str(void *devp, const char *name, const char *buf) +{ + if (dt_ops.setprop) + return dt_ops.setprop(devp, name, buf, strlen(buf) + 1); + + return -1; +} + +static inline void *get_parent(const char *devp) +{ + return dt_ops.get_parent ? dt_ops.get_parent(devp) : NULL; +} + +static inline void *create_node(const void *parent, const char *name) +{ + return dt_ops.create_node ? dt_ops.create_node(parent, name) : NULL; +} + + +static inline void *find_node_by_prop_value(const void *prev, + const char *propname, + const char *propval, int proplen) +{ + if (dt_ops.find_node_by_prop_value) + return dt_ops.find_node_by_prop_value(prev, propname, + propval, proplen); + + return NULL; +} + +static inline void *find_node_by_prop_value_str(const void *prev, + const char *propname, + const char *propval) +{ + return find_node_by_prop_value(prev, propname, propval, + strlen(propval) + 1); +} + +static inline void *find_node_by_devtype(const void *prev, + const char *type) +{ + return find_node_by_prop_value_str(prev, "device_type", type); +} + +static inline void *find_node_by_alias(const char *alias) +{ + void *devp = finddevice("/aliases"); + + if (devp) { + char path[MAX_PATH_LEN]; + if (getprop(devp, alias, path, MAX_PATH_LEN) > 0) + return finddevice(path); + } + + return NULL; +} + +static inline void *find_node_by_compatible(const void *prev, + const char *compat) +{ + if (dt_ops.find_node_by_compatible) + return dt_ops.find_node_by_compatible(prev, compat); + + return NULL; +} + +#define dt_fixup_mac_addresses(...) \ + __dt_fixup_mac_addresses(0, __VA_ARGS__, NULL) + + +static inline char *get_path(const void *phandle, char *buf, int len) +{ + if (dt_ops.get_path) + return dt_ops.get_path(phandle, buf, len); + + return NULL; +} + +#endif /* _PPC_BOOT_OPS_H_ */ diff --git a/kexec/arch/ppc/ppc-setup-dol.S b/kexec/arch/ppc/ppc-setup-dol.S new file mode 100644 index 0000000..17169bd --- /dev/null +++ b/kexec/arch/ppc/ppc-setup-dol.S @@ -0,0 +1,174 @@ +/* + * ppc-setup-dol.S - setup glue for Nintendo's GameCube + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include "ppc_asm.h" + + .data + .globl setup_dol_start +setup_dol_start: + + /* Try to reproduce the GameCube "native" environment */ + + /* Setup BATs */ + isync + li r8, 0 + mtspr DBAT0U, r8 + mtspr DBAT0L, r8 + mtspr DBAT1U, r8 + mtspr DBAT1L, r8 + mtspr DBAT2U, r8 + mtspr DBAT2L, r8 + mtspr DBAT3U, r8 + mtspr DBAT3L, r8 + mtspr IBAT0U, r8 + mtspr IBAT0L, r8 + mtspr IBAT1U, r8 + mtspr IBAT1L, r8 + mtspr IBAT2U, r8 + mtspr IBAT2L, r8 + mtspr IBAT3U, r8 + mtspr IBAT3L, r8 + + /* + * Memory Map + * start end size description + * 0x80000000 0x817fffff 24MB RAM, uncached + * 0xc0000000 0xc17fffff 24MB RAM, cached + * 0xc8000000 0xc81fffff 2MB Embedded Framebuffer + * 0xcc000000 Hardware registers + * 0xe0000000 Layer 2 transfer cache ??? 256KB + * + */ + + isync + lis r8, 0x8000 /* IBAT0,DBAT0 for first 16Mbytes */ + ori r8, r8, 0x01ff /* 16MB */ + mtspr IBAT0U, r8 + mtspr DBAT0U, r8 + li r8, 0x0002 /* rw */ + mtspr IBAT0L, r8 + mtspr DBAT0L, r8 + + lis r8, 0xc000 /* DBAT1 for IO mem */ + ori r8, r8, 0x1fff /* 256MB */ + mtspr DBAT1U, r8 + li r8, 0x002a /* uncached, guarded ,rw */ + mtspr DBAT1L, r8 + + lis r8, 0x8100 /* IBAT2,DBAT2 for next 8Mbytes */ + ori r8, r8, 0x00ff /* 8MB */ + mtspr IBAT2U, r8 + mtspr DBAT2U, r8 + lis r8, 0x0100 + ori r8, r8, 0x0002 /* rw */ + mtspr IBAT2L, r8 + mtspr DBAT2L, r8 + + lis r8, 0xe000 /* DBAT3 for layer 2 transfer cache ??? */ + ori r8, r8, 0x01fe /* 16MB ??? */ + mtspr DBAT3U, r8 + lis r8, 0xe000 + ori r8, r8, 0x0002 /* rw */ + mtspr DBAT3L, r8 + + sync + isync + +/* AFAIK, this is not strictly needed, although seems sane */ +#if 1 + li r9, 0 + + /* page table pointer */ + sync + mtspr SDR1, r9 + + /* segment registers */ + li r8, 16 + mtctr r8 + li r8, 0 +1: mtsrin r9, r8 /* zero */ + sync + addis r8,r8,0x1000 /* next register */ + bdnz 1b +#endif + + /* switch MMU on and continue */ + RELOC_SYM(1f) + mfmsr r0 + ori r0, r0, MSR_RI|MSR_ME|MSR_DR|MSR_IR + mtspr SRR1, r0 + oris r3, r3, 0x8000 /* adjust text address */ + mtspr SRR0, r3 + oris r1, r1, 0x8000 /* adjust stack */ + sync + rfi + +1: + /* from now on we run in a DOL-like environment */ + + + /* first, sanitize the hardware a little bit */ + /* although seems to be not needed in the general case */ + +#if 1 + /* audio */ + lis r8, 0xcc00 /* io mem */ + li r9, 0 + sth r9, 0x5036(r8) /* stop audio sample */ + stw r9, 0x6c00(r8) /* stop streaming */ + stw r9, 0x6c04(r8) /* mute */ + + /* video */ + mfspr r8, 920 /* spr920 = HID2 */ + rlwinm r8, r8, 0, 4, 2 /* stop GX FIFO, and more */ + mtspr 920, r8 + + /* exi */ + lis r8, 0xcc00 /* io mem */ +1: lwz r9,0x680c(r8) /* wait for dma transfer to complete */ + andi. r9,r9,1 + bne+ 1b + stw r9,0x6800(r8) /* disable exi interrupts */ + addi r8,r8,0x14 /* next channel */ + andi. r9,r8,0x40 /* XXX 4 channels? */ + beq+ 1b + + /* pic */ + lis r8, 0xcc00 /* io mem */ + li r9, 0 + stw r9, 0x3004(r8) /* mask all interrupts */ + stw r9, 0x3000(r8) /* clear interrupt cause */ + + /* invalidate L1 data and instructions caches */ + mfspr r8, HID0 + ori r8, r8, HID0_ICFI|HID0_DCI + mtspr HID0, r8 +#endif + + /* jump to our entry point */ + RELOC_SYM(setup_dol_regs) + mr r9, r3 + lwz r5, spr8 - setup_dol_regs(r9) + + mtlr r5 + blr + + .balign 4 + .globl setup_dol_regs +setup_dol_regs: +spr8: .long 0x00000000 + + .balign 4 +//#include "isobel_reloc_debug_console.s" + +setup_dol_end: + + .globl setup_dol_size +setup_dol_size: + .long setup_dol_end - setup_dol_start + diff --git a/kexec/arch/ppc/ppc-setup-simple.S b/kexec/arch/ppc/ppc-setup-simple.S new file mode 100644 index 0000000..1317a8d --- /dev/null +++ b/kexec/arch/ppc/ppc-setup-simple.S @@ -0,0 +1,39 @@ +/* + * ppc-setup-simple.S - (hopefully) setup for simple embedded platforms + * Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +/* + * Only suitable for platforms booting with MMU turned off. + * -- Albert Herranz + */ + +#include "ppc_asm.h" + + .data + .globl setup_simple_start +setup_simple_start: + + /* should perform here any required setup */ + + RELOC_SYM(setup_simple_regs) + mr r9, r3 + lwz r5, spr8 - setup_simple_regs(r9) + + mtlr r5 + blr + + .balign 4 + .globl setup_simple_regs +setup_simple_regs: +spr8: .long 0x00000000 + +setup_simple_end: + + .globl setup_simple_size +setup_simple_size: + .long setup_simple_end - setup_simple_start + diff --git a/kexec/arch/ppc/ppc_asm.h b/kexec/arch/ppc/ppc_asm.h new file mode 100644 index 0000000..36503a9 --- /dev/null +++ b/kexec/arch/ppc/ppc_asm.h @@ -0,0 +1,506 @@ +/* + * ppc_asm.h - mainly bits stolen from Linux kernel asm/reg.h and asm/ppc_asm.h + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +/* Condition Register Bit Fields */ + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + + +/* General Purpose Registers (GPRs) */ + +#define r0 0 +#define r1 1 +#define r2 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +/* Machine State Register (MSR) Fields */ +#define MSR_SF (1<<63) +#define MSR_ISF (1<<61) +#define MSR_VEC (1<<25) /* Enable AltiVec */ +#define MSR_POW (1<<18) /* Enable Power Management */ +#define MSR_WE (1<<18) /* Wait State Enable */ +#define MSR_TGPR (1<<17) /* TLB Update registers in use */ +#define MSR_CE (1<<17) /* Critical Interrupt Enable */ +#define MSR_ILE (1<<16) /* Interrupt Little Endian */ +#define MSR_EE (1<<15) /* External Interrupt Enable */ +#define MSR_PR (1<<14) /* Problem State / Privilege Level */ +#define MSR_FP (1<<13) /* Floating Point enable */ +#define MSR_ME (1<<12) /* Machine Check Enable */ +#define MSR_FE0 (1<<11) /* Floating Exception mode 0 */ +#define MSR_SE (1<<10) /* Single Step */ +#define MSR_BE (1<<9) /* Branch Trace */ +#define MSR_DE (1<<9) /* Debug Exception Enable */ +#define MSR_FE1 (1<<8) /* Floating Exception mode 1 */ +#define MSR_IP (1<<6) /* Exception prefix 0x000/0xFFF */ +#define MSR_IR (1<<5) /* Instruction Relocate */ +#define MSR_DR (1<<4) /* Data Relocate */ +#define MSR_PE (1<<3) /* Protection Enable */ +#define MSR_PX (1<<2) /* Protection Exclusive Mode */ +#define MSR_RI (1<<1) /* Recoverable Exception */ +#define MSR_LE (1<<0) /* Little Endian */ + +/* Special Purpose Registers (SPRNs)*/ +#define SPRN_CTR 0x009 /* Count Register */ +#define SPRN_DABR 0x3F5 /* Data Address Breakpoint Register */ +#define SPRN_DAR 0x013 /* Data Address Register */ +#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ +#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ +#define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ +#define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */ +#define SPRN_HIOR 0x137 /* 970 Hypervisor interrupt offset */ +#define SPRN_DBAT0L 0x219 /* Data BAT 0 Lower Register */ +#define SPRN_DBAT0U 0x218 /* Data BAT 0 Upper Register */ +#define SPRN_DBAT1L 0x21B /* Data BAT 1 Lower Register */ +#define SPRN_DBAT1U 0x21A /* Data BAT 1 Upper Register */ +#define SPRN_DBAT2L 0x21D /* Data BAT 2 Lower Register */ +#define SPRN_DBAT2U 0x21C /* Data BAT 2 Upper Register */ +#define SPRN_DBAT3L 0x21F /* Data BAT 3 Lower Register */ +#define SPRN_DBAT3U 0x21E /* Data BAT 3 Upper Register */ +#define SPRN_DBAT4L 0x239 /* Data BAT 4 Lower Register */ +#define SPRN_DBAT4U 0x238 /* Data BAT 4 Upper Register */ +#define SPRN_DBAT5L 0x23B /* Data BAT 5 Lower Register */ +#define SPRN_DBAT5U 0x23A /* Data BAT 5 Upper Register */ +#define SPRN_DBAT6L 0x23D /* Data BAT 6 Lower Register */ +#define SPRN_DBAT6U 0x23C /* Data BAT 6 Upper Register */ +#define SPRN_DBAT7L 0x23F /* Data BAT 7 Lower Register */ +#define SPRN_DBAT7U 0x23E /* Data BAT 7 Upper Register */ + +#define SPRN_DEC 0x016 /* Decrement Register */ +#define SPRN_DER 0x095 /* Debug Enable Regsiter */ +#define DER_RSTE 0x40000000 /* Reset Interrupt */ +#define DER_CHSTPE 0x20000000 /* Check Stop */ +#define DER_MCIE 0x10000000 /* Machine Check Interrupt */ +#define DER_EXTIE 0x02000000 /* External Interrupt */ +#define DER_ALIE 0x01000000 /* Alignment Interrupt */ +#define DER_PRIE 0x00800000 /* Program Interrupt */ +#define DER_FPUVIE 0x00400000 /* FP Unavailable Interrupt */ +#define DER_DECIE 0x00200000 /* Decrementer Interrupt */ +#define DER_SYSIE 0x00040000 /* System Call Interrupt */ +#define DER_TRE 0x00020000 /* Trace Interrupt */ +#define DER_SEIE 0x00004000 /* FP SW Emulation Interrupt */ +#define DER_ITLBMSE 0x00002000 /* Imp. Spec. Instruction TLB Miss */ +#define DER_ITLBERE 0x00001000 /* Imp. Spec. Instruction TLB Error */ +#define DER_DTLBMSE 0x00000800 /* Imp. Spec. Data TLB Miss */ +#define DER_DTLBERE 0x00000400 /* Imp. Spec. Data TLB Error */ +#define DER_LBRKE 0x00000008 /* Load/Store Breakpoint Interrupt */ +#define DER_IBRKE 0x00000004 /* Instruction Breakpoint Interrupt */ +#define DER_EBRKE 0x00000002 /* External Breakpoint Interrupt */ +#define DER_DPIE 0x00000001 /* Dev. Port Nonmaskable Request */ +#define SPRN_DMISS 0x3D0 /* Data TLB Miss Register */ +#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ +#define SPRN_EAR 0x11A /* External Address Register */ +#define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */ +#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */ +#define SPRN_HID0 0x3F0 /* Hardware Implementation Register 0 */ +#define HID0_EMCP (1<<31) /* Enable Machine Check pin */ +#define HID0_EBA (1<<29) /* Enable Bus Address Parity */ +#define HID0_EBD (1<<28) /* Enable Bus Data Parity */ +#define HID0_SBCLK (1<<27) +#define HID0_EICE (1<<26) +#define HID0_TBEN (1<<26) /* Timebase enable - 745x */ +#define HID0_ECLK (1<<25) +#define HID0_PAR (1<<24) +#define HID0_STEN (1<<24) /* Software table search enable - 745x */ +#define HID0_HIGH_BAT (1<<23) /* Enable high BATs - 7455 */ +#define HID0_DOZE (1<<23) +#define HID0_NAP (1<<22) +#define HID0_SLEEP (1<<21) +#define HID0_DPM (1<<20) +#define HID0_BHTCLR (1<<18) /* Clear branch history table - 7450 */ +#define HID0_XAEN (1<<17) /* Extended addressing enable - 7450 */ +#define HID0_NHR (1<<16) /* Not hard reset (software bit-7450)*/ +#define HID0_ICE (1<<15) /* Instruction Cache Enable */ +#define HID0_DCE (1<<14) /* Data Cache Enable */ +#define HID0_ILOCK (1<<13) /* Instruction Cache Lock */ +#define HID0_DLOCK (1<<12) /* Data Cache Lock */ +#define HID0_ICFI (1<<11) /* Instr. Cache Flash Invalidate */ +#define HID0_DCI (1<<10) /* Data Cache Invalidate */ +#define HID0_SPD (1<<9) /* Speculative disable */ +#define HID0_SGE (1<<7) /* Store Gathering Enable */ +#define HID0_SIED (1<<7) /* Serial Instr. Execution [Disable] */ +#define HID0_DFCA (1<<6) /* Data Cache Flush Assist */ +#define HID0_LRSTK (1<<4) /* Link register stack - 745x */ +#define HID0_BTIC (1<<5) /* Branch Target Instr Cache Enable */ +#define HID0_ABE (1<<3) /* Address Broadcast Enable */ +#define HID0_FOLD (1<<3) /* Branch Folding enable - 745x */ +#define HID0_BHTE (1<<2) /* Branch History Table Enable */ +#define HID0_BTCD (1<<1) /* Branch target cache disable */ +#define HID0_NOPDST (1<<1) /* No-op dst, dstt, etc. instr. */ +#define HID0_NOPTI (1<<0) /* No-op dcbt and dcbst instr. */ + +#define SPRN_HID1 0x3F1 /* Hardware Implementation Register 1 */ +#define HID1_EMCP (1<<31) /* 7450 Machine Check Pin Enable */ +#define HID1_PC0 (1<<16) /* 7450 PLL_CFG[0] */ +#define HID1_PC1 (1<<15) /* 7450 PLL_CFG[1] */ +#define HID1_PC2 (1<<14) /* 7450 PLL_CFG[2] */ +#define HID1_PC3 (1<<13) /* 7450 PLL_CFG[3] */ +#define HID1_SYNCBE (1<<11) /* 7450 ABE for sync, eieio */ +#define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ +#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ +#define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ +#define SPRN_HID4 0x3F4 /* 970 HID4 */ +#define SPRN_HID5 0x3F6 /* 970 HID5 */ +#if !defined(SPRN_IAC1) && !defined(SPRN_IAC2) +#define SPRN_IAC1 0x3F4 /* Instruction Address Compare 1 */ +#define SPRN_IAC2 0x3F5 /* Instruction Address Compare 2 */ +#endif +#define SPRN_IBAT0L 0x211 /* Instruction BAT 0 Lower Register */ +#define SPRN_IBAT0U 0x210 /* Instruction BAT 0 Upper Register */ +#define SPRN_IBAT1L 0x213 /* Instruction BAT 1 Lower Register */ +#define SPRN_IBAT1U 0x212 /* Instruction BAT 1 Upper Register */ +#define SPRN_IBAT2L 0x215 /* Instruction BAT 2 Lower Register */ +#define SPRN_IBAT2U 0x214 /* Instruction BAT 2 Upper Register */ +#define SPRN_IBAT3L 0x217 /* Instruction BAT 3 Lower Register */ +#define SPRN_IBAT3U 0x216 /* Instruction BAT 3 Upper Register */ +#define SPRN_IBAT4L 0x231 /* Instruction BAT 4 Lower Register */ +#define SPRN_IBAT4U 0x230 /* Instruction BAT 4 Upper Register */ +#define SPRN_IBAT5L 0x233 /* Instruction BAT 5 Lower Register */ +#define SPRN_IBAT5U 0x232 /* Instruction BAT 5 Upper Register */ +#define SPRN_IBAT6L 0x235 /* Instruction BAT 6 Lower Register */ +#define SPRN_IBAT6U 0x234 /* Instruction BAT 6 Upper Register */ +#define SPRN_IBAT7L 0x237 /* Instruction BAT 7 Lower Register */ +#define SPRN_IBAT7U 0x236 /* Instruction BAT 7 Upper Register */ +#define SPRN_ICMP 0x3D5 /* Instruction TLB Compare Register */ +#define SPRN_ICTC 0x3FB /* Instruction Cache Throttling Control Reg */ +#define SPRN_ICTRL 0x3F3 /* 1011 7450 icache and interrupt ctrl */ +#define ICTRL_EICE 0x08000000 /* enable icache parity errs */ +#define ICTRL_EDC 0x04000000 /* enable dcache parity errs */ +#define ICTRL_EICP 0x00000100 /* enable icache par. check */ +#define SPRN_IMISS 0x3D4 /* Instruction TLB Miss Register */ +#define SPRN_IMMR 0x27E /* Internal Memory Map Register */ +#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Regsiter */ +#define SPRN_L2CR2 0x3f8 +#define L2CR_L2E 0x80000000 /* L2 enable */ +#define L2CR_L2PE 0x40000000 /* L2 parity enable */ +#define L2CR_L2SIZ_MASK 0x30000000 /* L2 size mask */ +#define L2CR_L2SIZ_256KB 0x10000000 /* L2 size 256KB */ +#define L2CR_L2SIZ_512KB 0x20000000 /* L2 size 512KB */ +#define L2CR_L2SIZ_1MB 0x30000000 /* L2 size 1MB */ +#define L2CR_L2CLK_MASK 0x0e000000 /* L2 clock mask */ +#define L2CR_L2CLK_DISABLED 0x00000000 /* L2 clock disabled */ +#define L2CR_L2CLK_DIV1 0x02000000 /* L2 clock / 1 */ +#define L2CR_L2CLK_DIV1_5 0x04000000 /* L2 clock / 1.5 */ +#define L2CR_L2CLK_DIV2 0x08000000 /* L2 clock / 2 */ +#define L2CR_L2CLK_DIV2_5 0x0a000000 /* L2 clock / 2.5 */ +#define L2CR_L2CLK_DIV3 0x0c000000 /* L2 clock / 3 */ +#define L2CR_L2RAM_MASK 0x01800000 /* L2 RAM type mask */ +#define L2CR_L2RAM_FLOW 0x00000000 /* L2 RAM flow through */ +#define L2CR_L2RAM_PIPE 0x01000000 /* L2 RAM pipelined */ +#define L2CR_L2RAM_PIPE_LW 0x01800000 /* L2 RAM pipelined latewr */ +#define L2CR_L2DO 0x00400000 /* L2 data only */ +#define L2CR_L2I 0x00200000 /* L2 global invalidate */ +#define L2CR_L2CTL 0x00100000 /* L2 RAM control */ +#define L2CR_L2WT 0x00080000 /* L2 write-through */ +#define L2CR_L2TS 0x00040000 /* L2 test support */ +#define L2CR_L2OH_MASK 0x00030000 /* L2 output hold mask */ +#define L2CR_L2OH_0_5 0x00000000 /* L2 output hold 0.5 ns */ +#define L2CR_L2OH_1_0 0x00010000 /* L2 output hold 1.0 ns */ +#define L2CR_L2SL 0x00008000 /* L2 DLL slow */ +#define L2CR_L2DF 0x00004000 /* L2 differential clock */ +#define L2CR_L2BYP 0x00002000 /* L2 DLL bypass */ +#define L2CR_L2IP 0x00000001 /* L2 GI in progress */ +#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Regsiter */ +#define L3CR_L3E 0x80000000 /* L3 enable */ +#define L3CR_L3PE 0x40000000 /* L3 data parity enable */ +#define L3CR_L3APE 0x20000000 /* L3 addr parity enable */ +#define L3CR_L3SIZ 0x10000000 /* L3 size */ +#define L3CR_L3CLKEN 0x08000000 /* L3 clock enable */ +#define L3CR_L3RES 0x04000000 /* L3 special reserved bit */ +#define L3CR_L3CLKDIV 0x03800000 /* L3 clock divisor */ +#define L3CR_L3IO 0x00400000 /* L3 instruction only */ +#define L3CR_L3SPO 0x00040000 /* L3 sample point override */ +#define L3CR_L3CKSP 0x00030000 /* L3 clock sample point */ +#define L3CR_L3PSP 0x0000e000 /* L3 P-clock sample point */ +#define L3CR_L3REP 0x00001000 /* L3 replacement algorithm */ +#define L3CR_L3HWF 0x00000800 /* L3 hardware flush */ +#define L3CR_L3I 0x00000400 /* L3 global invalidate */ +#define L3CR_L3RT 0x00000300 /* L3 SRAM type */ +#define L3CR_L3NIRCA 0x00000080 /* L3 non-integer ratio clock adj. */ +#define L3CR_L3DO 0x00000040 /* L3 data only mode */ +#define L3CR_PMEN 0x00000004 /* L3 private memory enable */ +#define L3CR_PMSIZ 0x00000001 /* L3 private memory size */ +#define SPRN_MSSCR0 0x3f6 /* Memory Subsystem Control Register 0 */ +#define SPRN_MSSSR0 0x3f7 /* Memory Subsystem Status Register 1 */ +#define SPRN_LDSTCR 0x3f8 /* Load/Store control register */ +#define SPRN_LDSTDB 0x3f4 /* */ +#define SPRN_LR 0x008 /* Link Register */ +#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 */ +#ifndef SPRN_PIR +#define SPRN_PIR 0x3FF /* Processor Identification Register */ +#endif +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 */ +#define SPRN_PTEHI 0x3D5 /* 981 7450 PTE HI word (S/W TLB load) */ +#define SPRN_PTELO 0x3D6 /* 982 7450 PTE LO word (S/W TLB load) */ +#define SPRN_PVR 0x11F /* Processor Version Register */ +#define SPRN_RPA 0x3D6 /* Required Physical Address Register */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ +#define SPRN_SDR1 0x019 /* MMU Hash Base Register */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ +#define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ +#define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ +#define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ +#define SPRN_SPRG3 0x113 /* Special Purpose Register General 3 */ +#define SPRN_SPRG4 0x114 /* Special Purpose Register General 4 */ +#define SPRN_SPRG5 0x115 /* Special Purpose Register General 5 */ +#define SPRN_SPRG6 0x116 /* Special Purpose Register General 6 */ +#define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */ +#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ +#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ +#define SPRN_THRM1 0x3FC /* Thermal Management Register 1 */ +/* these bits were defined in inverted endian sense originally, ugh, confusing */ +#define THRM1_TIN (1 << 31) +#define THRM1_TIV (1 << 30) +#define THRM1_THRES(x) ((x&0x7f)<<23) +#define THRM3_SITV(x) ((x&0x3fff)<<1) +#define THRM1_TID (1<<2) +#define THRM1_TIE (1<<1) +#define THRM1_V (1<<0) +#define SPRN_THRM2 0x3FD /* Thermal Management Register 2 */ +#define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ +#define THRM3_E (1<<0) +#define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register */ +#define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ +#define SPRN_XER 0x001 /* Fixed Point Exception Register */ + +/* Bit definitions for MMCR0 and PMC1 / PMC2. */ +#define MMCR0_PMC1_CYCLES (1 << 7) +#define MMCR0_PMC1_ICACHEMISS (5 << 7) +#define MMCR0_PMC1_DTLB (6 << 7) +#define MMCR0_PMC2_DCACHEMISS 0x6 +#define MMCR0_PMC2_CYCLES 0x1 +#define MMCR0_PMC2_ITLB 0x7 +#define MMCR0_PMC2_LOADMISSTIME 0x5 + +/* Short-hand versions for a number of the above SPRNs */ +#define CTR SPRN_CTR /* Counter Register */ +#define DAR SPRN_DAR /* Data Address Register */ +#define DABR SPRN_DABR /* Data Address Breakpoint Register */ +#define DBAT0L SPRN_DBAT0L /* Data BAT 0 Lower Register */ +#define DBAT0U SPRN_DBAT0U /* Data BAT 0 Upper Register */ +#define DBAT1L SPRN_DBAT1L /* Data BAT 1 Lower Register */ +#define DBAT1U SPRN_DBAT1U /* Data BAT 1 Upper Register */ +#define DBAT2L SPRN_DBAT2L /* Data BAT 2 Lower Register */ +#define DBAT2U SPRN_DBAT2U /* Data BAT 2 Upper Register */ +#define DBAT3L SPRN_DBAT3L /* Data BAT 3 Lower Register */ +#define DBAT3U SPRN_DBAT3U /* Data BAT 3 Upper Register */ +#define DBAT4L SPRN_DBAT4L /* Data BAT 4 Lower Register */ +#define DBAT4U SPRN_DBAT4U /* Data BAT 4 Upper Register */ +#define DBAT5L SPRN_DBAT5L /* Data BAT 5 Lower Register */ +#define DBAT5U SPRN_DBAT5U /* Data BAT 5 Upper Register */ +#define DBAT6L SPRN_DBAT6L /* Data BAT 6 Lower Register */ +#define DBAT6U SPRN_DBAT6U /* Data BAT 6 Upper Register */ +#define DBAT7L SPRN_DBAT7L /* Data BAT 7 Lower Register */ +#define DBAT7U SPRN_DBAT7U /* Data BAT 7 Upper Register */ +#define DEC SPRN_DEC /* Decrement Register */ +#define DMISS SPRN_DMISS /* Data TLB Miss Register */ +#define DSISR SPRN_DSISR /* Data Storage Interrupt Status Register */ +#define EAR SPRN_EAR /* External Address Register */ +#define HASH1 SPRN_HASH1 /* Primary Hash Address Register */ +#define HASH2 SPRN_HASH2 /* Secondary Hash Address Register */ +#define HID0 SPRN_HID0 /* Hardware Implementation Register 0 */ +#define HID1 SPRN_HID1 /* Hardware Implementation Register 1 */ +#define IABR SPRN_IABR /* Instruction Address Breakpoint Register */ +#define IBAT0L SPRN_IBAT0L /* Instruction BAT 0 Lower Register */ +#define IBAT0U SPRN_IBAT0U /* Instruction BAT 0 Upper Register */ +#define IBAT1L SPRN_IBAT1L /* Instruction BAT 1 Lower Register */ +#define IBAT1U SPRN_IBAT1U /* Instruction BAT 1 Upper Register */ +#define IBAT2L SPRN_IBAT2L /* Instruction BAT 2 Lower Register */ +#define IBAT2U SPRN_IBAT2U /* Instruction BAT 2 Upper Register */ +#define IBAT3L SPRN_IBAT3L /* Instruction BAT 3 Lower Register */ +#define IBAT3U SPRN_IBAT3U /* Instruction BAT 3 Upper Register */ +#define IBAT4L SPRN_IBAT4L /* Instruction BAT 4 Lower Register */ +#define IBAT4U SPRN_IBAT4U /* Instruction BAT 4 Upper Register */ +#define IBAT5L SPRN_IBAT5L /* Instruction BAT 5 Lower Register */ +#define IBAT5U SPRN_IBAT5U /* Instruction BAT 5 Upper Register */ +#define IBAT6L SPRN_IBAT6L /* Instruction BAT 6 Lower Register */ +#define IBAT6U SPRN_IBAT6U /* Instruction BAT 6 Upper Register */ +#define IBAT7L SPRN_IBAT7L /* Instruction BAT 7 Lower Register */ +#define IBAT7U SPRN_IBAT7U /* Instruction BAT 7 Upper Register */ +#define ICMP SPRN_ICMP /* Instruction TLB Compare Register */ +#define IMISS SPRN_IMISS /* Instruction TLB Miss Register */ +#define IMMR SPRN_IMMR /* PPC 860/821 Internal Memory Map Register */ +#define L2CR SPRN_L2CR /* Classic PPC L2 cache control register */ +#define L3CR SPRN_L3CR /* PPC 745x L3 cache control register */ +#define LR SPRN_LR +#define PVR SPRN_PVR /* Processor Version */ +#define RPA SPRN_RPA /* Required Physical Address Register */ +#define SDR1 SPRN_SDR1 /* MMU hash base register */ +#define SPR0 SPRN_SPRG0 /* Supervisor Private Registers */ +#define SPR1 SPRN_SPRG1 +#define SPR2 SPRN_SPRG2 +#define SPR3 SPRN_SPRG3 +#define SPR4 SPRN_SPRG4 +#define SPR5 SPRN_SPRG5 +#define SPR6 SPRN_SPRG6 +#define SPR7 SPRN_SPRG7 +#define SPRG0 SPRN_SPRG0 +#define SPRG1 SPRN_SPRG1 +#define SPRG2 SPRN_SPRG2 +#define SPRG3 SPRN_SPRG3 +#define SPRG4 SPRN_SPRG4 +#define SPRG5 SPRN_SPRG5 +#define SPRG6 SPRN_SPRG6 +#define SPRG7 SPRN_SPRG7 +#define SRR0 SPRN_SRR0 /* Save and Restore Register 0 */ +#define SRR1 SPRN_SRR1 /* Save and Restore Register 1 */ +#define SRR2 SPRN_SRR2 /* Save and Restore Register 2 */ +#define SRR3 SPRN_SRR3 /* Save and Restore Register 3 */ +#define ICTC SPRN_ICTC /* Instruction Cache Throttling Control Reg */ +#define THRM1 SPRN_THRM1 /* Thermal Management Register 1 */ +#define THRM2 SPRN_THRM2 /* Thermal Management Register 2 */ +#define THRM3 SPRN_THRM3 /* Thermal Management Register 3 */ +#define XER SPRN_XER +#define TBRL SPRN_TBRL /* Time Base Read Lower Register */ +#define TBRU SPRN_TBRU /* Time Base Read Upper Register */ +#define TBWL SPRN_TBWL /* Time Base Write Lower Register */ +#define TBWU SPRN_TBWU /* Time Base Write Upper Register */ + +/* Processor Version Register */ + +/* Processor Version Register (PVR) field extraction */ + +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ + +/* + * IBM has further subdivided the standard PowerPC 16-bit version and + * revision subfields of the PVR for the PowerPC 403s into the following: + */ + +#define PVR_FAM(pvr) (((pvr) >> 20) & 0xFFF) /* Family field */ +#define PVR_MEM(pvr) (((pvr) >> 16) & 0xF) /* Member field */ +#define PVR_CORE(pvr) (((pvr) >> 12) & 0xF) /* Core field */ +#define PVR_CFG(pvr) (((pvr) >> 8) & 0xF) /* Configuration field */ +#define PVR_MAJ(pvr) (((pvr) >> 4) & 0xF) /* Major revision field */ +#define PVR_MIN(pvr) (((pvr) >> 0) & 0xF) /* Minor revision field */ + +/* Processor Version Numbers */ + +#define PVR_403GA 0x00200000 +#define PVR_403GB 0x00200100 +#define PVR_403GC 0x00200200 +#define PVR_403GCX 0x00201400 +#define PVR_405GP 0x40110000 +#define PVR_STB03XXX 0x40310000 +#define PVR_NP405H 0x41410000 +#define PVR_NP405L 0x41610000 +#define PVR_440GP_RB 0x40120440 +#define PVR_440GP_RC1 0x40120481 +#define PVR_440GP_RC2 0x40200481 +#define PVR_440GX_RA 0x51b21850 +#define PVR_440GX_RB 0x51b21851 +#define PVR_440GX_RB1 0x51b21852 +#define PVR_601 0x00010000 +#define PVR_602 0x00050000 +#define PVR_603 0x00030000 +#define PVR_603e 0x00060000 +#define PVR_603ev 0x00070000 +#define PVR_603r 0x00071000 +#define PVR_604 0x00040000 +#define PVR_604e 0x00090000 +#define PVR_604r 0x000A0000 +#define PVR_620 0x00140000 +#define PVR_740 0x00080000 +#define PVR_750 PVR_740 +#define PVR_740P 0x10080000 +#define PVR_750P PVR_740P +#define PVR_7400 0x000C0000 +#define PVR_7410 0x800C0000 +#define PVR_7450 0x80000000 +/* + * For the 8xx processors, all of them report the same PVR family for + * the PowerPC core. The various versions of these processors must be + * differentiated by the version number in the Communication Processor + * Module (CPM). + */ +#define PVR_821 0x00500000 +#define PVR_823 PVR_821 +#define PVR_850 PVR_821 +#define PVR_860 PVR_821 +#define PVR_8240 0x00810100 +#define PVR_8245 0x80811014 +#define PVR_8260 PVR_8240 + +/* Segment Registers */ +#define SR0 0 +#define SR1 1 +#define SR2 2 +#define SR3 3 +#define SR4 4 +#define SR5 5 +#define SR6 6 +#define SR7 7 +#define SR8 8 +#define SR9 9 +#define SR10 10 +#define SR11 11 +#define SR12 12 +#define SR13 13 +#define SR14 14 +#define SR15 15 + + +/* returns r3 = relocated address of sym */ +/* modifies r0 */ +#define RELOC_SYM(sym) \ + mflr r3; \ + bl 1f; \ +1: mflr r0; \ + mtlr r3; \ + lis r3, 1b@ha; \ + ori r3, r3, 1b@l; \ + subf r0, r3, r0; \ + lis r3, sym@ha; \ + ori r3, r3, sym@l; \ + add r3, r3, r0 + diff --git a/kexec/arch/ppc64/Makefile b/kexec/arch/ppc64/Makefile new file mode 100644 index 0000000..9caf501 --- /dev/null +++ b/kexec/arch/ppc64/Makefile @@ -0,0 +1,26 @@ +# +# kexec ppc64 (linux booting linux) +# +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +ppc64_KEXEC_SRCS = kexec/arch/ppc64/kexec-elf-rel-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-zImage-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-elf-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/crashdump-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/fdt.c +ppc64_KEXEC_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) + +ppc64_ARCH_REUSE_INITRD = + +ppc64_FS2DT = kexec/fs2dt.c +ppc64_FS2DT_INCLUDE = -include $(srcdir)/kexec/arch/ppc64/crashdump-ppc64.h \ + -include $(srcdir)/kexec/arch/ppc64/kexec-ppc64.h + +ppc64_CPPFLAGS = -I$(srcdir)/kexec/libfdt + +dist += kexec/arch/ppc64/Makefile $(ppc64_KEXEC_SRCS) \ + kexec/arch/ppc64/kexec-ppc64.h kexec/arch/ppc64/crashdump-ppc64.h \ + kexec/arch/ppc64/include/arch/fdt.h \ + kexec/arch/ppc64/include/arch/options.h + diff --git a/kexec/arch/ppc64/crashdump-ppc64.c b/kexec/arch/ppc64/crashdump-ppc64.c new file mode 100644 index 0000000..6d47898 --- /dev/null +++ b/kexec/arch/ppc64/crashdump-ppc64.c @@ -0,0 +1,644 @@ +/* + * kexec: Linux boots Linux + * + * Created by: R Sharada (sharada@in.ibm.com) + * Copyright (C) IBM Corporation, 2005. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include <dirent.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../crashdump.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" + +#define DEVTREE_CRASHKERNEL_BASE "/proc/device-tree/chosen/linux,crashkernel-base" +#define DEVTREE_CRASHKERNEL_SIZE "/proc/device-tree/chosen/linux,crashkernel-size" + +unsigned int num_of_lmb_sets; +unsigned int is_dyn_mem_v2; +uint64_t lmb_size; + +static struct crash_elf_info elf_info64 = +{ + class: ELFCLASS64, +#if BYTE_ORDER == LITTLE_ENDIAN + data: ELFDATA2LSB, +#else + data: ELFDATA2MSB, +#endif + machine: EM_PPC64, + page_offset: PAGE_OFFSET, + lowmem_limit: MAXMEM, +}; + +static struct crash_elf_info elf_info32 = +{ + class: ELFCLASS32, + data: ELFDATA2MSB, + machine: EM_PPC64, + page_offset: PAGE_OFFSET, + lowmem_limit: MAXMEM, +}; + +extern struct arch_options_t arch_options; + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region + */ +static struct memory_range *crash_memory_range = NULL; + +/* Define a variable to replace the CRASH_MAX_MEMORY_RANGES macro */ +static int crash_max_memory_ranges; + +/* + * Used to save various memory ranges/regions needed for the captured + * kernel to boot. (lime memmap= option in other archs) + */ +mem_rgns_t usablemem_rgns = {0, NULL}; + +static unsigned long long cstart, cend; +static int memory_ranges; + +/* + * Exclude the region that lies within crashkernel and above the memory + * limit which is reflected by mem= kernel option. + */ +static void exclude_crash_region(uint64_t start, uint64_t end) +{ + /* If memory_limit is set then exclude the memory region above it. */ + if (memory_limit) { + if (start >= memory_limit) + return; + if (end > memory_limit) + end = memory_limit; + } + + if (cstart < end && cend > start) { + if (start < cstart && end > cend) { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = cstart; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + crash_memory_range[memory_ranges].start = cend; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } else if (start < cstart) { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = cstart; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } else if (end > cend) { + crash_memory_range[memory_ranges].start = cend; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } + } else { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } +} + +static int get_dyn_reconf_crash_memory_ranges(void) +{ + uint64_t start, end; + uint64_t startrange, endrange; + uint64_t size; + char fname[128], buf[32]; + FILE *file; + unsigned int i; + int n; + uint32_t flags; + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, "ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory"); + if (is_dyn_mem_v2) + strcat(fname, "-v2"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + return -1; + } + + fseek(file, 4, SEEK_SET); + startrange = endrange = 0; + size = lmb_size; + for (i = 0; i < num_of_lmb_sets; i++) { + if ((n = fread(buf, 1, LMB_ENTRY_SIZE, file)) < 0) { + perror(fname); + fclose(file); + return -1; + } + if (memory_ranges >= (max_memory_ranges + 1)) { + /* No space to insert another element. */ + fprintf(stderr, + "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + fclose(file); + return -1; + } + + /* + * If the property is ibm,dynamic-memory-v2, the first 4 bytes + * tell the number of sequential LMBs in this entry. + */ + if (is_dyn_mem_v2) + size = be32_to_cpu(((unsigned int *)buf)[0]) * lmb_size; + + start = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR])); + end = start + size; + if (start == 0 && end >= (BACKUP_SRC_END + 1)) + start = BACKUP_SRC_END + 1; + + flags = be32_to_cpu((*((uint32_t *)&buf[DRCONF_FLAGS]))); + /* skip this block if the reserved bit is set in flags (0x80) + or if the block is not assigned to this partition (0x8) */ + if ((flags & 0x80) || !(flags & 0x8)) + continue; + + if (start != endrange) { + if (startrange != endrange) + exclude_crash_region(startrange, endrange); + startrange = start; + } + endrange = end; + } + if (startrange != endrange) + exclude_crash_region(startrange, endrange); + + fclose(file); + return 0; +} + +/* + * For a given memory node, check if it is mapped to system RAM or + * to onboard memory on accelerator device like GPU card or such. + */ +static int is_coherent_device_mem(const char *fname) +{ + char fpath[PATH_LEN]; + char buf[32]; + DIR *dmem; + FILE *file; + struct dirent *mentry; + int cnt, ret = 0; + + strcpy(fpath, fname); + if ((dmem = opendir(fpath)) == NULL) { + perror(fpath); + return -1; + } + + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "compatible")) + continue; + + strcat(fpath, "/compatible"); + if ((file = fopen(fpath, "r")) == NULL) { + perror(fpath); + ret = -1; + break; + } + if ((cnt = fread(buf, 1, 32, file)) < 0) { + perror(fpath); + fclose(file); + ret = -1; + break; + } + if (!strncmp(buf, "ibm,coherent-device-memory", 26)) { + fclose(file); + ret = 1; + break; + } + fclose(file); + } + + closedir(dmem); + return ret; +} + + +/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to + * create Elf headers. Keeping it separate from get_memory_ranges() as + * requirements are different in the case of normal kexec and crashdumps. + * + * Normal kexec needs to look at all of available physical memory irrespective + * of the fact how much of it is being used by currently running kernel. + * Crashdumps need to have access to memory regions actually being used by + * running kernel. Expecting a different file/data structure than /proc/iomem + * to look into down the line. May be something like /proc/kernelmem or may + * be zone data structures exported from kernel. + */ +static int get_crash_memory_ranges(struct memory_range **range, int *ranges) +{ + + char device_tree[256] = "/proc/device-tree/"; + char fname[PATH_LEN]; + char buf[MAXBYTES]; + DIR *dir, *dmem; + FILE *file; + struct dirent *dentry, *mentry; + int n, ret, crash_rng_len = 0; + unsigned long long start, end; + int page_size; + + crash_max_memory_ranges = max_memory_ranges + 6; + crash_rng_len = sizeof(struct memory_range) * crash_max_memory_ranges; + + crash_memory_range = (struct memory_range *) malloc(crash_rng_len); + if (!crash_memory_range) { + fprintf(stderr, "Allocation for crash memory range failed\n"); + return -1; + } + memset(crash_memory_range, 0, crash_rng_len); + + /* create a separate program header for the backup region */ + crash_memory_range[0].start = BACKUP_SRC_START; + crash_memory_range[0].end = BACKUP_SRC_END + 1; + crash_memory_range[0].type = RANGE_RAM; + memory_ranges++; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + goto err; + } + + cstart = crash_base; + cend = crash_base + crash_size; + + while ((dentry = readdir(dir)) != NULL) { + if (!strncmp(dentry->d_name, + "ibm,dynamic-reconfiguration-memory", 35)){ + get_dyn_reconf_crash_memory_ranges(); + continue; + } + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + + ret = is_coherent_device_mem(fname); + if (ret == -1) { + closedir(dir); + goto err; + } else if (ret == 1) { + /* + * Avoid adding this memory region as it is not + * mapped to system RAM. + */ + continue; + } + + if ((dmem = opendir(fname)) == NULL) { + perror(fname); + closedir(dir); + goto err; + } + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + closedir(dmem); + closedir(dir); + goto err; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + fclose(file); + closedir(dmem); + closedir(dir); + goto err; + } + if (memory_ranges >= (max_memory_ranges + 1)) { + /* No space to insert another element. */ + fprintf(stderr, + "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + goto err; + } + + start = be64_to_cpu(((unsigned long long *)buf)[0]); + end = start + + be64_to_cpu(((unsigned long long *)buf)[1]); + if (start == 0 && end >= (BACKUP_SRC_END + 1)) + start = BACKUP_SRC_END + 1; + + exclude_crash_region(start, end); + fclose(file); + } + closedir(dmem); + } + closedir(dir); + + /* + * If RTAS region is overlapped with crashkernel, need to create ELF + * Program header for the overlapped memory. + */ + if (crash_base < rtas_base + rtas_size && + rtas_base < crash_base + crash_size) { + page_size = getpagesize(); + cstart = rtas_base; + cend = rtas_base + rtas_size; + if (cstart < crash_base) + cstart = crash_base; + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + /* + * The rtas section created here is formed by reading rtas-base + * and rtas-size from /proc/device-tree/rtas. Unfortunately + * rtas-size is not required to be a multiple of PAGE_SIZE + * The remainder of the page it ends on is just garbage, and is + * safe to read, its just not accounted in rtas-size. Since + * we're creating an elf section here though, lets round it up + * to the next page size boundary though, so makedumpfile can + * read it safely without going south on us. + */ + cend = _ALIGN(cend, page_size); + + crash_memory_range[memory_ranges].start = cstart; + crash_memory_range[memory_ranges++].end = cend; + } + + /* + * If OPAL region is overlapped with crashkernel, need to create ELF + * Program header for the overlapped memory. + */ + if (crash_base < opal_base + opal_size && + opal_base < crash_base + crash_size) { + page_size = getpagesize(); + cstart = opal_base; + cend = opal_base + opal_size; + if (cstart < crash_base) + cstart = crash_base; + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + /* + * The opal section created here is formed by reading opal-base + * and opal-size from /proc/device-tree/ibm,opal. Unfortunately + * opal-size is not required to be a multiple of PAGE_SIZE + * The remainder of the page it ends on is just garbage, and is + * safe to read, its just not accounted in opal-size. Since + * we're creating an elf section here though, lets round it up + * to the next page size boundary though, so makedumpfile can + * read it safely without going south on us. + */ + cend = _ALIGN(cend, page_size); + + crash_memory_range[memory_ranges].start = cstart; + crash_memory_range[memory_ranges++].end = cend; + } + *range = crash_memory_range; + *ranges = memory_ranges; + + int j; + dbgprintf("CRASH MEMORY RANGES\n"); + for(j = 0; j < *ranges; j++) { + start = crash_memory_range[j].start; + end = crash_memory_range[j].end; + dbgprintf("%016Lx-%016Lx\n", start, end); + } + + return 0; + +err: + if (crash_memory_range) + free(crash_memory_range); + return -1; +} + +static int add_cmdline_param(char *cmdline, uint64_t addr, char *cmdstr, + char *byte) +{ + int cmdline_size, cmdlen, len, align = 1024; + char str[COMMAND_LINE_SIZE], *ptr; + + /* Passing in =xxxK / =xxxM format. Saves space required in cmdline.*/ + switch (byte[0]) { + case 'K': + if (addr%align) + return -1; + addr = addr/align; + break; + case 'M': + addr = addr/(align *align); + break; + } + ptr = str; + strcpy(str, cmdstr); + ptr += strlen(str); + ultoa(addr, ptr); + strcat(str, byte); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + cmdline_size = COMMAND_LINE_SIZE; + if (cmdlen > (cmdline_size - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + dbgprintf("Command line after adding elfcorehdr: %s\n", cmdline); + return 0; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for backup region, another segment for storing elf headers + * for crash memory image. + */ +int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, + uint64_t max_addr, unsigned long min_base) +{ + void *tmp; + unsigned long sz; + uint64_t elfcorehdr; + int nr_ranges, align = 1024, i; + unsigned long long end; + struct memory_range *mem_range; + + if (get_crash_memory_ranges(&mem_range, &nr_ranges) < 0) + return -1; + + info->backup_src_start = BACKUP_SRC_START; + info->backup_src_size = BACKUP_SRC_SIZE; + /* Create a backup region segment to store backup data*/ + sz = _ALIGN(BACKUP_SRC_SIZE, align); + tmp = xmalloc(sz); + memset(tmp, 0, sz); + info->backup_start = add_buffer(info, tmp, sz, sz, align, + 0, max_addr, 1); + reserve(info->backup_start, sz); + + /* On ppc64 memory ranges in device-tree is denoted as start + * and size rather than start and end, as is the case with + * other architectures like i386 . Because of this when loading + * the memory ranges in crashdump-elf.c the filesz calculation + * [ end - start + 1 ] goes for a toss. + * + * To be in sync with other archs adjust the end value for + * every crash memory range before calling the generic function + */ + + for (i = 0; i < nr_ranges; i++) { + end = crash_memory_range[i].end - 1; + crash_memory_range[i].end = end; + } + + + /* Create elf header segment and store crash image data. */ + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + if (crash_create_elf64_headers(info, &elf_info64, + crash_memory_range, nr_ranges, + &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) { + free (tmp); + return -1; + } + } + else { + if (crash_create_elf32_headers(info, &elf_info32, + crash_memory_range, nr_ranges, + &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) { + free(tmp); + return -1; + } + } + + elfcorehdr = add_buffer(info, tmp, sz, sz, align, min_base, + max_addr, 1); + reserve(elfcorehdr, sz); + /* modify and store the cmdline in a global array. This is later + * read by flatten_device_tree and modified if required + */ + add_cmdline_param(mod_cmdline, elfcorehdr, " elfcorehdr=", "K"); + return 0; +} + +/* + * Used to save various memory regions needed for the captured kernel. + */ + +void add_usable_mem_rgns(unsigned long long base, unsigned long long size) +{ + unsigned int i; + unsigned long long end = base + size; + unsigned long long ustart, uend; + + base = _ALIGN_DOWN(base, getpagesize()); + end = _ALIGN_UP(end, getpagesize()); + + for (i=0; i < usablemem_rgns.size; i++) { + ustart = usablemem_rgns.ranges[i].start; + uend = usablemem_rgns.ranges[i].end; + if (base < uend && end > ustart) { + if ((base >= ustart) && (end <= uend)) + return; + if (base < ustart && end > uend) { + usablemem_rgns.ranges[i].start = base; + usablemem_rgns.ranges[i].end = end; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new base:%llx new size:%llx\n", + i, base, size); +#endif + return; + } else if (base < ustart) { + usablemem_rgns.ranges[i].start = base; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new base:%llx new size:%llx", + i, base, usablemem_rgns.ranges[i].end - base); +#endif + return; + } else if (end > uend){ + usablemem_rgns.ranges[i].end = end; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new end:%llx, new size:%llx", + i, end, end - usablemem_rgns.ranges[i].start); +#endif + return; + } + } + } + usablemem_rgns.ranges[usablemem_rgns.size].start = base; + usablemem_rgns.ranges[usablemem_rgns.size++].end = end; + + dbgprintf("usable memory rgns size:%u base:%llx size:%llx\n", + usablemem_rgns.size, base, size); +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + unsigned long long value; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_BASE, &value)) + *start = be64_to_cpu(value); + else + return -1; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_SIZE, &value)) + *end = *start + be64_to_cpu(value) - 1; + else + return -1; + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + int fd; + + fd = open(DEVTREE_CRASHKERNEL_BASE, O_RDONLY); + if (fd < 0) + return 0; + close(fd); + return 1; +} + +#if 0 +static int sort_regions(mem_rgns_t *rgn) +{ + int i, j; + unsigned long long tstart, tend; + for (i = 0; i < rgn->size; i++) { + for (j = 0; j < rgn->size - i - 1; j++) { + if (rgn->ranges[j].start > rgn->ranges[j+1].start) { + tstart = rgn->ranges[j].start; + tend = rgn->ranges[j].end; + rgn->ranges[j].start = rgn->ranges[j+1].start; + rgn->ranges[j].end = rgn->ranges[j+1].end; + rgn->ranges[j+1].start = tstart; + rgn->ranges[j+1].end = tend; + } + } + } + return 0; + +} +#endif + diff --git a/kexec/arch/ppc64/crashdump-ppc64.h b/kexec/arch/ppc64/crashdump-ppc64.h new file mode 100644 index 0000000..b0cba8a --- /dev/null +++ b/kexec/arch/ppc64/crashdump-ppc64.h @@ -0,0 +1,51 @@ +#ifndef CRASHDUMP_PPC64_H +#define CRASHDUMP_PPC64_H + +#include <stdint.h> +#include <sys/types.h> + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + uint64_t max_addr, unsigned long min_base); +void add_usable_mem_rgns(unsigned long long base, unsigned long long size); + +#define PAGE_OFFSET 0xC000000000000000ULL +#define KERNELBASE PAGE_OFFSET +#define VMALLOCBASE 0xD000000000000000ULL + +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define MAXMEM (-(unsigned long)(KERNELBASE-VMALLOCBASE)) + +#define COMMAND_LINE_SIZE 2048 /* from kernel */ +/* Backup Region, First 64K of System RAM. */ +#define BACKUP_SRC_START 0x0000 +#define BACKUP_SRC_END 0xffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) + +#define KDUMP_BACKUP_LIMIT BACKUP_SRC_SIZE + +#define KERNEL_RUN_AT_ZERO_MAGIC 0x72756e30 /* "run0" */ + +extern uint64_t crash_base; +extern uint64_t crash_size; +extern uint64_t memory_limit; +extern unsigned int rtas_base; +extern unsigned int rtas_size; +extern uint64_t opal_base; +extern uint64_t opal_size; + +/* + * In case of ibm,dynamic-memory-v2 property, this is the number of LMB + * sets where each set represents a group of sequential LMB entries. In + * case of ibm,dynamic-memory property, the number of LMB sets is nothing + * but the total number of LMB entries. + */ +extern unsigned int num_of_lmb_sets; +extern unsigned int is_dyn_mem_v2; +extern uint64_t lmb_size; + +#define LMB_ENTRY_SIZE 24 +#define DRCONF_ADDR (is_dyn_mem_v2 ? 4 : 0) +#define DRCONF_FLAGS 20 + +#endif /* CRASHDUMP_PPC64_H */ diff --git a/kexec/arch/ppc64/fdt.c b/kexec/arch/ppc64/fdt.c new file mode 100644 index 0000000..8bc6d2d --- /dev/null +++ b/kexec/arch/ppc64/fdt.c @@ -0,0 +1,78 @@ +/* + * ppc64 fdt fixups + * + * Copyright 2015 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <arch/fdt.h> +#include <libfdt.h> +#include <stdio.h> +#include <stdlib.h> + +/* + * Let the kernel know it booted from kexec, as some things (e.g. + * secondary CPU release) may work differently. + */ +static int fixup_kexec_prop(void *fdt) +{ + int err, nodeoffset; + + nodeoffset = fdt_subnode_offset(fdt, 0, "chosen"); + if (nodeoffset < 0) + nodeoffset = fdt_add_subnode(fdt, 0, "chosen"); + if (nodeoffset < 0) { + printf("%s: add /chosen %s\n", __func__, + fdt_strerror(nodeoffset)); + return -1; + } + + err = fdt_setprop(fdt, nodeoffset, "linux,booted-from-kexec", + NULL, 0); + if (err < 0) { + printf("%s: couldn't write linux,booted-from-kexec: %s\n", + __func__, fdt_strerror(err)); + return -1; + } + + return 0; +} + + +/* + * For now, assume that the added content fits in the file. + * This should be the case when flattening from /proc/device-tree, + * and when passing in a dtb, dtc can be told to add padding. + */ +int fixup_dt(char **fdt, off_t *size) +{ + int ret; + + *size += 4096; + *fdt = realloc(*fdt, *size); + if (!*fdt) { + fprintf(stderr, "%s: out of memory\n", __func__); + return -1; + } + + ret = fdt_open_into(*fdt, *fdt, *size); + if (ret < 0) { + fprintf(stderr, "%s: fdt_open_into: %s\n", __func__, + fdt_strerror(ret)); + return -1; + } + + ret = fixup_kexec_prop(*fdt); + if (ret < 0) + return ret; + + return 0; +} diff --git a/kexec/arch/ppc64/include/arch/fdt.h b/kexec/arch/ppc64/include/arch/fdt.h new file mode 100644 index 0000000..b19f185 --- /dev/null +++ b/kexec/arch/ppc64/include/arch/fdt.h @@ -0,0 +1,8 @@ +#ifndef KEXEC_ARCH_PPC64_FDT +#define KEXEC_ARCH_PPC64_FDT + +#include <sys/types.h> + +int fixup_dt(char **fdt, off_t *size); + +#endif diff --git a/kexec/arch/ppc64/include/arch/options.h b/kexec/arch/ppc64/include/arch/options.h new file mode 100644 index 0000000..2bca96a --- /dev/null +++ b/kexec/arch/ppc64/include/arch/options.h @@ -0,0 +1,51 @@ +#ifndef KEXEC_ARCH_PPC64_OPTIONS_H +#define KEXEC_ARCH_PPC64_OPTIONS_H + +#define OPT_ELF64_CORE (OPT_MAX+0) +#define OPT_DT_NO_OLD_ROOT (OPT_MAX+1) +#define OPT_ARCH_MAX (OPT_MAX+2) + +/* All 'local' loader options: */ +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_RAMDISK (OPT_ARCH_MAX+1) +#define OPT_DEVICETREEBLOB (OPT_ARCH_MAX+2) +#define OPT_ARGS_IGNORE (OPT_ARCH_MAX+3) +#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+4) + +/* Options relevant to the architecture (excluding loader-specific ones): */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "elf64-core-headers", 0, 0, OPT_ELF64_CORE }, \ + { "dt-no-old-root", 0, 0, OPT_DT_NO_OLD_ROOT }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "append", 1, NULL, OPT_APPEND }, \ + { "ramdisk", 1, NULL, OPT_RAMDISK }, \ + { "initrd", 1, NULL, OPT_RAMDISK }, \ + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, \ + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, \ + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + +#define KEXEC_ALL_OPT_STR KEXEC_OPT_STR + + +#endif /* KEXEC_ARCH_PPC64_OPTIONS_H */ diff --git a/kexec/arch/ppc64/kexec-elf-ppc64.c b/kexec/arch/ppc64/kexec-elf-ppc64.c new file mode 100644 index 0000000..01d045f --- /dev/null +++ b/kexec/arch/ppc64/kexec-elf-ppc64.c @@ -0,0 +1,496 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <linux/elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" +#include <libfdt.h> +#include <arch/fdt.h> +#include <arch/options.h> + +uint64_t initrd_base, initrd_size; +unsigned char reuse_initrd = 0; +const char *ramdisk; + +int elf_ppc64_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + goto out; + } + + /* Verify the architecuture specific bits */ + if ((ehdr.e_machine != EM_PPC64) && (ehdr.e_machine != EM_PPC)) { + /* for a different architecture */ + result = -1; + goto out; + } + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +void arch_reuse_initrd(void) +{ + reuse_initrd = 1; +} + +static int read_prop(char *name, void *value, size_t len) +{ + int fd; + size_t rlen; + + fd = open(name, O_RDONLY); + if (fd == -1) + return -1; + + rlen = read(fd, value, len); + if (rlen < 0) + fprintf(stderr, "Warning : Can't read %s : %s", + name, strerror(errno)); + else if (rlen != len) + fprintf(stderr, "Warning : short read from %s", name); + + close(fd); + return 0; +} + +static int elf_ppc64_load_file(int argc, char **argv, struct kexec_info *info) +{ + int ret = 0; + char *cmdline, *dtb; + char *append_cmdline = NULL; + char *reuse_cmdline = NULL; + int opt, cmdline_len = 0; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE}, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* Parse command line arguments */ + cmdline = 0; + dtb = 0; + ramdisk = 0; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append_cmdline = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_DEVICETREEBLOB: + dtb = optarg; + break; + case OPT_ARGS_IGNORE: + break; + case OPT_REUSE_CMDLINE: + reuse_cmdline = get_command_line(); + break; + } + } + + if (dtb) + die("--dtb not supported while using --kexec-file-syscall.\n"); + + if (reuse_initrd) + die("--reuseinitrd not supported with --kexec-file-syscall.\n"); + + cmdline = concat_cmdline(reuse_cmdline, append_cmdline); + if (!reuse_cmdline) + free(reuse_cmdline); + + if (cmdline) { + cmdline_len = strlen(cmdline) + 1; + } else { + cmdline = strdup("\0"); + cmdline_len = 1; + } + + if (ramdisk) { + info->initrd_fd = open(ramdisk, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, "Could not open initrd file %s:%s\n", + ramdisk, strerror(errno)); + ret = -1; + goto out; + } + } + + info->command_line = cmdline; + info->command_line_len = cmdline_len; + return ret; +out: + if (cmdline_len == 1) + free(cmdline); + return ret; +} + +int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *cmdline, *modified_cmdline = NULL; + char *reuse_cmdline = NULL; + char *append_cmdline = NULL; + const char *devicetreeblob; + uint64_t max_addr, hole_addr; + char *seg_buf = NULL; + off_t seg_size = 0; + struct mem_phdr *phdr; + size_t size; +#ifdef NEED_RESERVE_DTB + uint64_t *rsvmap_ptr; + struct bootblock *bb_ptr; +#endif + int result, opt; + uint64_t my_kernel, my_dt_offset; + uint64_t my_opal_base = 0, my_opal_entry = 0; + unsigned int my_panic_kernel; + uint64_t my_stack, my_backup_start; + uint64_t toc_addr; + uint32_t my_run_at_load; + unsigned int slave_code[256/sizeof (unsigned int)], master_entry; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE}, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + if (info->file_mode) + return elf_ppc64_load_file(argc, argv, info); + + /* Parse command line arguments */ + initrd_base = 0; + initrd_size = 0; + cmdline = 0; + ramdisk = 0; + devicetreeblob = 0; + max_addr = 0xFFFFFFFFFFFFFFFFULL; + hole_addr = 0; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append_cmdline = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_DEVICETREEBLOB: + devicetreeblob = optarg; + break; + case OPT_ARGS_IGNORE: + break; + case OPT_REUSE_CMDLINE: + reuse_cmdline = get_command_line(); + break; + } + } + + cmdline = concat_cmdline(reuse_cmdline, append_cmdline); + if (!reuse_cmdline) + free(reuse_cmdline); + + if (!cmdline) + fprintf(stdout, "Warning: append= option is not passed. Using the first kernel root partition\n"); + + if (ramdisk && reuse_initrd) + die("Can't specify --ramdisk or --initrd with --reuseinitrd\n"); + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (cmdline) { + strncpy(modified_cmdline, cmdline, COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + } + + /* Parse the Elf file */ + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + free_elf_info(&ehdr); + return result; + } + + /* Load the Elf data. Physical load addresses in elf64 header do not + * show up correctly. Use user supplied address for now to patch the + * elf header + */ + + phdr = &ehdr.e_phdr[0]; + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + my_kernel = hole_addr = locate_hole(info, size, 0, 0, max_addr, 1); + ehdr.e_phdr[0].p_paddr = hole_addr; + result = elf_exec_load(&ehdr, info); + if (result < 0) { + free_elf_info(&ehdr); + return result; + } + + /* If panic kernel is being loaded, additional segments need + * to be created. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info, modified_cmdline, + max_addr, 0); + if (result < 0) + return -1; + /* Use new command line. */ + cmdline = modified_cmdline; + } + + /* Add v2wrap to the current image */ + elf_rel_build_load(info, &info->rhdr, purgatory, + purgatory_size, 0, max_addr, 1, 0); + + /* Add a ram-disk to the current image + * Note: Add the ramdisk after elf_rel_build_load + */ + if (ramdisk) { + if (devicetreeblob) { + fprintf(stderr, + "Can't use ramdisk with device tree blob input\n"); + return -1; + } + seg_buf = slurp_file(ramdisk, &seg_size); + hole_addr = add_buffer(info, seg_buf, seg_size, seg_size, + 0, 0, max_addr, 1); + initrd_base = hole_addr; + initrd_size = seg_size; + } /* ramdisk */ + + if (devicetreeblob) { + /* Grab device tree from buffer */ + seg_buf = slurp_file(devicetreeblob, &seg_size); + } else { + /* create from fs2dt */ + create_flatten_tree(&seg_buf, &seg_size, cmdline); + } + + result = fixup_dt(&seg_buf, &seg_size); + if (result < 0) + return result; + + my_dt_offset = add_buffer(info, seg_buf, seg_size, seg_size, + 0, 0, max_addr, -1); + +#ifdef NEED_RESERVE_DTB + /* patch reserve map address for flattened device-tree + * find last entry (both 0) in the reserve mem list. Assume DT + * entry is before this one + */ + bb_ptr = (struct bootblock *)(seg_buf); + rsvmap_ptr = (uint64_t *)(seg_buf + be32_to_cpu(bb_ptr->off_mem_rsvmap)); + while (*rsvmap_ptr || *(rsvmap_ptr+1)) + rsvmap_ptr += 2; + rsvmap_ptr -= 2; + *rsvmap_ptr = cpu_to_be64(my_dt_offset); + rsvmap_ptr++; + *rsvmap_ptr = cpu_to_be64((uint64_t)be32_to_cpu(bb_ptr->totalsize)); +#endif + + if (read_prop("/proc/device-tree/ibm,opal/opal-base-address", + &my_opal_base, sizeof(my_opal_base)) == 0) { + my_opal_base = be64_to_cpu(my_opal_base); + elf_rel_set_symbol(&info->rhdr, "opal_base", + &my_opal_base, sizeof(my_opal_base)); + } + + if (read_prop("/proc/device-tree/ibm,opal/opal-entry-address", + &my_opal_entry, sizeof(my_opal_entry)) == 0) { + my_opal_entry = be64_to_cpu(my_opal_entry); + elf_rel_set_symbol(&info->rhdr, "opal_entry", + &my_opal_entry, sizeof(my_opal_entry)); + } + + /* Set kernel */ + elf_rel_set_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel)); + + /* Set dt_offset */ + elf_rel_set_symbol(&info->rhdr, "dt_offset", &my_dt_offset, + sizeof(my_dt_offset)); + + /* get slave code from new kernel, put in purgatory */ + elf_rel_get_symbol(&info->rhdr, "purgatory_start", slave_code, + sizeof(slave_code)); + master_entry = slave_code[0]; + memcpy(slave_code, phdr->p_data, sizeof(slave_code)); + slave_code[0] = master_entry; + elf_rel_set_symbol(&info->rhdr, "purgatory_start", slave_code, + sizeof(slave_code)); + + if (info->kexec_flags & KEXEC_ON_CRASH) { + my_panic_kernel = 1; + /* Set panic flag */ + elf_rel_set_symbol(&info->rhdr, "panic_kernel", + &my_panic_kernel, sizeof(my_panic_kernel)); + + /* Set backup address */ + my_backup_start = info->backup_start; + elf_rel_set_symbol(&info->rhdr, "backup_start", + &my_backup_start, sizeof(my_backup_start)); + + /* Tell relocatable kernel to run at load address + * via word before slave code in purgatory + */ + + elf_rel_get_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + if (my_run_at_load == KERNEL_RUN_AT_ZERO_MAGIC) + my_run_at_load = 1; + /* else it should be a fixed offset image */ + elf_rel_set_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + } + + /* Set stack address */ + my_stack = locate_hole(info, 16*1024, 0, 0, max_addr, 1); + my_stack += 16*1024; + elf_rel_set_symbol(&info->rhdr, "stack", &my_stack, sizeof(my_stack)); + + /* Set toc */ + toc_addr = my_r2(&info->rhdr); + elf_rel_set_symbol(&info->rhdr, "my_toc", &toc_addr, sizeof(toc_addr)); + + /* Set debug */ + elf_rel_set_symbol(&info->rhdr, "debug", &my_debug, sizeof(my_debug)); + + my_kernel = 0; + my_dt_offset = 0; + my_panic_kernel = 0; + my_backup_start = 0; + my_stack = 0; + toc_addr = 0; + my_run_at_load = 0; + my_debug = 0; + my_opal_base = 0; + my_opal_entry = 0; + + elf_rel_get_symbol(&info->rhdr, "opal_base", &my_opal_base, + sizeof(my_opal_base)); + elf_rel_get_symbol(&info->rhdr, "opal_entry", &my_opal_entry, + sizeof(my_opal_entry)); + elf_rel_get_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel)); + elf_rel_get_symbol(&info->rhdr, "dt_offset", &my_dt_offset, + sizeof(my_dt_offset)); + elf_rel_get_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + elf_rel_get_symbol(&info->rhdr, "panic_kernel", &my_panic_kernel, + sizeof(my_panic_kernel)); + elf_rel_get_symbol(&info->rhdr, "backup_start", &my_backup_start, + sizeof(my_backup_start)); + elf_rel_get_symbol(&info->rhdr, "stack", &my_stack, sizeof(my_stack)); + elf_rel_get_symbol(&info->rhdr, "my_toc", &toc_addr, + sizeof(toc_addr)); + elf_rel_get_symbol(&info->rhdr, "debug", &my_debug, sizeof(my_debug)); + + dbgprintf("info->entry is %p\n", info->entry); + dbgprintf("kernel is %llx\n", (unsigned long long)my_kernel); + dbgprintf("dt_offset is %llx\n", + (unsigned long long)my_dt_offset); + dbgprintf("run_at_load flag is %x\n", my_run_at_load); + dbgprintf("panic_kernel is %x\n", my_panic_kernel); + dbgprintf("backup_start is %llx\n", + (unsigned long long)my_backup_start); + dbgprintf("stack is %llx\n", (unsigned long long)my_stack); + dbgprintf("toc_addr is %llx\n", (unsigned long long)toc_addr); + dbgprintf("purgatory size is %zu\n", purgatory_size); + dbgprintf("debug is %d\n", my_debug); + dbgprintf("opal_base is %llx\n", (unsigned long long) my_opal_base); + dbgprintf("opal_entry is %llx\n", (unsigned long long) my_opal_entry); + + return 0; +} + +void elf_ppc64_usage(void) +{ + fprintf(stderr, " --command-line=<Command line> command line to append.\n"); + fprintf(stderr, " --append=<Command line> same as --command-line.\n"); + fprintf(stderr, " --ramdisk=<filename> Initial RAM disk.\n"); + fprintf(stderr, " --initrd=<filename> same as --ramdisk.\n"); + fprintf(stderr, " --devicetreeblob=<filename> Specify device tree blob file.\n"); + fprintf(stderr, " "); + fprintf(stderr, "Not applicable while using --kexec-file-syscall.\n"); + fprintf(stderr, " --reuse-cmdline Use kernel command line from running system.\n"); + fprintf(stderr, " --dtb=<filename> same as --devicetreeblob.\n"); + + fprintf(stderr, "elf support is still broken\n"); +} diff --git a/kexec/arch/ppc64/kexec-elf-rel-ppc64.c b/kexec/arch/ppc64/kexec-elf-rel-ppc64.c new file mode 100644 index 0000000..51b1354 --- /dev/null +++ b/kexec/arch/ppc64/kexec-elf-rel-ppc64.c @@ -0,0 +1,204 @@ +#include <stdio.h> +#include <elf.h> +#include <string.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-ppc64.h" + +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define STO_PPC64_LOCAL_BIT 5 +#define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT) +#define PPC64_LOCAL_ENTRY_OFFSET(other) \ + (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2) + +static unsigned int local_entry_offset(struct mem_sym *sym) +{ + /* If this symbol has a local entry point, use it. */ + return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); +} +#else +static unsigned int local_entry_offset(struct mem_sym *UNUSED(sym)) +{ + return 0; +} +#endif + +static struct mem_shdr *toc_section(const struct mem_ehdr *ehdr) +{ + struct mem_shdr *shdr, *shdr_end; + unsigned char *strtab; + + strtab = (unsigned char *)ehdr->e_shdr[ehdr->e_shstrndx].sh_data; + shdr_end = &ehdr->e_shdr[ehdr->e_shnum]; + for (shdr = ehdr->e_shdr; shdr != shdr_end; shdr++) { + if (shdr->sh_size && + strcmp((char *)&strtab[shdr->sh_name], ".toc") == 0) { + return shdr; + } + } + + return NULL; +} + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + struct mem_shdr *toc; + + if (ehdr->ei_class != ELFCLASS64) { + return 0; + } + if (ehdr->e_machine != EM_PPC64) { + return 0; + } + + /* Ensure .toc is sufficiently aligned. */ + toc = toc_section(ehdr); + if (toc && toc->sh_addralign < 256) + toc->sh_addralign = 256; + return 1; +} + +/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this + gives the value maximum span in an instruction which uses a signed + offset) */ +unsigned long my_r2(const struct mem_ehdr *ehdr) +{ + struct mem_shdr *shdr; + + shdr = toc_section(ehdr); + if (!shdr) { + die("TOC reloc without a toc section?"); + } + + return shdr->sh_addr + 0x8000; +} + +static void do_relative_toc(unsigned long value, uint16_t *location, + unsigned long mask, int complain_signed) +{ + if (complain_signed && (value + 0x8000 > 0xffff)) { + die("TOC16 relocation overflows (%lu)\n", value); + } + + if ((~mask & 0xffff) & value) { + die("bad TOC16 relocation (%lu)\n", value); + } + + *location = (*location & ~mask) | (value & mask); +} + +void machine_apply_elf_rel(struct mem_ehdr *ehdr, struct mem_sym *sym, + unsigned long r_type, void *location, unsigned long address, + unsigned long value) +{ + switch(r_type) { + case R_PPC64_ADDR32: + /* Simply set it */ + *(uint32_t *)location = value; + break; + + case R_PPC64_ADDR64: + case R_PPC64_REL64: + /* Simply set it */ + *(uint64_t *)location = value; + break; + + case R_PPC64_REL32: + *(uint32_t *)location = value - (uint32_t)location; + break; + + case R_PPC64_TOC: + *(uint64_t *)location = my_r2(ehdr); + break; + + case R_PPC64_TOC16: + do_relative_toc(value - my_r2(ehdr), location, 0xffff, 1); + break; + + case R_PPC64_TOC16_DS: + do_relative_toc(value - my_r2(ehdr), location, 0xfffc, 1); + break; + + case R_PPC64_TOC16_LO: + do_relative_toc(value - my_r2(ehdr), location, 0xffff, 0); + break; + + case R_PPC64_TOC16_LO_DS: + do_relative_toc(value - my_r2(ehdr), location, 0xfffc, 0); + break; + + case R_PPC64_TOC16_HI: + do_relative_toc((value - my_r2(ehdr)) >> 16, location, + 0xffff, 0); + break; + + case R_PPC64_TOC16_HA: + do_relative_toc((value - my_r2(ehdr) + 0x8000) >> 16, location, + 0xffff, 0); + break; + + case R_PPC64_REL24: + value += local_entry_offset(sym); + /* Convert value to relative */ + value -= address; + if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0) { + die("REL24 %li out of range!\n", (long int)value); + } + + /* Only replace bits 2 through 26 */ + *(uint32_t *)location = (*(uint32_t *)location & ~0x03fffffc) | + (value & 0x03fffffc); + break; + + case R_PPC64_ADDR16_LO: + *(uint16_t *)location = value & 0xffff; + break; + + case R_PPC64_ADDR16_HI: + *(uint16_t *)location = (value >> 16) & 0xffff; + break; + + case R_PPC64_ADDR16_HA: + *(uint16_t *)location = (((value + 0x8000) >> 16) & 0xffff); + break; + + case R_PPC64_ADDR16_HIGHER: + *(uint16_t *)location = (((uint64_t)value >> 32) & 0xffff); + break; + + case R_PPC64_ADDR16_HIGHEST: + *(uint16_t *)location = (((uint64_t)value >> 48) & 0xffff); + break; + + /* R_PPC64_REL16_HA and R_PPC64_REL16_LO are handled to support + * ABIv2 r2 assignment based on r12 for PIC executable. + * Here address is know so replace + * 0: addis 2,12,.TOC.-0b@ha + * addi 2,2,.TOC.-0b@l + * by + * lis 2,.TOC.@ha + * addi 2,2,.TOC.@l + */ + case R_PPC64_REL16_HA: + /* check that we are dealing with the addis 2,12 instruction */ + if (((*(uint32_t*)location) & 0xffff0000) != 0x3c4c0000) + die("Unexpected instruction for R_PPC64_REL16_HA"); + value += my_r2(ehdr); + /* replacing by lis 2 */ + *(uint32_t *)location = 0x3c400000 + ((value >> 16) & 0xffff); + break; + + case R_PPC64_REL16_LO: + /* check that we are dealing with the addi 2,2 instruction */ + if (((*(uint32_t*)location) & 0xffff0000) != 0x38420000) + die("Unexpected instruction for R_PPC64_REL16_LO"); + + value += my_r2(ehdr) - 4; + *(uint16_t *)location = value & 0xffff; + break; + + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } +} diff --git a/kexec/arch/ppc64/kexec-ppc64.c b/kexec/arch/ppc64/kexec-ppc64.c new file mode 100644 index 0000000..611809f --- /dev/null +++ b/kexec/arch/ppc64/kexec-ppc64.c @@ -0,0 +1,969 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2003-2005 Eric Biederman (ebiederm@xmission.com) + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com), IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <dirent.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" +#include <arch/options.h> + +static struct memory_range *exclude_range = NULL; +static struct memory_range *memory_range = NULL; +static struct memory_range *base_memory_range = NULL; +static uint64_t rma_top; +uint64_t memory_max = 0; +uint64_t memory_limit; +static int nr_memory_ranges, nr_exclude_ranges; +uint64_t crash_base, crash_size; +unsigned int rtas_base, rtas_size; +uint64_t opal_base, opal_size; +int max_memory_ranges; + +static void cleanup_memory_ranges(void) +{ + if (memory_range) + free(memory_range); + if (base_memory_range) + free(base_memory_range); + if (exclude_range) + free(exclude_range); + if (usablemem_rgns.ranges) + free(usablemem_rgns.ranges); +} + +/* + * Allocate memory for various data structures used to hold + * values of different memory ranges + */ +static int alloc_memory_ranges(void) +{ + int memory_range_len; + + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = (struct memory_range *) malloc(memory_range_len); + if (!memory_range) + return -1; + + base_memory_range = (struct memory_range *) malloc(memory_range_len); + if (!base_memory_range) + goto err1; + + exclude_range = (struct memory_range *) malloc(memory_range_len); + if (!exclude_range) + goto err1; + + usablemem_rgns.ranges = (struct memory_range *) + malloc(memory_range_len); + if (!(usablemem_rgns.ranges)) + goto err1; + + memset(memory_range, 0, memory_range_len); + memset(base_memory_range, 0, memory_range_len); + memset(exclude_range, 0, memory_range_len); + memset(usablemem_rgns.ranges, 0, memory_range_len); + return 0; + +err1: + fprintf(stderr, "memory range structure allocation failure\n"); + cleanup_memory_ranges(); + return -1; + +} + +static int realloc_memory_ranges(void) +{ + size_t memory_range_len; + + max_memory_ranges++; + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = (struct memory_range *) realloc(memory_range, memory_range_len); + if (!memory_range) + goto err; + + base_memory_range = (struct memory_range *) realloc(base_memory_range, memory_range_len); + if (!base_memory_range) + goto err; + + exclude_range = (struct memory_range *) realloc(exclude_range, memory_range_len); + if (!exclude_range) + goto err; + + usablemem_rgns.ranges = (struct memory_range *) + realloc(usablemem_rgns.ranges, memory_range_len); + if (!(usablemem_rgns.ranges)) + goto err; + + return 0; + +err: + fprintf(stderr, "memory range structure re-allocation failure\n"); + return -1; +} + + +static void add_base_memory_range(uint64_t start, uint64_t end) +{ + base_memory_range[nr_memory_ranges].start = start; + base_memory_range[nr_memory_ranges].end = end; + base_memory_range[nr_memory_ranges].type = RANGE_RAM; + nr_memory_ranges++; + if (nr_memory_ranges >= max_memory_ranges) + realloc_memory_ranges(); + + dbgprintf("%016llx-%016llx : %x\n", + base_memory_range[nr_memory_ranges-1].start, + base_memory_range[nr_memory_ranges-1].end, + base_memory_range[nr_memory_ranges-1].type); +} + +static int get_dyn_reconf_base_ranges(void) +{ + uint64_t start, end; + uint64_t size; + char fname[128], buf[32]; + FILE *file; + unsigned int i; + int n; + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, "ibm,dynamic-reconfiguration-memory/ibm,lmb-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + return -1; + } + if (fread(buf, 1, 8, file) != 8) { + perror(fname); + fclose(file); + return -1; + } + /* + * lmb_size, num_of_lmb_sets(global variables) are + * initialized once here. + */ + size = lmb_size = be64_to_cpu(((uint64_t *)buf)[0]); + fclose(file); + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, + "ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory"); + if ((file = fopen(fname, "r")) == NULL) { + strcat(fname, "-v2"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + return -1; + } + + is_dyn_mem_v2 = 1; + } + + /* first 4 bytes tell the number of lmb set entries */ + if (fread(buf, 1, 4, file) != 4) { + perror(fname); + fclose(file); + return -1; + } + num_of_lmb_sets = be32_to_cpu(((unsigned int *)buf)[0]); + + for (i = 0; i < num_of_lmb_sets; i++) { + if ((n = fread(buf, 1, LMB_ENTRY_SIZE, file)) < 0) { + perror(fname); + fclose(file); + return -1; + } + if (nr_memory_ranges >= max_memory_ranges) { + fclose(file); + return -1; + } + + /* + * If the property is ibm,dynamic-memory-v2, the first 4 bytes + * tell the number of sequential LMBs in this entry. + */ + if (is_dyn_mem_v2) + size = be32_to_cpu(((unsigned int *)buf)[0]) * lmb_size; + + start = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR])); + end = start + size; + add_base_memory_range(start, end); + } + fclose(file); + return 0; +} + +/* Sort the base ranges in memory - this is useful for ensuring that our + * ranges are in ascending order, even if device-tree read of memory nodes + * is done differently. Also, could be used for other range coalescing later + */ +static int sort_base_ranges(void) +{ + int i, j; + unsigned long long tstart, tend; + + for (i = 0; i < nr_memory_ranges - 1; i++) { + for (j = 0; j < nr_memory_ranges - i - 1; j++) { + if (base_memory_range[j].start > base_memory_range[j+1].start) { + tstart = base_memory_range[j].start; + tend = base_memory_range[j].end; + base_memory_range[j].start = base_memory_range[j+1].start; + base_memory_range[j].end = base_memory_range[j+1].end; + base_memory_range[j+1].start = tstart; + base_memory_range[j+1].end = tend; + } + } + } + return 0; +} + +/* Get base memory ranges */ +static int get_base_ranges(void) +{ + uint64_t start, end; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + char buf[MAXBYTES]; + DIR *dir, *dmem; + FILE *file; + struct dirent *dentry, *mentry; + int n; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + while ((dentry = readdir(dir)) != NULL) { + if (!strncmp(dentry->d_name, + "ibm,dynamic-reconfiguration-memory", 35)) { + get_dyn_reconf_base_ranges(); + continue; + } + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((dmem = opendir(fname)) == NULL) { + perror(fname); + closedir(dir); + return -1; + } + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + closedir(dmem); + closedir(dir); + return -1; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + fclose(file); + closedir(dmem); + closedir(dir); + return -1; + } + if (nr_memory_ranges >= max_memory_ranges) { + if (realloc_memory_ranges() < 0) + break; + } + start = be64_to_cpu(((uint64_t *)buf)[0]); + end = start + be64_to_cpu(((uint64_t *)buf)[1]); + add_base_memory_range(start, end); + fclose(file); + } + closedir(dmem); + } + closedir(dir); + sort_base_ranges(); + memory_max = base_memory_range[nr_memory_ranges - 1].end; + dbgprintf("get base memory ranges:%d\n", nr_memory_ranges); + + return 0; +} + +/* Sort the exclude ranges in memory */ +static int sort_ranges(void) +{ + int i, j; + uint64_t tstart, tend; + for (i = 0; i < nr_exclude_ranges - 1; i++) { + for (j = 0; j < nr_exclude_ranges - i - 1; j++) { + if (exclude_range[j].start > exclude_range[j+1].start) { + tstart = exclude_range[j].start; + tend = exclude_range[j].end; + exclude_range[j].start = exclude_range[j+1].start; + exclude_range[j].end = exclude_range[j+1].end; + exclude_range[j+1].start = tstart; + exclude_range[j+1].end = tend; + } + } + } + return 0; +} + +void scan_reserved_ranges(unsigned long kexec_flags, int *range_index) +{ + char fname[256], buf[16]; + FILE *file; + int i = *range_index; + + strcpy(fname, "/proc/device-tree/reserved-ranges"); + + file = fopen(fname, "r"); + if (file == NULL) { + if (errno != ENOENT) { + perror(fname); + return; + } + errno = 0; + /* File not present. Non PowerKVM system. */ + return; + } + + /* + * Each reserved range is an (address,size) pair, 2 cells each, + * totalling 4 cells per range. + */ + while (fread(buf, sizeof(uint64_t) * 2, 1, file) == 1) { + uint64_t base, size; + + base = be64_to_cpu(((uint64_t *)buf)[0]); + size = be64_to_cpu(((uint64_t *)buf)[1]); + + exclude_range[i].start = base; + exclude_range[i].end = base + size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + reserve(base, size); + } + fclose(file); + *range_index = i; +} + +/* Return 0 if fname/value valid, -1 otherwise */ +int get_devtree_value(const char *fname, unsigned long long *value) +{ + FILE *file; + char buf[MAXBYTES]; + int n = -1; + + if ((file = fopen(fname, "r"))) { + n = fread(buf, 1, MAXBYTES, file); + fclose(file); + } + + if (n == sizeof(uint32_t)) + *value = ((uint32_t *)buf)[0]; + else if (n == sizeof(uint64_t)) + *value = ((uint64_t *)buf)[0]; + else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + return -1; + } + + return 0; +} + +/* Get devtree details and create exclude_range array + * Also create usablemem_ranges for KEXEC_ON_CRASH + */ +static int get_devtree_details(unsigned long kexec_flags) +{ + uint64_t rma_base = -1, base; + uint64_t tce_base; + unsigned int tce_size; + uint64_t htab_base, htab_size; + uint64_t kernel_end; + uint64_t initrd_start, initrd_end; + char buf[MAXBYTES]; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + DIR *dir, *cdir; + FILE *file; + struct dirent *dentry; + struct stat fstat; + int n, i = 0; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + + scan_reserved_ranges(kexec_flags, &i); + + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "chosen", 6) && + strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory") && + strncmp(dentry->d_name, "pci@", 4) && + strncmp(dentry->d_name, "rtas", 4) && + strncmp(dentry->d_name, "ibm,opal", 8)) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((cdir = opendir(fname)) == NULL) { + perror(fname); + goto error_opendir; + } + + if (strncmp(dentry->d_name, "chosen", 6) == 0) { + strcat(fname, "/linux,kernel-end"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&kernel_end, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + kernel_end = be64_to_cpu(kernel_end); + + /* Add kernel memory to exclude_range */ + exclude_range[i].start = 0x0UL; + exclude_range[i].end = kernel_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + if (kexec_flags & KEXEC_ON_CRASH) { + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,crashkernel-base"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&crash_base, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + crash_base = be64_to_cpu(crash_base); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,crashkernel-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&crash_size, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + crash_size = be64_to_cpu(crash_size); + + if (crash_base > mem_min) + mem_min = crash_base; + if (crash_base + crash_size < mem_max) + mem_max = crash_base + crash_size; + + add_usable_mem_rgns(0, crash_base + crash_size); + reserve(KDUMP_BACKUP_LIMIT, crash_base-KDUMP_BACKUP_LIMIT); + } + /* + * Read the first kernel's memory limit. + * If the first kernel is booted with mem= option then + * it would export "linux,memory-limit" file + * reflecting value for the same. + */ + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,memory-limit"); + if ((file = fopen(fname, "r")) == NULL) { + if (errno != ENOENT) { + perror(fname); + goto error_opencdir; + } + errno = 0; + /* + * File not present. + * fall through. On older kernel this file + * is not present. + */ + } else { + if (fread(&memory_limit, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + memory_limit = be64_to_cpu(memory_limit); + } + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,htab-base"); + if ((file = fopen(fname, "r")) == NULL) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&htab_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + htab_base = be64_to_cpu(htab_base); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,htab-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&htab_size, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + htab_size = be64_to_cpu(htab_size); + + /* Add htab address to exclude_range - NON-LPAR only */ + exclude_range[i].start = htab_base; + exclude_range[i].end = htab_base + htab_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + /* reserve the initrd_start and end locations. */ + if (reuse_initrd) { + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,initrd-start"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + /* check for 4 and 8 byte initrd offset sizes */ + if (stat(fname, &fstat) != 0) { + perror(fname); + goto error_openfile; + } + if (fread(&initrd_start, fstat.st_size, 1, file) != 1) { + perror(fname); + goto error_openfile; + } + initrd_start = be64_to_cpu(initrd_start); + fclose(file); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,initrd-end"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + /* check for 4 and 8 byte initrd offset sizes */ + if (stat(fname, &fstat) != 0) { + perror(fname); + goto error_openfile; + } + if (fread(&initrd_end, fstat.st_size, 1, file) != 1) { + perror(fname); + goto error_openfile; + } + initrd_end = be64_to_cpu(initrd_end); + fclose(file); + + /* Add initrd address to exclude_range */ + exclude_range[i].start = initrd_start; + exclude_range[i].end = initrd_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + } + } /* chosen */ + + if (strncmp(dentry->d_name, "rtas", 4) == 0) { + strcat(fname, "/linux,rtas-base"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_base, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + rtas_base = be32_to_cpu(rtas_base); + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/rtas-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_size, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + closedir(cdir); + rtas_size = be32_to_cpu(rtas_size); + /* Add rtas to exclude_range */ + exclude_range[i].start = rtas_base; + exclude_range[i].end = rtas_base + rtas_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(rtas_base, rtas_size); + } /* rtas */ + + if (strncmp(dentry->d_name, "ibm,opal", 8) == 0) { + strcat(fname, "/opal-base-address"); + file = fopen(fname, "r"); + if (file == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&opal_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + opal_base = be64_to_cpu(opal_base); + fclose(file); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/opal-runtime-size"); + file = fopen(fname, "r"); + if (file == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&opal_size, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + closedir(cdir); + opal_size = be64_to_cpu(opal_size); + /* Add OPAL to exclude_range */ + exclude_range[i].start = opal_base; + exclude_range[i].end = opal_base + opal_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(opal_base, opal_size); + } /* ibm,opal */ + + if (!strncmp(dentry->d_name, "memory@", 7) || + !strcmp(dentry->d_name, "memory")) { + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + base = be64_to_cpu(((uint64_t *)buf)[0]); + if (base < rma_base) { + rma_base = base; + rma_top = base + be64_to_cpu(((uint64_t *)buf)[1]); + } + + fclose(file); + closedir(cdir); + } /* memory */ + + if (strncmp(dentry->d_name, "pci@", 4) == 0) { + strcat(fname, "/linux,tce-base"); + if ((file = fopen(fname, "r")) == NULL) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&tce_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + tce_base = be64_to_cpu(tce_base); + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,tce-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&tce_size, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + tce_size = be32_to_cpu(tce_size); + /* Add tce to exclude_range - NON-LPAR only */ + exclude_range[i].start = tce_base; + exclude_range[i].end = tce_base + tce_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(tce_base, tce_size); + closedir(cdir); + } /* pci */ + } + closedir(dir); + + nr_exclude_ranges = i; + + sort_ranges(); + + int k; + for (k = 0; k < i; k++) + dbgprintf("exclude_range sorted exclude_range[%d] " + "start:%llx, end:%llx\n", k, exclude_range[k].start, + exclude_range[k].end); + + return 0; + +error_openfile: + fclose(file); +error_opencdir: + closedir(cdir); +error_opendir: + closedir(dir); + return -1; +} + +/* Setup a sorted list of memory ranges. */ +int setup_memory_ranges(unsigned long kexec_flags) +{ + int i, j = 0; + + /* Get the base list of memory ranges from /proc/device-tree/memory + * nodes. Build list of ranges to be excluded from valid memory + */ + + if (get_base_ranges()) + goto out; + if (get_devtree_details(kexec_flags)) + goto out; + + for (i = 0; i < nr_exclude_ranges; i++) { + /* If first exclude range does not start with 0, include the + * first hole of valid memory from 0 - exclude_range[0].start + */ + if (i == 0) { + if (exclude_range[i].start != 0) { + memory_range[j].start = 0; + memory_range[j].end = exclude_range[i].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + } + } /* i == 0 */ + /* If the last exclude range does not end at memory_max, include + * the last hole of valid memory from exclude_range[last].end - + * memory_max + */ + if (i == nr_exclude_ranges - 1) { + if (exclude_range[i].end < memory_max) { + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = memory_max; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + /* Limit the end to rma_top */ + if (memory_range[j-1].start >= rma_top) { + j--; + break; + } + if ((memory_range[j-1].start < rma_top) && + (memory_range[j-1].end >= rma_top)) { + memory_range[j-1].end = rma_top; + break; + } + continue; + } + } /* i == nr_exclude_ranges - 1 */ + /* contiguous exclude ranges - skip */ + if (exclude_range[i+1].start == exclude_range[i].end + 1) + continue; + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = exclude_range[i+1].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + /* Limit range to rma_top */ + if (memory_range[j-1].start >= rma_top) { + j--; + break; + } + if ((memory_range[j-1].start < rma_top) && + (memory_range[j-1].end >= rma_top)) { + memory_range[j-1].end = rma_top; + break; + } + } + nr_memory_ranges = j; + + int k; + for (k = 0; k < j; k++) + dbgprintf("setup_memory_ranges memory_range[%d] " + "start:%llx, end:%llx\n", k, memory_range[k].start, + memory_range[k].end); + return 0; + +out: + cleanup_memory_ranges(); + return -1; +} + +/* Return a list of valid memory ranges */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + /* allocate memory_range dynamically */ + max_memory_ranges = 1; + + if (alloc_memory_ranges()) + return -1; + if (setup_memory_ranges(kexec_flags)) + return -1; + + /* + * copy the memory here, another realloc_memory_ranges might + * corrupt the old memory + */ + *range = calloc(sizeof(struct memory_range), nr_memory_ranges); + if (*range == NULL) + return -1; + memmove(*range, memory_range, + sizeof(struct memory_range) * nr_memory_ranges); + + *ranges = nr_memory_ranges; + dbgprintf("get memory ranges:%d\n", nr_memory_ranges); + return 0; +} + +struct file_type file_type[] = { + { "elf-ppc64", elf_ppc64_probe, elf_ppc64_load, elf_ppc64_usage }, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ + fprintf(stderr, " --elf64-core-headers Prepare core headers in ELF64 format\n"); + fprintf(stderr, " --dt-no-old-root Do not reuse old kernel root= param.\n" \ + " while creating flatten device tree.\n"); +} + +struct arch_options_t arch_options = { + .core_header_type = CORE_TYPE_ELF64, +}; + +int arch_process_options(int argc, char **argv) +{ + /* We look for all options so getopt_long doesn't start reordering + * argv[] before file_type[n].load() gets a look in. + */ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + break; + case OPT_ELF64_CORE: + arch_options.core_header_type = CORE_TYPE_ELF64; + break; + case OPT_DT_NO_OLD_ROOT: + dt_no_old_root = 1; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + /* We are running a 32-bit kexec-tools on 64-bit ppc64. + * So pass KEXEC_ARCH_PPC64 here + */ + { "ppc64", KEXEC_ARCH_PPC64 }, + { "ppc64le", KEXEC_ARCH_PPC64 }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} diff --git a/kexec/arch/ppc64/kexec-ppc64.h b/kexec/arch/ppc64/kexec-ppc64.h new file mode 100644 index 0000000..434b4bf --- /dev/null +++ b/kexec/arch/ppc64/kexec-ppc64.h @@ -0,0 +1,45 @@ +#ifndef KEXEC_PPC64_H +#define KEXEC_PPC64_H + +#define PATH_LEN 256 +#define MAXBYTES 128 +#define MAX_LINE 160 +#define CORE_TYPE_ELF32 1 +#define CORE_TYPE_ELF64 2 + +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 17 +#if (BOOT_BLOCK_VERSION < 16) +# define NEED_STRUCTURE_BLOCK_EXTRA_PAD +#endif +#define HAVE_DYNAMIC_MEMORY +#define NEED_RESERVE_DTB + +extern int get_devtree_value(const char *fname, unsigned long long *pvalue); + +int setup_memory_ranges(unsigned long kexec_flags); + +int elf_ppc64_probe(const char *buf, off_t len); +int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_ppc64_usage(void); + +struct mem_ehdr; +unsigned long my_r2(const struct mem_ehdr *ehdr); + +extern uint64_t initrd_base, initrd_size; +extern int max_memory_ranges; +extern unsigned char reuse_initrd; + +struct arch_options_t { + int core_header_type; +}; + +typedef struct mem_rgns { + unsigned int size; + struct memory_range *ranges; +} mem_rgns_t; + +extern mem_rgns_t usablemem_rgns; + +#endif /* KEXEC_PPC64_H */ diff --git a/kexec/arch/ppc64/kexec-zImage-ppc64.c b/kexec/arch/ppc64/kexec-zImage-ppc64.c new file mode 100644 index 0000000..e946205 --- /dev/null +++ b/kexec/arch/ppc64/kexec-zImage-ppc64.c @@ -0,0 +1,184 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <linux/elf.h> +#include "../../kexec.h" + +#define MAX_HEADERS 32 + +int zImage_ppc64_probe(FILE *file) +{ + Elf32_Ehdr elf; + int valid; + + if (fseek(file, 0, SEEK_SET) < 0) { + fprintf(stderr, "seek error: %s\n", + strerror(errno)); + return -1; + } + if (fread(&elf, sizeof(Elf32_Ehdr), 1, file) != 1) { + fprintf(stderr, "read error: %s\n", + strerror(errno)); + return -1; + } + + if (elf.e_machine == EM_PPC64) { + fprintf(stderr, "Elf64 not supported\n"); + return -1; + } + + valid = (elf.e_ident[EI_MAG0] == ELFMAG0 && + elf.e_ident[EI_MAG1] == ELFMAG1 && + elf.e_ident[EI_MAG2] == ELFMAG2 && + elf.e_ident[EI_MAG3] == ELFMAG3 && + elf.e_ident[EI_CLASS] == ELFCLASS32 && + elf.e_ident[EI_DATA] == ELFDATA2MSB && + elf.e_type == ET_EXEC && + elf.e_machine == EM_PPC); + + return valid ? 0 : -1; +} + +int zImage_ppc64_load(FILE *file, int UNUSED(argc), char **UNUSED(argv), + void **ret_entry, struct kexec_segment **ret_segments, + int *ret_nr_segments) +{ + Elf32_Ehdr elf; + Elf32_Phdr *p, *ph; + struct kexec_segment *segment; + int i; + unsigned long memsize, filesize, offset, load_loc = 0; + + /* Parse command line arguments */ + + /* Read in the Elf32 header */ + if (fseek(file, 0, SEEK_SET) < 0) { + perror("seek error:"); + return -1; + } + if (fread(&elf, sizeof(Elf32_Ehdr), 1, file) != 1) { + perror("read error: "); + return -1; + } + if (elf.e_phnum > MAX_HEADERS) { + fprintf(stderr, + "Only kernels with %i program headers are supported\n", + MAX_HEADERS); + return -1; + } + + /* Read the section header */ + ph = (Elf32_Phdr *)malloc(sizeof(Elf32_Phdr) * elf.e_phnum); + if (ph == 0) { + perror("malloc failed: "); + return -1; + } + if (fseek(file, elf.e_phoff, SEEK_SET) < 0) { + perror("seek failed: "); + free(ph); + return -1; + } + if (fread(ph, sizeof(Elf32_Phdr) * elf.e_phnum, 1, file) != 1) { + perror("read error: "); + free(ph); + return -1; + } + + *ret_segments = malloc(elf.e_phnum * sizeof(struct kexec_segment)); + if (*ret_segments == 0) { + fprintf(stderr, "malloc failed: %s\n", + strerror(errno)); + free(ph); + return -1; + } + segment = ret_segments[0]; + + /* Scan through the program header */ + memsize = filesize = offset = 0; + p = ph; + for (i = 0; i < elf.e_phnum; ++i, ++p) { + if (p->p_type != PT_LOAD || p->p_offset == 0) + continue; + if (memsize == 0) { + offset = p->p_offset; + memsize = p->p_memsz; + filesize = p->p_filesz; + load_loc = p->p_vaddr; + } else { + memsize = p->p_offset + p->p_memsz - offset; + filesize = p->p_offset + p->p_filesz - offset; + } + } + if (memsize == 0) { + fprintf(stderr, "Can't find a loadable segment.\n"); + free(ph); + return -1; + } + + /* Load program segments */ + p = ph; + segment->buf = malloc(filesize); + if (segment->buf == 0) { + perror("malloc failed: "); + free(ph); + return -1; + } + for (i = 0; i < elf.e_phnum; ++i, ++p) { + unsigned long mem_offset; + if (p->p_type != PT_LOAD || p->p_offset == 0) + continue; + + /* skip to the actual image */ + if (fseek(file, p->p_offset, SEEK_SET) < 0) { + perror("seek error: "); + free(ph); + return -1; + } + mem_offset = p->p_vaddr - load_loc; + if (fread((void *)segment->buf+mem_offset, p->p_filesz, 1, + file) != 1) { + perror("read error: "); + free(ph); + return -1; + } + } + segment->mem = (void *) load_loc; + segment->memsz = memsize; + segment->bufsz = filesize; + *ret_entry = (void *)(uintptr_t)elf.e_entry; + *ret_nr_segments = i - 1; + free(ph); + return 0; +} + +void zImage_ppc64_usage(void) +{ + fprintf(stderr, "zImage support is still broken\n"); +} |