diff options
Diffstat (limited to 'kexec/arch/ppc64')
-rw-r--r-- | kexec/arch/ppc64/Makefile | 26 | ||||
-rw-r--r-- | kexec/arch/ppc64/crashdump-ppc64.c | 644 | ||||
-rw-r--r-- | kexec/arch/ppc64/crashdump-ppc64.h | 51 | ||||
-rw-r--r-- | kexec/arch/ppc64/fdt.c | 78 | ||||
-rw-r--r-- | kexec/arch/ppc64/include/arch/fdt.h | 8 | ||||
-rw-r--r-- | kexec/arch/ppc64/include/arch/options.h | 51 | ||||
-rw-r--r-- | kexec/arch/ppc64/kexec-elf-ppc64.c | 496 | ||||
-rw-r--r-- | kexec/arch/ppc64/kexec-elf-rel-ppc64.c | 204 | ||||
-rw-r--r-- | kexec/arch/ppc64/kexec-ppc64.c | 969 | ||||
-rw-r--r-- | kexec/arch/ppc64/kexec-ppc64.h | 45 | ||||
-rw-r--r-- | kexec/arch/ppc64/kexec-zImage-ppc64.c | 184 |
11 files changed, 2756 insertions, 0 deletions
diff --git a/kexec/arch/ppc64/Makefile b/kexec/arch/ppc64/Makefile new file mode 100644 index 0000000..9caf501 --- /dev/null +++ b/kexec/arch/ppc64/Makefile @@ -0,0 +1,26 @@ +# +# kexec ppc64 (linux booting linux) +# +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +ppc64_KEXEC_SRCS = kexec/arch/ppc64/kexec-elf-rel-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-zImage-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-elf-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/crashdump-ppc64.c +ppc64_KEXEC_SRCS += kexec/arch/ppc64/fdt.c +ppc64_KEXEC_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) + +ppc64_ARCH_REUSE_INITRD = + +ppc64_FS2DT = kexec/fs2dt.c +ppc64_FS2DT_INCLUDE = -include $(srcdir)/kexec/arch/ppc64/crashdump-ppc64.h \ + -include $(srcdir)/kexec/arch/ppc64/kexec-ppc64.h + +ppc64_CPPFLAGS = -I$(srcdir)/kexec/libfdt + +dist += kexec/arch/ppc64/Makefile $(ppc64_KEXEC_SRCS) \ + kexec/arch/ppc64/kexec-ppc64.h kexec/arch/ppc64/crashdump-ppc64.h \ + kexec/arch/ppc64/include/arch/fdt.h \ + kexec/arch/ppc64/include/arch/options.h + diff --git a/kexec/arch/ppc64/crashdump-ppc64.c b/kexec/arch/ppc64/crashdump-ppc64.c new file mode 100644 index 0000000..6d47898 --- /dev/null +++ b/kexec/arch/ppc64/crashdump-ppc64.c @@ -0,0 +1,644 @@ +/* + * kexec: Linux boots Linux + * + * Created by: R Sharada (sharada@in.ibm.com) + * Copyright (C) IBM Corporation, 2005. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <limits.h> +#include <elf.h> +#include <dirent.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "../../crashdump.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" + +#define DEVTREE_CRASHKERNEL_BASE "/proc/device-tree/chosen/linux,crashkernel-base" +#define DEVTREE_CRASHKERNEL_SIZE "/proc/device-tree/chosen/linux,crashkernel-size" + +unsigned int num_of_lmb_sets; +unsigned int is_dyn_mem_v2; +uint64_t lmb_size; + +static struct crash_elf_info elf_info64 = +{ + class: ELFCLASS64, +#if BYTE_ORDER == LITTLE_ENDIAN + data: ELFDATA2LSB, +#else + data: ELFDATA2MSB, +#endif + machine: EM_PPC64, + page_offset: PAGE_OFFSET, + lowmem_limit: MAXMEM, +}; + +static struct crash_elf_info elf_info32 = +{ + class: ELFCLASS32, + data: ELFDATA2MSB, + machine: EM_PPC64, + page_offset: PAGE_OFFSET, + lowmem_limit: MAXMEM, +}; + +extern struct arch_options_t arch_options; + +/* Stores a sorted list of RAM memory ranges for which to create elf headers. + * A separate program header is created for backup region + */ +static struct memory_range *crash_memory_range = NULL; + +/* Define a variable to replace the CRASH_MAX_MEMORY_RANGES macro */ +static int crash_max_memory_ranges; + +/* + * Used to save various memory ranges/regions needed for the captured + * kernel to boot. 
(lime memmap= option in other archs) + */ +mem_rgns_t usablemem_rgns = {0, NULL}; + +static unsigned long long cstart, cend; +static int memory_ranges; + +/* + * Exclude the region that lies within crashkernel and above the memory + * limit which is reflected by mem= kernel option. + */ +static void exclude_crash_region(uint64_t start, uint64_t end) +{ + /* If memory_limit is set then exclude the memory region above it. */ + if (memory_limit) { + if (start >= memory_limit) + return; + if (end > memory_limit) + end = memory_limit; + } + + if (cstart < end && cend > start) { + if (start < cstart && end > cend) { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = cstart; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + crash_memory_range[memory_ranges].start = cend; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } else if (start < cstart) { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = cstart; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } else if (end > cend) { + crash_memory_range[memory_ranges].start = cend; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } + } else { + crash_memory_range[memory_ranges].start = start; + crash_memory_range[memory_ranges].end = end; + crash_memory_range[memory_ranges].type = RANGE_RAM; + memory_ranges++; + } +} + +static int get_dyn_reconf_crash_memory_ranges(void) +{ + uint64_t start, end; + uint64_t startrange, endrange; + uint64_t size; + char fname[128], buf[32]; + FILE *file; + unsigned int i; + int n; + uint32_t flags; + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, "ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory"); + if (is_dyn_mem_v2) + strcat(fname, "-v2"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + 
return -1; + } + + fseek(file, 4, SEEK_SET); + startrange = endrange = 0; + size = lmb_size; + for (i = 0; i < num_of_lmb_sets; i++) { + if ((n = fread(buf, 1, LMB_ENTRY_SIZE, file)) < 0) { + perror(fname); + fclose(file); + return -1; + } + if (memory_ranges >= (max_memory_ranges + 1)) { + /* No space to insert another element. */ + fprintf(stderr, + "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + fclose(file); + return -1; + } + + /* + * If the property is ibm,dynamic-memory-v2, the first 4 bytes + * tell the number of sequential LMBs in this entry. + */ + if (is_dyn_mem_v2) + size = be32_to_cpu(((unsigned int *)buf)[0]) * lmb_size; + + start = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR])); + end = start + size; + if (start == 0 && end >= (BACKUP_SRC_END + 1)) + start = BACKUP_SRC_END + 1; + + flags = be32_to_cpu((*((uint32_t *)&buf[DRCONF_FLAGS]))); + /* skip this block if the reserved bit is set in flags (0x80) + or if the block is not assigned to this partition (0x8) */ + if ((flags & 0x80) || !(flags & 0x8)) + continue; + + if (start != endrange) { + if (startrange != endrange) + exclude_crash_region(startrange, endrange); + startrange = start; + } + endrange = end; + } + if (startrange != endrange) + exclude_crash_region(startrange, endrange); + + fclose(file); + return 0; +} + +/* + * For a given memory node, check if it is mapped to system RAM or + * to onboard memory on accelerator device like GPU card or such. 
+ */ +static int is_coherent_device_mem(const char *fname) +{ + char fpath[PATH_LEN]; + char buf[32]; + DIR *dmem; + FILE *file; + struct dirent *mentry; + int cnt, ret = 0; + + strcpy(fpath, fname); + if ((dmem = opendir(fpath)) == NULL) { + perror(fpath); + return -1; + } + + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "compatible")) + continue; + + strcat(fpath, "/compatible"); + if ((file = fopen(fpath, "r")) == NULL) { + perror(fpath); + ret = -1; + break; + } + if ((cnt = fread(buf, 1, 32, file)) < 0) { + perror(fpath); + fclose(file); + ret = -1; + break; + } + if (!strncmp(buf, "ibm,coherent-device-memory", 26)) { + fclose(file); + ret = 1; + break; + } + fclose(file); + } + + closedir(dmem); + return ret; +} + + +/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to + * create Elf headers. Keeping it separate from get_memory_ranges() as + * requirements are different in the case of normal kexec and crashdumps. + * + * Normal kexec needs to look at all of available physical memory irrespective + * of the fact how much of it is being used by currently running kernel. + * Crashdumps need to have access to memory regions actually being used by + * running kernel. Expecting a different file/data structure than /proc/iomem + * to look into down the line. May be something like /proc/kernelmem or may + * be zone data structures exported from kernel. 
+ */ +static int get_crash_memory_ranges(struct memory_range **range, int *ranges) +{ + + char device_tree[256] = "/proc/device-tree/"; + char fname[PATH_LEN]; + char buf[MAXBYTES]; + DIR *dir, *dmem; + FILE *file; + struct dirent *dentry, *mentry; + int n, ret, crash_rng_len = 0; + unsigned long long start, end; + int page_size; + + crash_max_memory_ranges = max_memory_ranges + 6; + crash_rng_len = sizeof(struct memory_range) * crash_max_memory_ranges; + + crash_memory_range = (struct memory_range *) malloc(crash_rng_len); + if (!crash_memory_range) { + fprintf(stderr, "Allocation for crash memory range failed\n"); + return -1; + } + memset(crash_memory_range, 0, crash_rng_len); + + /* create a separate program header for the backup region */ + crash_memory_range[0].start = BACKUP_SRC_START; + crash_memory_range[0].end = BACKUP_SRC_END + 1; + crash_memory_range[0].type = RANGE_RAM; + memory_ranges++; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + goto err; + } + + cstart = crash_base; + cend = crash_base + crash_size; + + while ((dentry = readdir(dir)) != NULL) { + if (!strncmp(dentry->d_name, + "ibm,dynamic-reconfiguration-memory", 35)){ + get_dyn_reconf_crash_memory_ranges(); + continue; + } + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + + ret = is_coherent_device_mem(fname); + if (ret == -1) { + closedir(dir); + goto err; + } else if (ret == 1) { + /* + * Avoid adding this memory region as it is not + * mapped to system RAM. 
+ */ + continue; + } + + if ((dmem = opendir(fname)) == NULL) { + perror(fname); + closedir(dir); + goto err; + } + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + closedir(dmem); + closedir(dir); + goto err; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + fclose(file); + closedir(dmem); + closedir(dir); + goto err; + } + if (memory_ranges >= (max_memory_ranges + 1)) { + /* No space to insert another element. */ + fprintf(stderr, + "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + goto err; + } + + start = be64_to_cpu(((unsigned long long *)buf)[0]); + end = start + + be64_to_cpu(((unsigned long long *)buf)[1]); + if (start == 0 && end >= (BACKUP_SRC_END + 1)) + start = BACKUP_SRC_END + 1; + + exclude_crash_region(start, end); + fclose(file); + } + closedir(dmem); + } + closedir(dir); + + /* + * If RTAS region is overlapped with crashkernel, need to create ELF + * Program header for the overlapped memory. + */ + if (crash_base < rtas_base + rtas_size && + rtas_base < crash_base + crash_size) { + page_size = getpagesize(); + cstart = rtas_base; + cend = rtas_base + rtas_size; + if (cstart < crash_base) + cstart = crash_base; + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + /* + * The rtas section created here is formed by reading rtas-base + * and rtas-size from /proc/device-tree/rtas. Unfortunately + * rtas-size is not required to be a multiple of PAGE_SIZE + * The remainder of the page it ends on is just garbage, and is + * safe to read, its just not accounted in rtas-size. Since + * we're creating an elf section here though, lets round it up + * to the next page size boundary though, so makedumpfile can + * read it safely without going south on us. 
+ */ + cend = _ALIGN(cend, page_size); + + crash_memory_range[memory_ranges].start = cstart; + crash_memory_range[memory_ranges++].end = cend; + } + + /* + * If OPAL region is overlapped with crashkernel, need to create ELF + * Program header for the overlapped memory. + */ + if (crash_base < opal_base + opal_size && + opal_base < crash_base + crash_size) { + page_size = getpagesize(); + cstart = opal_base; + cend = opal_base + opal_size; + if (cstart < crash_base) + cstart = crash_base; + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + /* + * The opal section created here is formed by reading opal-base + * and opal-size from /proc/device-tree/ibm,opal. Unfortunately + * opal-size is not required to be a multiple of PAGE_SIZE + * The remainder of the page it ends on is just garbage, and is + * safe to read, its just not accounted in opal-size. Since + * we're creating an elf section here though, lets round it up + * to the next page size boundary though, so makedumpfile can + * read it safely without going south on us. + */ + cend = _ALIGN(cend, page_size); + + crash_memory_range[memory_ranges].start = cstart; + crash_memory_range[memory_ranges++].end = cend; + } + *range = crash_memory_range; + *ranges = memory_ranges; + + int j; + dbgprintf("CRASH MEMORY RANGES\n"); + for(j = 0; j < *ranges; j++) { + start = crash_memory_range[j].start; + end = crash_memory_range[j].end; + dbgprintf("%016Lx-%016Lx\n", start, end); + } + + return 0; + +err: + if (crash_memory_range) + free(crash_memory_range); + return -1; +} + +static int add_cmdline_param(char *cmdline, uint64_t addr, char *cmdstr, + char *byte) +{ + int cmdline_size, cmdlen, len, align = 1024; + char str[COMMAND_LINE_SIZE], *ptr; + + /* Passing in =xxxK / =xxxM format. 
Saves space required in cmdline.*/ + switch (byte[0]) { + case 'K': + if (addr%align) + return -1; + addr = addr/align; + break; + case 'M': + addr = addr/(align *align); + break; + } + ptr = str; + strcpy(str, cmdstr); + ptr += strlen(str); + ultoa(addr, ptr); + strcat(str, byte); + len = strlen(str); + cmdlen = strlen(cmdline) + len; + cmdline_size = COMMAND_LINE_SIZE; + if (cmdlen > (cmdline_size - 1)) + die("Command line overflow\n"); + strcat(cmdline, str); + dbgprintf("Command line after adding elfcorehdr: %s\n", cmdline); + return 0; +} + +/* Loads additional segments in case of a panic kernel is being loaded. + * One segment for backup region, another segment for storing elf headers + * for crash memory image. + */ +int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, + uint64_t max_addr, unsigned long min_base) +{ + void *tmp; + unsigned long sz; + uint64_t elfcorehdr; + int nr_ranges, align = 1024, i; + unsigned long long end; + struct memory_range *mem_range; + + if (get_crash_memory_ranges(&mem_range, &nr_ranges) < 0) + return -1; + + info->backup_src_start = BACKUP_SRC_START; + info->backup_src_size = BACKUP_SRC_SIZE; + /* Create a backup region segment to store backup data*/ + sz = _ALIGN(BACKUP_SRC_SIZE, align); + tmp = xmalloc(sz); + memset(tmp, 0, sz); + info->backup_start = add_buffer(info, tmp, sz, sz, align, + 0, max_addr, 1); + reserve(info->backup_start, sz); + + /* On ppc64 memory ranges in device-tree is denoted as start + * and size rather than start and end, as is the case with + * other architectures like i386 . Because of this when loading + * the memory ranges in crashdump-elf.c the filesz calculation + * [ end - start + 1 ] goes for a toss. 
+ * + * To be in sync with other archs adjust the end value for + * every crash memory range before calling the generic function + */ + + for (i = 0; i < nr_ranges; i++) { + end = crash_memory_range[i].end - 1; + crash_memory_range[i].end = end; + } + + + /* Create elf header segment and store crash image data. */ + if (arch_options.core_header_type == CORE_TYPE_ELF64) { + if (crash_create_elf64_headers(info, &elf_info64, + crash_memory_range, nr_ranges, + &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) { + free (tmp); + return -1; + } + } + else { + if (crash_create_elf32_headers(info, &elf_info32, + crash_memory_range, nr_ranges, + &tmp, &sz, + ELF_CORE_HEADER_ALIGN) < 0) { + free(tmp); + return -1; + } + } + + elfcorehdr = add_buffer(info, tmp, sz, sz, align, min_base, + max_addr, 1); + reserve(elfcorehdr, sz); + /* modify and store the cmdline in a global array. This is later + * read by flatten_device_tree and modified if required + */ + add_cmdline_param(mod_cmdline, elfcorehdr, " elfcorehdr=", "K"); + return 0; +} + +/* + * Used to save various memory regions needed for the captured kernel. 
+ */ + +void add_usable_mem_rgns(unsigned long long base, unsigned long long size) +{ + unsigned int i; + unsigned long long end = base + size; + unsigned long long ustart, uend; + + base = _ALIGN_DOWN(base, getpagesize()); + end = _ALIGN_UP(end, getpagesize()); + + for (i=0; i < usablemem_rgns.size; i++) { + ustart = usablemem_rgns.ranges[i].start; + uend = usablemem_rgns.ranges[i].end; + if (base < uend && end > ustart) { + if ((base >= ustart) && (end <= uend)) + return; + if (base < ustart && end > uend) { + usablemem_rgns.ranges[i].start = base; + usablemem_rgns.ranges[i].end = end; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new base:%llx new size:%llx\n", + i, base, size); +#endif + return; + } else if (base < ustart) { + usablemem_rgns.ranges[i].start = base; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new base:%llx new size:%llx", + i, base, usablemem_rgns.ranges[i].end - base); +#endif + return; + } else if (end > uend){ + usablemem_rgns.ranges[i].end = end; +#ifdef DEBUG + fprintf(stderr, "usable memory rgn %u: new end:%llx, new size:%llx", + i, end, end - usablemem_rgns.ranges[i].start); +#endif + return; + } + } + } + usablemem_rgns.ranges[usablemem_rgns.size].start = base; + usablemem_rgns.ranges[usablemem_rgns.size++].end = end; + + dbgprintf("usable memory rgns size:%u base:%llx size:%llx\n", + usablemem_rgns.size, base, size); +} + +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end) +{ + unsigned long long value; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_BASE, &value)) + *start = be64_to_cpu(value); + else + return -1; + + if (!get_devtree_value(DEVTREE_CRASHKERNEL_SIZE, &value)) + *end = *start + be64_to_cpu(value) - 1; + else + return -1; + + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + int fd; + + fd = open(DEVTREE_CRASHKERNEL_BASE, O_RDONLY); + if (fd < 0) + return 0; + close(fd); + return 1; +} + +#if 0 +static int sort_regions(mem_rgns_t *rgn) +{ + int i, j; + unsigned long long tstart, 
tend; + for (i = 0; i < rgn->size; i++) { + for (j = 0; j < rgn->size - i - 1; j++) { + if (rgn->ranges[j].start > rgn->ranges[j+1].start) { + tstart = rgn->ranges[j].start; + tend = rgn->ranges[j].end; + rgn->ranges[j].start = rgn->ranges[j+1].start; + rgn->ranges[j].end = rgn->ranges[j+1].end; + rgn->ranges[j+1].start = tstart; + rgn->ranges[j+1].end = tend; + } + } + } + return 0; + +} +#endif + diff --git a/kexec/arch/ppc64/crashdump-ppc64.h b/kexec/arch/ppc64/crashdump-ppc64.h new file mode 100644 index 0000000..b0cba8a --- /dev/null +++ b/kexec/arch/ppc64/crashdump-ppc64.h @@ -0,0 +1,51 @@ +#ifndef CRASHDUMP_PPC64_H +#define CRASHDUMP_PPC64_H + +#include <stdint.h> +#include <sys/types.h> + +struct kexec_info; +int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, + uint64_t max_addr, unsigned long min_base); +void add_usable_mem_rgns(unsigned long long base, unsigned long long size); + +#define PAGE_OFFSET 0xC000000000000000ULL +#define KERNELBASE PAGE_OFFSET +#define VMALLOCBASE 0xD000000000000000ULL + +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define MAXMEM (-(unsigned long)(KERNELBASE-VMALLOCBASE)) + +#define COMMAND_LINE_SIZE 2048 /* from kernel */ +/* Backup Region, First 64K of System RAM. */ +#define BACKUP_SRC_START 0x0000 +#define BACKUP_SRC_END 0xffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) + +#define KDUMP_BACKUP_LIMIT BACKUP_SRC_SIZE + +#define KERNEL_RUN_AT_ZERO_MAGIC 0x72756e30 /* "run0" */ + +extern uint64_t crash_base; +extern uint64_t crash_size; +extern uint64_t memory_limit; +extern unsigned int rtas_base; +extern unsigned int rtas_size; +extern uint64_t opal_base; +extern uint64_t opal_size; + +/* + * In case of ibm,dynamic-memory-v2 property, this is the number of LMB + * sets where each set represents a group of sequential LMB entries. In + * case of ibm,dynamic-memory property, the number of LMB sets is nothing + * but the total number of LMB entries. 
+ */ +extern unsigned int num_of_lmb_sets; +extern unsigned int is_dyn_mem_v2; +extern uint64_t lmb_size; + +#define LMB_ENTRY_SIZE 24 +#define DRCONF_ADDR (is_dyn_mem_v2 ? 4 : 0) +#define DRCONF_FLAGS 20 + +#endif /* CRASHDUMP_PPC64_H */ diff --git a/kexec/arch/ppc64/fdt.c b/kexec/arch/ppc64/fdt.c new file mode 100644 index 0000000..8bc6d2d --- /dev/null +++ b/kexec/arch/ppc64/fdt.c @@ -0,0 +1,78 @@ +/* + * ppc64 fdt fixups + * + * Copyright 2015 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <arch/fdt.h> +#include <libfdt.h> +#include <stdio.h> +#include <stdlib.h> + +/* + * Let the kernel know it booted from kexec, as some things (e.g. + * secondary CPU release) may work differently. + */ +static int fixup_kexec_prop(void *fdt) +{ + int err, nodeoffset; + + nodeoffset = fdt_subnode_offset(fdt, 0, "chosen"); + if (nodeoffset < 0) + nodeoffset = fdt_add_subnode(fdt, 0, "chosen"); + if (nodeoffset < 0) { + printf("%s: add /chosen %s\n", __func__, + fdt_strerror(nodeoffset)); + return -1; + } + + err = fdt_setprop(fdt, nodeoffset, "linux,booted-from-kexec", + NULL, 0); + if (err < 0) { + printf("%s: couldn't write linux,booted-from-kexec: %s\n", + __func__, fdt_strerror(err)); + return -1; + } + + return 0; +} + + +/* + * For now, assume that the added content fits in the file. + * This should be the case when flattening from /proc/device-tree, + * and when passing in a dtb, dtc can be told to add padding. 
+ */ +int fixup_dt(char **fdt, off_t *size) +{ + int ret; + + *size += 4096; + *fdt = realloc(*fdt, *size); + if (!*fdt) { + fprintf(stderr, "%s: out of memory\n", __func__); + return -1; + } + + ret = fdt_open_into(*fdt, *fdt, *size); + if (ret < 0) { + fprintf(stderr, "%s: fdt_open_into: %s\n", __func__, + fdt_strerror(ret)); + return -1; + } + + ret = fixup_kexec_prop(*fdt); + if (ret < 0) + return ret; + + return 0; +} diff --git a/kexec/arch/ppc64/include/arch/fdt.h b/kexec/arch/ppc64/include/arch/fdt.h new file mode 100644 index 0000000..b19f185 --- /dev/null +++ b/kexec/arch/ppc64/include/arch/fdt.h @@ -0,0 +1,8 @@ +#ifndef KEXEC_ARCH_PPC64_FDT +#define KEXEC_ARCH_PPC64_FDT + +#include <sys/types.h> + +int fixup_dt(char **fdt, off_t *size); + +#endif diff --git a/kexec/arch/ppc64/include/arch/options.h b/kexec/arch/ppc64/include/arch/options.h new file mode 100644 index 0000000..2bca96a --- /dev/null +++ b/kexec/arch/ppc64/include/arch/options.h @@ -0,0 +1,51 @@ +#ifndef KEXEC_ARCH_PPC64_OPTIONS_H +#define KEXEC_ARCH_PPC64_OPTIONS_H + +#define OPT_ELF64_CORE (OPT_MAX+0) +#define OPT_DT_NO_OLD_ROOT (OPT_MAX+1) +#define OPT_ARCH_MAX (OPT_MAX+2) + +/* All 'local' loader options: */ +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_RAMDISK (OPT_ARCH_MAX+1) +#define OPT_DEVICETREEBLOB (OPT_ARCH_MAX+2) +#define OPT_ARGS_IGNORE (OPT_ARCH_MAX+3) +#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+4) + +/* Options relevant to the architecture (excluding loader-specific ones): */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "elf64-core-headers", 0, 0, OPT_ELF64_CORE }, \ + { "dt-no-old-root", 0, 0, OPT_DT_NO_OLD_ROOT }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. 
+ * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. + */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "append", 1, NULL, OPT_APPEND }, \ + { "ramdisk", 1, NULL, OPT_RAMDISK }, \ + { "initrd", 1, NULL, OPT_RAMDISK }, \ + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, \ + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, \ + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + +#define KEXEC_ALL_OPT_STR KEXEC_OPT_STR + + +#endif /* KEXEC_ARCH_PPC64_OPTIONS_H */ diff --git a/kexec/arch/ppc64/kexec-elf-ppc64.c b/kexec/arch/ppc64/kexec-elf-ppc64.c new file mode 100644 index 0000000..01d045f --- /dev/null +++ b/kexec/arch/ppc64/kexec-elf-ppc64.c @@ -0,0 +1,496 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <linux/elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" +#include <libfdt.h> +#include <arch/fdt.h> +#include <arch/options.h> + +uint64_t initrd_base, initrd_size; +unsigned char reuse_initrd = 0; +const char *ramdisk; + +int elf_ppc64_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + goto out; + } + + /* Verify the architecuture specific bits */ + if ((ehdr.e_machine != EM_PPC64) && (ehdr.e_machine != EM_PPC)) { + /* for a different architecture */ + result = -1; + goto out; + } + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +void arch_reuse_initrd(void) +{ + reuse_initrd = 1; +} + +static int read_prop(char *name, void *value, size_t len) +{ + int fd; + size_t rlen; + + fd = open(name, O_RDONLY); + if (fd == -1) + return -1; + + rlen = read(fd, value, len); + if (rlen < 0) + fprintf(stderr, "Warning : Can't read %s : %s", + name, strerror(errno)); + else if (rlen != len) + fprintf(stderr, "Warning : short read from %s", name); + + close(fd); + return 0; +} + +static int elf_ppc64_load_file(int argc, char **argv, struct kexec_info *info) +{ + int ret = 0; + char *cmdline, *dtb; + char *append_cmdline = NULL; + char *reuse_cmdline = NULL; + int opt, cmdline_len = 0; + + /* See options.h -- add any more there, too. 
*/ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE}, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + /* Parse command line arguments */ + cmdline = 0; + dtb = 0; + ramdisk = 0; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append_cmdline = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_DEVICETREEBLOB: + dtb = optarg; + break; + case OPT_ARGS_IGNORE: + break; + case OPT_REUSE_CMDLINE: + reuse_cmdline = get_command_line(); + break; + } + } + + if (dtb) + die("--dtb not supported while using --kexec-file-syscall.\n"); + + if (reuse_initrd) + die("--reuseinitrd not supported with --kexec-file-syscall.\n"); + + cmdline = concat_cmdline(reuse_cmdline, append_cmdline); + if (!reuse_cmdline) + free(reuse_cmdline); + + if (cmdline) { + cmdline_len = strlen(cmdline) + 1; + } else { + cmdline = strdup("\0"); + cmdline_len = 1; + } + + if (ramdisk) { + info->initrd_fd = open(ramdisk, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, "Could not open initrd file %s:%s\n", + ramdisk, strerror(errno)); + ret = -1; + goto out; + } + } + + info->command_line = cmdline; + info->command_line_len = cmdline_len; + return ret; +out: + if (cmdline_len == 1) + free(cmdline); + return ret; +} + +int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + char *cmdline, *modified_cmdline = NULL; + char *reuse_cmdline = NULL; + char *append_cmdline = 
NULL; + const char *devicetreeblob; + uint64_t max_addr, hole_addr; + char *seg_buf = NULL; + off_t seg_size = 0; + struct mem_phdr *phdr; + size_t size; +#ifdef NEED_RESERVE_DTB + uint64_t *rsvmap_ptr; + struct bootblock *bb_ptr; +#endif + int result, opt; + uint64_t my_kernel, my_dt_offset; + uint64_t my_opal_base = 0, my_opal_entry = 0; + unsigned int my_panic_kernel; + uint64_t my_stack, my_backup_start; + uint64_t toc_addr; + uint32_t my_run_at_load; + unsigned int slave_code[256/sizeof (unsigned int)], master_entry; + + /* See options.h -- add any more there, too. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, + { "dtb", 1, NULL, OPT_DEVICETREEBLOB }, + { "args-linux", 0, NULL, OPT_ARGS_IGNORE }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE}, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_OPT_STR ""; + + if (info->file_mode) + return elf_ppc64_load_file(argc, argv, info); + + /* Parse command line arguments */ + initrd_base = 0; + initrd_size = 0; + cmdline = 0; + ramdisk = 0; + devicetreeblob = 0; + max_addr = 0xFFFFFFFFFFFFFFFFULL; + hole_addr = 0; + + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append_cmdline = optarg; + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_DEVICETREEBLOB: + devicetreeblob = optarg; + break; + case OPT_ARGS_IGNORE: + break; + case OPT_REUSE_CMDLINE: + reuse_cmdline = get_command_line(); + break; + } + } + + cmdline = concat_cmdline(reuse_cmdline, append_cmdline); + if (!reuse_cmdline) + free(reuse_cmdline); + + if (!cmdline) + fprintf(stdout, "Warning: append= option is not passed. 
Using the first kernel root partition\n"); + + if (ramdisk && reuse_initrd) + die("Can't specify --ramdisk or --initrd with --reuseinitrd\n"); + + /* Need to append some command line parameters internally in case of + * taking crash dumps. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + modified_cmdline = xmalloc(COMMAND_LINE_SIZE); + memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE); + if (cmdline) { + strncpy(modified_cmdline, cmdline, COMMAND_LINE_SIZE); + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + } + } + + /* Parse the Elf file */ + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) { + free_elf_info(&ehdr); + return result; + } + + /* Load the Elf data. Physical load addresses in elf64 header do not + * show up correctly. Use user supplied address for now to patch the + * elf header + */ + + phdr = &ehdr.e_phdr[0]; + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + my_kernel = hole_addr = locate_hole(info, size, 0, 0, max_addr, 1); + ehdr.e_phdr[0].p_paddr = hole_addr; + result = elf_exec_load(&ehdr, info); + if (result < 0) { + free_elf_info(&ehdr); + return result; + } + + /* If panic kernel is being loaded, additional segments need + * to be created. + */ + if (info->kexec_flags & KEXEC_ON_CRASH) { + result = load_crashdump_segments(info, modified_cmdline, + max_addr, 0); + if (result < 0) + return -1; + /* Use new command line. 
*/ + cmdline = modified_cmdline; + } + + /* Add v2wrap to the current image */ + elf_rel_build_load(info, &info->rhdr, purgatory, + purgatory_size, 0, max_addr, 1, 0); + + /* Add a ram-disk to the current image + * Note: Add the ramdisk after elf_rel_build_load + */ + if (ramdisk) { + if (devicetreeblob) { + fprintf(stderr, + "Can't use ramdisk with device tree blob input\n"); + return -1; + } + seg_buf = slurp_file(ramdisk, &seg_size); + hole_addr = add_buffer(info, seg_buf, seg_size, seg_size, + 0, 0, max_addr, 1); + initrd_base = hole_addr; + initrd_size = seg_size; + } /* ramdisk */ + + if (devicetreeblob) { + /* Grab device tree from buffer */ + seg_buf = slurp_file(devicetreeblob, &seg_size); + } else { + /* create from fs2dt */ + create_flatten_tree(&seg_buf, &seg_size, cmdline); + } + + result = fixup_dt(&seg_buf, &seg_size); + if (result < 0) + return result; + + my_dt_offset = add_buffer(info, seg_buf, seg_size, seg_size, + 0, 0, max_addr, -1); + +#ifdef NEED_RESERVE_DTB + /* patch reserve map address for flattened device-tree + * find last entry (both 0) in the reserve mem list. 
Assume DT + * entry is before this one + */ + bb_ptr = (struct bootblock *)(seg_buf); + rsvmap_ptr = (uint64_t *)(seg_buf + be32_to_cpu(bb_ptr->off_mem_rsvmap)); + while (*rsvmap_ptr || *(rsvmap_ptr+1)) + rsvmap_ptr += 2; + rsvmap_ptr -= 2; + *rsvmap_ptr = cpu_to_be64(my_dt_offset); + rsvmap_ptr++; + *rsvmap_ptr = cpu_to_be64((uint64_t)be32_to_cpu(bb_ptr->totalsize)); +#endif + + if (read_prop("/proc/device-tree/ibm,opal/opal-base-address", + &my_opal_base, sizeof(my_opal_base)) == 0) { + my_opal_base = be64_to_cpu(my_opal_base); + elf_rel_set_symbol(&info->rhdr, "opal_base", + &my_opal_base, sizeof(my_opal_base)); + } + + if (read_prop("/proc/device-tree/ibm,opal/opal-entry-address", + &my_opal_entry, sizeof(my_opal_entry)) == 0) { + my_opal_entry = be64_to_cpu(my_opal_entry); + elf_rel_set_symbol(&info->rhdr, "opal_entry", + &my_opal_entry, sizeof(my_opal_entry)); + } + + /* Set kernel */ + elf_rel_set_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel)); + + /* Set dt_offset */ + elf_rel_set_symbol(&info->rhdr, "dt_offset", &my_dt_offset, + sizeof(my_dt_offset)); + + /* get slave code from new kernel, put in purgatory */ + elf_rel_get_symbol(&info->rhdr, "purgatory_start", slave_code, + sizeof(slave_code)); + master_entry = slave_code[0]; + memcpy(slave_code, phdr->p_data, sizeof(slave_code)); + slave_code[0] = master_entry; + elf_rel_set_symbol(&info->rhdr, "purgatory_start", slave_code, + sizeof(slave_code)); + + if (info->kexec_flags & KEXEC_ON_CRASH) { + my_panic_kernel = 1; + /* Set panic flag */ + elf_rel_set_symbol(&info->rhdr, "panic_kernel", + &my_panic_kernel, sizeof(my_panic_kernel)); + + /* Set backup address */ + my_backup_start = info->backup_start; + elf_rel_set_symbol(&info->rhdr, "backup_start", + &my_backup_start, sizeof(my_backup_start)); + + /* Tell relocatable kernel to run at load address + * via word before slave code in purgatory + */ + + elf_rel_get_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + 
if (my_run_at_load == KERNEL_RUN_AT_ZERO_MAGIC) + my_run_at_load = 1; + /* else it should be a fixed offset image */ + elf_rel_set_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + } + + /* Set stack address */ + my_stack = locate_hole(info, 16*1024, 0, 0, max_addr, 1); + my_stack += 16*1024; + elf_rel_set_symbol(&info->rhdr, "stack", &my_stack, sizeof(my_stack)); + + /* Set toc */ + toc_addr = my_r2(&info->rhdr); + elf_rel_set_symbol(&info->rhdr, "my_toc", &toc_addr, sizeof(toc_addr)); + + /* Set debug */ + elf_rel_set_symbol(&info->rhdr, "debug", &my_debug, sizeof(my_debug)); + + my_kernel = 0; + my_dt_offset = 0; + my_panic_kernel = 0; + my_backup_start = 0; + my_stack = 0; + toc_addr = 0; + my_run_at_load = 0; + my_debug = 0; + my_opal_base = 0; + my_opal_entry = 0; + + elf_rel_get_symbol(&info->rhdr, "opal_base", &my_opal_base, + sizeof(my_opal_base)); + elf_rel_get_symbol(&info->rhdr, "opal_entry", &my_opal_entry, + sizeof(my_opal_entry)); + elf_rel_get_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel)); + elf_rel_get_symbol(&info->rhdr, "dt_offset", &my_dt_offset, + sizeof(my_dt_offset)); + elf_rel_get_symbol(&info->rhdr, "run_at_load", &my_run_at_load, + sizeof(my_run_at_load)); + elf_rel_get_symbol(&info->rhdr, "panic_kernel", &my_panic_kernel, + sizeof(my_panic_kernel)); + elf_rel_get_symbol(&info->rhdr, "backup_start", &my_backup_start, + sizeof(my_backup_start)); + elf_rel_get_symbol(&info->rhdr, "stack", &my_stack, sizeof(my_stack)); + elf_rel_get_symbol(&info->rhdr, "my_toc", &toc_addr, + sizeof(toc_addr)); + elf_rel_get_symbol(&info->rhdr, "debug", &my_debug, sizeof(my_debug)); + + dbgprintf("info->entry is %p\n", info->entry); + dbgprintf("kernel is %llx\n", (unsigned long long)my_kernel); + dbgprintf("dt_offset is %llx\n", + (unsigned long long)my_dt_offset); + dbgprintf("run_at_load flag is %x\n", my_run_at_load); + dbgprintf("panic_kernel is %x\n", my_panic_kernel); + dbgprintf("backup_start is %llx\n", + 
(unsigned long long)my_backup_start); + dbgprintf("stack is %llx\n", (unsigned long long)my_stack); + dbgprintf("toc_addr is %llx\n", (unsigned long long)toc_addr); + dbgprintf("purgatory size is %zu\n", purgatory_size); + dbgprintf("debug is %d\n", my_debug); + dbgprintf("opal_base is %llx\n", (unsigned long long) my_opal_base); + dbgprintf("opal_entry is %llx\n", (unsigned long long) my_opal_entry); + + return 0; +} + +void elf_ppc64_usage(void) +{ + fprintf(stderr, " --command-line=<Command line> command line to append.\n"); + fprintf(stderr, " --append=<Command line> same as --command-line.\n"); + fprintf(stderr, " --ramdisk=<filename> Initial RAM disk.\n"); + fprintf(stderr, " --initrd=<filename> same as --ramdisk.\n"); + fprintf(stderr, " --devicetreeblob=<filename> Specify device tree blob file.\n"); + fprintf(stderr, " "); + fprintf(stderr, "Not applicable while using --kexec-file-syscall.\n"); + fprintf(stderr, " --reuse-cmdline Use kernel command line from running system.\n"); + fprintf(stderr, " --dtb=<filename> same as --devicetreeblob.\n"); + + fprintf(stderr, "elf support is still broken\n"); +} diff --git a/kexec/arch/ppc64/kexec-elf-rel-ppc64.c b/kexec/arch/ppc64/kexec-elf-rel-ppc64.c new file mode 100644 index 0000000..51b1354 --- /dev/null +++ b/kexec/arch/ppc64/kexec-elf-rel-ppc64.c @@ -0,0 +1,204 @@ +#include <stdio.h> +#include <elf.h> +#include <string.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-ppc64.h" + +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define STO_PPC64_LOCAL_BIT 5 +#define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT) +#define PPC64_LOCAL_ENTRY_OFFSET(other) \ + (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2) + +static unsigned int local_entry_offset(struct mem_sym *sym) +{ + /* If this symbol has a local entry point, use it. 
/*
 * Apply one RELA relocation of type r_type.
 *
 * ehdr     - ELF image being relocated; used to find the TOC via my_r2().
 * sym      - symbol the relocation refers to; only consulted for the
 *            local-entry offset of R_PPC64_REL24.
 * location - where the relocated field is written.
 *            NOTE(review): presumably a pointer into the in-memory copy of
 *            the section, not the final load address - confirm at caller.
 * address  - address of the relocated field, used as the place ("P") for
 *            PC-relative relocations.
 * value    - value to relocate against.
 *            NOTE(review): presumably symbol value plus addend - confirm.
 *
 * Unknown relocation types, out-of-range values and unexpected
 * instructions terminate the program through die().
 */
void machine_apply_elf_rel(struct mem_ehdr *ehdr, struct mem_sym *sym,
	unsigned long r_type, void *location, unsigned long address,
	unsigned long value)
{
	switch(r_type) {
	case R_PPC64_ADDR32:
		/* Simply set it */
		*(uint32_t *)location = value;
		break;

	case R_PPC64_ADDR64:
	case R_PPC64_REL64:
		/* Simply set it */
		/* NOTE(review): REL64 is stored without subtracting the
		 * place, unlike REL24/REL32 - confirm this is intended. */
		*(uint64_t *)location = value;
		break;

	case R_PPC64_REL32:
		/*
		 * Relative to the place being relocated.  The original
		 * subtracted (uint32_t)location, i.e. the truncated pointer
		 * to the write buffer; "address" is what R_PPC64_REL24 below
		 * already uses as the place.
		 */
		*(uint32_t *)location = value - address;
		break;

	case R_PPC64_TOC:
		*(uint64_t *)location = my_r2(ehdr);
		break;

	case R_PPC64_TOC16:
		do_relative_toc(value - my_r2(ehdr), location, 0xffff, 1);
		break;

	case R_PPC64_TOC16_DS:
		do_relative_toc(value - my_r2(ehdr), location, 0xfffc, 1);
		break;

	case R_PPC64_TOC16_LO:
		do_relative_toc(value - my_r2(ehdr), location, 0xffff, 0);
		break;

	case R_PPC64_TOC16_LO_DS:
		do_relative_toc(value - my_r2(ehdr), location, 0xfffc, 0);
		break;

	case R_PPC64_TOC16_HI:
		do_relative_toc((value - my_r2(ehdr)) >> 16, location,
			0xffff, 0);
		break;

	case R_PPC64_TOC16_HA:
		do_relative_toc((value - my_r2(ehdr) + 0x8000) >> 16, location,
			0xffff, 0);
		break;

	case R_PPC64_REL24:
		value += local_entry_offset(sym);
		/* Convert value to relative */
		value -= address;
		if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0) {
			die("REL24 %li out of range!\n", (long int)value);
		}

		/* Only replace bits 2 through 26 */
		*(uint32_t *)location = (*(uint32_t *)location & ~0x03fffffc) |
					(value & 0x03fffffc);
		break;

	case R_PPC64_ADDR16_LO:
		*(uint16_t *)location = value & 0xffff;
		break;

	case R_PPC64_ADDR16_HI:
		*(uint16_t *)location = (value >> 16) & 0xffff;
		break;

	case R_PPC64_ADDR16_HA:
		/* +0x8000 compensates for the sign of the low half */
		*(uint16_t *)location = (((value + 0x8000) >> 16) & 0xffff);
		break;

	case R_PPC64_ADDR16_HIGHER:
		*(uint16_t *)location = (((uint64_t)value >> 32) & 0xffff);
		break;

	case R_PPC64_ADDR16_HIGHEST:
		*(uint16_t *)location = (((uint64_t)value >> 48) & 0xffff);
		break;

		/* R_PPC64_REL16_HA and R_PPC64_REL16_LO are handled to support
		 * ABIv2 r2 assignment based on r12 for PIC executable.
		 * Here the address is known, so replace
		 *	0:	addis 2,12,.TOC.-0b@ha
		 *		addi 2,2,.TOC.-0b@l
		 * by
		 *		lis 2,.TOC.@ha
		 *		addi 2,2,.TOC.@l
		 */
	case R_PPC64_REL16_HA:
		/* check that we are dealing with the addis 2,12 instruction */
		if (((*(uint32_t*)location) & 0xffff0000) != 0x3c4c0000)
			die("Unexpected instruction for R_PPC64_REL16_HA");
		value += my_r2(ehdr);
		/* replacing by lis 2 */
		*(uint32_t *)location = 0x3c400000 + ((value >> 16) & 0xffff);
		break;

	case R_PPC64_REL16_LO:
		/* check that we are dealing with the addi 2,2 instruction */
		if (((*(uint32_t*)location) & 0xffff0000) != 0x38420000)
			die("Unexpected instruction for R_PPC64_REL16_LO");

		value += my_r2(ehdr) - 4;
		*(uint16_t *)location = value & 0xffff;
		break;

	default:
		die("Unknown rela relocation: %lu\n", r_type);
		break;
	}
}
+ */ + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <dirent.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-ppc64.h" +#include "../../fs2dt.h" +#include "crashdump-ppc64.h" +#include <arch/options.h> + +static struct memory_range *exclude_range = NULL; +static struct memory_range *memory_range = NULL; +static struct memory_range *base_memory_range = NULL; +static uint64_t rma_top; +uint64_t memory_max = 0; +uint64_t memory_limit; +static int nr_memory_ranges, nr_exclude_ranges; +uint64_t crash_base, crash_size; +unsigned int rtas_base, rtas_size; +uint64_t opal_base, opal_size; +int max_memory_ranges; + +static void cleanup_memory_ranges(void) +{ + if (memory_range) + free(memory_range); + if (base_memory_range) + free(base_memory_range); + if (exclude_range) + free(exclude_range); + if (usablemem_rgns.ranges) + free(usablemem_rgns.ranges); +} + +/* + * Allocate memory for various data structures used to hold + * values of different memory ranges + */ +static int alloc_memory_ranges(void) +{ + int memory_range_len; + + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = (struct memory_range *) malloc(memory_range_len); + if (!memory_range) + return -1; + + base_memory_range = (struct memory_range *) malloc(memory_range_len); + if (!base_memory_range) + goto err1; + + exclude_range = (struct memory_range *) malloc(memory_range_len); + if (!exclude_range) + goto err1; + + usablemem_rgns.ranges = (struct memory_range *) + malloc(memory_range_len); + if (!(usablemem_rgns.ranges)) + goto err1; + + memset(memory_range, 0, memory_range_len); + memset(base_memory_range, 0, memory_range_len); + memset(exclude_range, 0, memory_range_len); + memset(usablemem_rgns.ranges, 0, memory_range_len); + return 0; + +err1: + fprintf(stderr, "memory range structure 
allocation failure\n"); + cleanup_memory_ranges(); + return -1; + +} + +static int realloc_memory_ranges(void) +{ + size_t memory_range_len; + + max_memory_ranges++; + memory_range_len = sizeof(struct memory_range) * max_memory_ranges; + + memory_range = (struct memory_range *) realloc(memory_range, memory_range_len); + if (!memory_range) + goto err; + + base_memory_range = (struct memory_range *) realloc(base_memory_range, memory_range_len); + if (!base_memory_range) + goto err; + + exclude_range = (struct memory_range *) realloc(exclude_range, memory_range_len); + if (!exclude_range) + goto err; + + usablemem_rgns.ranges = (struct memory_range *) + realloc(usablemem_rgns.ranges, memory_range_len); + if (!(usablemem_rgns.ranges)) + goto err; + + return 0; + +err: + fprintf(stderr, "memory range structure re-allocation failure\n"); + return -1; +} + + +static void add_base_memory_range(uint64_t start, uint64_t end) +{ + base_memory_range[nr_memory_ranges].start = start; + base_memory_range[nr_memory_ranges].end = end; + base_memory_range[nr_memory_ranges].type = RANGE_RAM; + nr_memory_ranges++; + if (nr_memory_ranges >= max_memory_ranges) + realloc_memory_ranges(); + + dbgprintf("%016llx-%016llx : %x\n", + base_memory_range[nr_memory_ranges-1].start, + base_memory_range[nr_memory_ranges-1].end, + base_memory_range[nr_memory_ranges-1].type); +} + +static int get_dyn_reconf_base_ranges(void) +{ + uint64_t start, end; + uint64_t size; + char fname[128], buf[32]; + FILE *file; + unsigned int i; + int n; + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, "ibm,dynamic-reconfiguration-memory/ibm,lmb-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + return -1; + } + if (fread(buf, 1, 8, file) != 8) { + perror(fname); + fclose(file); + return -1; + } + /* + * lmb_size, num_of_lmb_sets(global variables) are + * initialized once here. 
+ */ + size = lmb_size = be64_to_cpu(((uint64_t *)buf)[0]); + fclose(file); + + strcpy(fname, "/proc/device-tree/"); + strcat(fname, + "ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory"); + if ((file = fopen(fname, "r")) == NULL) { + strcat(fname, "-v2"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + return -1; + } + + is_dyn_mem_v2 = 1; + } + + /* first 4 bytes tell the number of lmb set entries */ + if (fread(buf, 1, 4, file) != 4) { + perror(fname); + fclose(file); + return -1; + } + num_of_lmb_sets = be32_to_cpu(((unsigned int *)buf)[0]); + + for (i = 0; i < num_of_lmb_sets; i++) { + if ((n = fread(buf, 1, LMB_ENTRY_SIZE, file)) < 0) { + perror(fname); + fclose(file); + return -1; + } + if (nr_memory_ranges >= max_memory_ranges) { + fclose(file); + return -1; + } + + /* + * If the property is ibm,dynamic-memory-v2, the first 4 bytes + * tell the number of sequential LMBs in this entry. + */ + if (is_dyn_mem_v2) + size = be32_to_cpu(((unsigned int *)buf)[0]) * lmb_size; + + start = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR])); + end = start + size; + add_base_memory_range(start, end); + } + fclose(file); + return 0; +} + +/* Sort the base ranges in memory - this is useful for ensuring that our + * ranges are in ascending order, even if device-tree read of memory nodes + * is done differently. 
Also, could be used for other range coalescing later + */ +static int sort_base_ranges(void) +{ + int i, j; + unsigned long long tstart, tend; + + for (i = 0; i < nr_memory_ranges - 1; i++) { + for (j = 0; j < nr_memory_ranges - i - 1; j++) { + if (base_memory_range[j].start > base_memory_range[j+1].start) { + tstart = base_memory_range[j].start; + tend = base_memory_range[j].end; + base_memory_range[j].start = base_memory_range[j+1].start; + base_memory_range[j].end = base_memory_range[j+1].end; + base_memory_range[j+1].start = tstart; + base_memory_range[j+1].end = tend; + } + } + } + return 0; +} + +/* Get base memory ranges */ +static int get_base_ranges(void) +{ + uint64_t start, end; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + char buf[MAXBYTES]; + DIR *dir, *dmem; + FILE *file; + struct dirent *dentry, *mentry; + int n; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + while ((dentry = readdir(dir)) != NULL) { + if (!strncmp(dentry->d_name, + "ibm,dynamic-reconfiguration-memory", 35)) { + get_dyn_reconf_base_ranges(); + continue; + } + if (strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory")) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((dmem = opendir(fname)) == NULL) { + perror(fname); + closedir(dir); + return -1; + } + while ((mentry = readdir(dmem)) != NULL) { + if (strcmp(mentry->d_name, "reg")) + continue; + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + closedir(dmem); + closedir(dir); + return -1; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + fclose(file); + closedir(dmem); + closedir(dir); + return -1; + } + if (nr_memory_ranges >= max_memory_ranges) { + if (realloc_memory_ranges() < 0) + break; + } + start = be64_to_cpu(((uint64_t *)buf)[0]); + end = start + be64_to_cpu(((uint64_t *)buf)[1]); + add_base_memory_range(start, end); + fclose(file); + } + 
closedir(dmem); + } + closedir(dir); + sort_base_ranges(); + memory_max = base_memory_range[nr_memory_ranges - 1].end; + dbgprintf("get base memory ranges:%d\n", nr_memory_ranges); + + return 0; +} + +/* Sort the exclude ranges in memory */ +static int sort_ranges(void) +{ + int i, j; + uint64_t tstart, tend; + for (i = 0; i < nr_exclude_ranges - 1; i++) { + for (j = 0; j < nr_exclude_ranges - i - 1; j++) { + if (exclude_range[j].start > exclude_range[j+1].start) { + tstart = exclude_range[j].start; + tend = exclude_range[j].end; + exclude_range[j].start = exclude_range[j+1].start; + exclude_range[j].end = exclude_range[j+1].end; + exclude_range[j+1].start = tstart; + exclude_range[j+1].end = tend; + } + } + } + return 0; +} + +void scan_reserved_ranges(unsigned long kexec_flags, int *range_index) +{ + char fname[256], buf[16]; + FILE *file; + int i = *range_index; + + strcpy(fname, "/proc/device-tree/reserved-ranges"); + + file = fopen(fname, "r"); + if (file == NULL) { + if (errno != ENOENT) { + perror(fname); + return; + } + errno = 0; + /* File not present. Non PowerKVM system. */ + return; + } + + /* + * Each reserved range is an (address,size) pair, 2 cells each, + * totalling 4 cells per range. 
+ */ + while (fread(buf, sizeof(uint64_t) * 2, 1, file) == 1) { + uint64_t base, size; + + base = be64_to_cpu(((uint64_t *)buf)[0]); + size = be64_to_cpu(((uint64_t *)buf)[1]); + + exclude_range[i].start = base; + exclude_range[i].end = base + size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + reserve(base, size); + } + fclose(file); + *range_index = i; +} + +/* Return 0 if fname/value valid, -1 otherwise */ +int get_devtree_value(const char *fname, unsigned long long *value) +{ + FILE *file; + char buf[MAXBYTES]; + int n = -1; + + if ((file = fopen(fname, "r"))) { + n = fread(buf, 1, MAXBYTES, file); + fclose(file); + } + + if (n == sizeof(uint32_t)) + *value = ((uint32_t *)buf)[0]; + else if (n == sizeof(uint64_t)) + *value = ((uint64_t *)buf)[0]; + else { + fprintf(stderr, "%s node has invalid size: %d\n", fname, n); + return -1; + } + + return 0; +} + +/* Get devtree details and create exclude_range array + * Also create usablemem_ranges for KEXEC_ON_CRASH + */ +static int get_devtree_details(unsigned long kexec_flags) +{ + uint64_t rma_base = -1, base; + uint64_t tce_base; + unsigned int tce_size; + uint64_t htab_base, htab_size; + uint64_t kernel_end; + uint64_t initrd_start, initrd_end; + char buf[MAXBYTES]; + char device_tree[256] = "/proc/device-tree/"; + char fname[256]; + DIR *dir, *cdir; + FILE *file; + struct dirent *dentry; + struct stat fstat; + int n, i = 0; + + if ((dir = opendir(device_tree)) == NULL) { + perror(device_tree); + return -1; + } + + scan_reserved_ranges(kexec_flags, &i); + + while ((dentry = readdir(dir)) != NULL) { + if (strncmp(dentry->d_name, "chosen", 6) && + strncmp(dentry->d_name, "memory@", 7) && + strcmp(dentry->d_name, "memory") && + strncmp(dentry->d_name, "pci@", 4) && + strncmp(dentry->d_name, "rtas", 4) && + strncmp(dentry->d_name, "ibm,opal", 8)) + continue; + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + if ((cdir = opendir(fname)) == NULL) { + perror(fname); + goto 
error_opendir; + } + + if (strncmp(dentry->d_name, "chosen", 6) == 0) { + strcat(fname, "/linux,kernel-end"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&kernel_end, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + kernel_end = be64_to_cpu(kernel_end); + + /* Add kernel memory to exclude_range */ + exclude_range[i].start = 0x0UL; + exclude_range[i].end = kernel_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + if (kexec_flags & KEXEC_ON_CRASH) { + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,crashkernel-base"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&crash_base, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + crash_base = be64_to_cpu(crash_base); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,crashkernel-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&crash_size, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + crash_size = be64_to_cpu(crash_size); + + if (crash_base > mem_min) + mem_min = crash_base; + if (crash_base + crash_size < mem_max) + mem_max = crash_base + crash_size; + + add_usable_mem_rgns(0, crash_base + crash_size); + reserve(KDUMP_BACKUP_LIMIT, crash_base-KDUMP_BACKUP_LIMIT); + } + /* + * Read the first kernel's memory limit. + * If the first kernel is booted with mem= option then + * it would export "linux,memory-limit" file + * reflecting value for the same. 
+ */ + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,memory-limit"); + if ((file = fopen(fname, "r")) == NULL) { + if (errno != ENOENT) { + perror(fname); + goto error_opencdir; + } + errno = 0; + /* + * File not present. + * fall through. On older kernel this file + * is not present. + */ + } else { + if (fread(&memory_limit, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + memory_limit = be64_to_cpu(memory_limit); + } + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,htab-base"); + if ((file = fopen(fname, "r")) == NULL) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ + errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&htab_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + htab_base = be64_to_cpu(htab_base); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,htab-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&htab_size, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + htab_size = be64_to_cpu(htab_size); + + /* Add htab address to exclude_range - NON-LPAR only */ + exclude_range[i].start = htab_base; + exclude_range[i].end = htab_base + htab_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + /* reserve the initrd_start and end locations. 
*/ + if (reuse_initrd) { + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,initrd-start"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + /* check for 4 and 8 byte initrd offset sizes */ + if (stat(fname, &fstat) != 0) { + perror(fname); + goto error_openfile; + } + if (fread(&initrd_start, fstat.st_size, 1, file) != 1) { + perror(fname); + goto error_openfile; + } + initrd_start = be64_to_cpu(initrd_start); + fclose(file); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,initrd-end"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + /* check for 4 and 8 byte initrd offset sizes */ + if (stat(fname, &fstat) != 0) { + perror(fname); + goto error_openfile; + } + if (fread(&initrd_end, fstat.st_size, 1, file) != 1) { + perror(fname); + goto error_openfile; + } + initrd_end = be64_to_cpu(initrd_end); + fclose(file); + + /* Add initrd address to exclude_range */ + exclude_range[i].start = initrd_start; + exclude_range[i].end = initrd_end; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + } + } /* chosen */ + + if (strncmp(dentry->d_name, "rtas", 4) == 0) { + strcat(fname, "/linux,rtas-base"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_base, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + rtas_base = be32_to_cpu(rtas_base); + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/rtas-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&rtas_size, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + closedir(cdir); + rtas_size = be32_to_cpu(rtas_size); + 
/* Add rtas to exclude_range */ + exclude_range[i].start = rtas_base; + exclude_range[i].end = rtas_base + rtas_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(rtas_base, rtas_size); + } /* rtas */ + + if (strncmp(dentry->d_name, "ibm,opal", 8) == 0) { + strcat(fname, "/opal-base-address"); + file = fopen(fname, "r"); + if (file == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&opal_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + opal_base = be64_to_cpu(opal_base); + fclose(file); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/opal-runtime-size"); + file = fopen(fname, "r"); + if (file == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&opal_size, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + closedir(cdir); + opal_size = be64_to_cpu(opal_size); + /* Add OPAL to exclude_range */ + exclude_range[i].start = opal_base; + exclude_range[i].end = opal_base + opal_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(opal_base, opal_size); + } /* ibm,opal */ + + if (!strncmp(dentry->d_name, "memory@", 7) || + !strcmp(dentry->d_name, "memory")) { + strcat(fname, "/reg"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if ((n = fread(buf, 1, MAXBYTES, file)) < 0) { + perror(fname); + goto error_openfile; + } + base = be64_to_cpu(((uint64_t *)buf)[0]); + if (base < rma_base) { + rma_base = base; + rma_top = base + be64_to_cpu(((uint64_t *)buf)[1]); + } + + fclose(file); + closedir(cdir); + } /* memory */ + + if (strncmp(dentry->d_name, "pci@", 4) == 0) { + strcat(fname, "/linux,tce-base"); + if ((file = fopen(fname, "r")) == NULL) { + closedir(cdir); + if (errno == ENOENT) { + /* Non LPAR */ 
+ errno = 0; + continue; + } + perror(fname); + goto error_opendir; + } + if (fread(&tce_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + tce_base = be64_to_cpu(tce_base); + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/linux,tce-size"); + if ((file = fopen(fname, "r")) == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&tce_size, sizeof(unsigned int), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + tce_size = be32_to_cpu(tce_size); + /* Add tce to exclude_range - NON-LPAR only */ + exclude_range[i].start = tce_base; + exclude_range[i].end = tce_base + tce_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(tce_base, tce_size); + closedir(cdir); + } /* pci */ + } + closedir(dir); + + nr_exclude_ranges = i; + + sort_ranges(); + + int k; + for (k = 0; k < i; k++) + dbgprintf("exclude_range sorted exclude_range[%d] " + "start:%llx, end:%llx\n", k, exclude_range[k].start, + exclude_range[k].end); + + return 0; + +error_openfile: + fclose(file); +error_opencdir: + closedir(cdir); +error_opendir: + closedir(dir); + return -1; +} + +/* Setup a sorted list of memory ranges. */ +int setup_memory_ranges(unsigned long kexec_flags) +{ + int i, j = 0; + + /* Get the base list of memory ranges from /proc/device-tree/memory + * nodes. 
Build list of ranges to be excluded from valid memory + */ + + if (get_base_ranges()) + goto out; + if (get_devtree_details(kexec_flags)) + goto out; + + for (i = 0; i < nr_exclude_ranges; i++) { + /* If first exclude range does not start with 0, include the + * first hole of valid memory from 0 - exclude_range[0].start + */ + if (i == 0) { + if (exclude_range[i].start != 0) { + memory_range[j].start = 0; + memory_range[j].end = exclude_range[i].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + } + } /* i == 0 */ + /* If the last exclude range does not end at memory_max, include + * the last hole of valid memory from exclude_range[last].end - + * memory_max + */ + if (i == nr_exclude_ranges - 1) { + if (exclude_range[i].end < memory_max) { + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = memory_max; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + /* Limit the end to rma_top */ + if (memory_range[j-1].start >= rma_top) { + j--; + break; + } + if ((memory_range[j-1].start < rma_top) && + (memory_range[j-1].end >= rma_top)) { + memory_range[j-1].end = rma_top; + break; + } + continue; + } + } /* i == nr_exclude_ranges - 1 */ + /* contiguous exclude ranges - skip */ + if (exclude_range[i+1].start == exclude_range[i].end + 1) + continue; + memory_range[j].start = exclude_range[i].end + 1; + memory_range[j].end = exclude_range[i+1].start - 1; + memory_range[j].type = RANGE_RAM; + j++; + if (j >= max_memory_ranges) + realloc_memory_ranges(); + /* Limit range to rma_top */ + if (memory_range[j-1].start >= rma_top) { + j--; + break; + } + if ((memory_range[j-1].start < rma_top) && + (memory_range[j-1].end >= rma_top)) { + memory_range[j-1].end = rma_top; + break; + } + } + nr_memory_ranges = j; + + int k; + for (k = 0; k < j; k++) + dbgprintf("setup_memory_ranges memory_range[%d] " + "start:%llx, end:%llx\n", k, 
memory_range[k].start, + memory_range[k].end); + return 0; + +out: + cleanup_memory_ranges(); + return -1; +} + +/* Return a list of valid memory ranges */ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + /* allocate memory_range dynamically */ + max_memory_ranges = 1; + + if (alloc_memory_ranges()) + return -1; + if (setup_memory_ranges(kexec_flags)) + return -1; + + /* + * copy the memory here, another realloc_memory_ranges might + * corrupt the old memory + */ + *range = calloc(sizeof(struct memory_range), nr_memory_ranges); + if (*range == NULL) + return -1; + memmove(*range, memory_range, + sizeof(struct memory_range) * nr_memory_ranges); + + *ranges = nr_memory_ranges; + dbgprintf("get memory ranges:%d\n", nr_memory_ranges); + return 0; +} + +struct file_type file_type[] = { + { "elf-ppc64", elf_ppc64_probe, elf_ppc64_load, elf_ppc64_usage }, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ + fprintf(stderr, " --elf64-core-headers Prepare core headers in ELF64 format\n"); + fprintf(stderr, " --dt-no-old-root Do not reuse old kernel root= param.\n" \ + " while creating flatten device tree.\n"); +} + +struct arch_options_t arch_options = { + .core_header_type = CORE_TYPE_ELF64, +}; + +int arch_process_options(int argc, char **argv) +{ + /* We look for all options so getopt_long doesn't start reordering + * argv[] before file_type[n].load() gets a look in. 
+ */ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch(opt) { + default: + break; + case OPT_ELF64_CORE: + arch_options.core_header_type = CORE_TYPE_ELF64; + break; + case OPT_DT_NO_OLD_ROOT: + dt_no_old_root = 1; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + /* We are running a 32-bit kexec-tools on 64-bit ppc64. + * So pass KEXEC_ARCH_PPC64 here + */ + { "ppc64", KEXEC_ARCH_PPC64 }, + { "ppc64le", KEXEC_ARCH_PPC64 }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} diff --git a/kexec/arch/ppc64/kexec-ppc64.h b/kexec/arch/ppc64/kexec-ppc64.h new file mode 100644 index 0000000..434b4bf --- /dev/null +++ b/kexec/arch/ppc64/kexec-ppc64.h @@ -0,0 +1,45 @@ +#ifndef KEXEC_PPC64_H +#define KEXEC_PPC64_H + +#define PATH_LEN 256 +#define MAXBYTES 128 +#define MAX_LINE 160 +#define CORE_TYPE_ELF32 1 +#define CORE_TYPE_ELF64 2 + +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 17 +#if (BOOT_BLOCK_VERSION < 16) +# define NEED_STRUCTURE_BLOCK_EXTRA_PAD +#endif +#define HAVE_DYNAMIC_MEMORY +#define NEED_RESERVE_DTB + +extern int get_devtree_value(const char *fname, unsigned long long *pvalue); + +int setup_memory_ranges(unsigned long kexec_flags); + +int elf_ppc64_probe(const char *buf, off_t len); +int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_ppc64_usage(void); + +struct mem_ehdr; +unsigned long my_r2(const struct mem_ehdr *ehdr); + +extern uint64_t initrd_base, initrd_size; +extern int 
max_memory_ranges; +extern unsigned char reuse_initrd; + +struct arch_options_t { + int core_header_type; +}; + +typedef struct mem_rgns { + unsigned int size; + struct memory_range *ranges; +} mem_rgns_t; + +extern mem_rgns_t usablemem_rgns; + +#endif /* KEXEC_PPC64_H */ diff --git a/kexec/arch/ppc64/kexec-zImage-ppc64.c b/kexec/arch/ppc64/kexec-zImage-ppc64.c new file mode 100644 index 0000000..e946205 --- /dev/null +++ b/kexec/arch/ppc64/kexec-zImage-ppc64.c @@ -0,0 +1,184 @@ +/* + * kexec: Linux boots Linux + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <linux/elf.h> +#include "../../kexec.h" + +#define MAX_HEADERS 32 + +int zImage_ppc64_probe(FILE *file) +{ + Elf32_Ehdr elf; + int valid; + + if (fseek(file, 0, SEEK_SET) < 0) { + fprintf(stderr, "seek error: %s\n", + strerror(errno)); + return -1; + } + if (fread(&elf, sizeof(Elf32_Ehdr), 1, file) != 1) { + fprintf(stderr, "read error: %s\n", + strerror(errno)); + return -1; + } + + if (elf.e_machine == EM_PPC64) { + fprintf(stderr, "Elf64 not supported\n"); + return -1; + } + + valid = (elf.e_ident[EI_MAG0] == ELFMAG0 && + elf.e_ident[EI_MAG1] == ELFMAG1 && + elf.e_ident[EI_MAG2] == ELFMAG2 && + elf.e_ident[EI_MAG3] == ELFMAG3 && + elf.e_ident[EI_CLASS] == ELFCLASS32 && + elf.e_ident[EI_DATA] == ELFDATA2MSB && + elf.e_type == ET_EXEC && + elf.e_machine == EM_PPC); + + return valid ? 
0 : -1; +} + +int zImage_ppc64_load(FILE *file, int UNUSED(argc), char **UNUSED(argv), + void **ret_entry, struct kexec_segment **ret_segments, + int *ret_nr_segments) +{ + Elf32_Ehdr elf; + Elf32_Phdr *p, *ph; + struct kexec_segment *segment; + int i; + unsigned long memsize, filesize, offset, load_loc = 0; + + /* Parse command line arguments */ + + /* Read in the Elf32 header */ + if (fseek(file, 0, SEEK_SET) < 0) { + perror("seek error:"); + return -1; + } + if (fread(&elf, sizeof(Elf32_Ehdr), 1, file) != 1) { + perror("read error: "); + return -1; + } + if (elf.e_phnum > MAX_HEADERS) { + fprintf(stderr, + "Only kernels with %i program headers are supported\n", + MAX_HEADERS); + return -1; + } + + /* Read the section header */ + ph = (Elf32_Phdr *)malloc(sizeof(Elf32_Phdr) * elf.e_phnum); + if (ph == 0) { + perror("malloc failed: "); + return -1; + } + if (fseek(file, elf.e_phoff, SEEK_SET) < 0) { + perror("seek failed: "); + free(ph); + return -1; + } + if (fread(ph, sizeof(Elf32_Phdr) * elf.e_phnum, 1, file) != 1) { + perror("read error: "); + free(ph); + return -1; + } + + *ret_segments = malloc(elf.e_phnum * sizeof(struct kexec_segment)); + if (*ret_segments == 0) { + fprintf(stderr, "malloc failed: %s\n", + strerror(errno)); + free(ph); + return -1; + } + segment = ret_segments[0]; + + /* Scan through the program header */ + memsize = filesize = offset = 0; + p = ph; + for (i = 0; i < elf.e_phnum; ++i, ++p) { + if (p->p_type != PT_LOAD || p->p_offset == 0) + continue; + if (memsize == 0) { + offset = p->p_offset; + memsize = p->p_memsz; + filesize = p->p_filesz; + load_loc = p->p_vaddr; + } else { + memsize = p->p_offset + p->p_memsz - offset; + filesize = p->p_offset + p->p_filesz - offset; + } + } + if (memsize == 0) { + fprintf(stderr, "Can't find a loadable segment.\n"); + free(ph); + return -1; + } + + /* Load program segments */ + p = ph; + segment->buf = malloc(filesize); + if (segment->buf == 0) { + perror("malloc failed: "); + free(ph); + return 
-1; + } + for (i = 0; i < elf.e_phnum; ++i, ++p) { + unsigned long mem_offset; + if (p->p_type != PT_LOAD || p->p_offset == 0) + continue; + + /* skip to the actual image */ + if (fseek(file, p->p_offset, SEEK_SET) < 0) { + perror("seek error: "); + free(ph); + return -1; + } + mem_offset = p->p_vaddr - load_loc; + if (fread((void *)segment->buf+mem_offset, p->p_filesz, 1, + file) != 1) { + perror("read error: "); + free(ph); + return -1; + } + } + segment->mem = (void *) load_loc; + segment->memsz = memsize; + segment->bufsz = filesize; + *ret_entry = (void *)(uintptr_t)elf.e_entry; + *ret_nr_segments = i - 1; + free(ph); + return 0; +} + +void zImage_ppc64_usage(void) +{ + fprintf(stderr, "zImage support is still broken\n"); +} |