summaryrefslogtreecommitdiffstats
path: root/kexec/arch/arm64/kexec-arm64.c
diff options
context:
space:
mode:
Diffstat (limited to 'kexec/arch/arm64/kexec-arm64.c')
-rw-r--r--kexec/arch/arm64/kexec-arm64.c1365
1 files changed, 1365 insertions, 0 deletions
diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
new file mode 100644
index 0000000..4a67b0d
--- /dev/null
+++ b/kexec/arch/arm64/kexec-arm64.c
@@ -0,0 +1,1365 @@
+/*
+ * ARM64 kexec.
+ */
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <errno.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <libfdt.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <linux/elf-em.h>
+#include <elf.h>
+#include <elf_info.h>
+
+#include <unistd.h>
+#include <syscall.h>
+#include <errno.h>
+#include <linux/random.h>
+
+#include "kexec.h"
+#include "kexec-arm64.h"
+#include "crashdump.h"
+#include "crashdump-arm64.h"
+#include "dt-ops.h"
+#include "fs2dt.h"
+#include "iomem.h"
+#include "kexec-syscall.h"
+#include "mem_regions.h"
+#include "arch/options.h"
+
+#define ROOT_NODE_ADDR_CELLS_DEFAULT 1
+#define ROOT_NODE_SIZE_CELLS_DEFAULT 1
+
+#define PROP_ADDR_CELLS "#address-cells"
+#define PROP_SIZE_CELLS "#size-cells"
+#define PROP_ELFCOREHDR "linux,elfcorehdr"
+#define PROP_USABLE_MEM_RANGE "linux,usable-memory-range"
+
+#define PAGE_OFFSET_36 ((0xffffffffffffffffUL) << 36)
+#define PAGE_OFFSET_39 ((0xffffffffffffffffUL) << 39)
+#define PAGE_OFFSET_42 ((0xffffffffffffffffUL) << 42)
+#define PAGE_OFFSET_47 ((0xffffffffffffffffUL) << 47)
+#define PAGE_OFFSET_48 ((0xffffffffffffffffUL) << 48)
+
+/* Global flag which indicates that we have tried reading
+ * PHYS_OFFSET from 'kcore' already.
+ */
+static bool try_read_phys_offset_from_kcore = false;
+
+/* Machine specific details. */
+static int va_bits = -1;
+static unsigned long page_offset;
+
+/* Global varables the core kexec routines expect. */
+
+unsigned char reuse_initrd;
+
+off_t initrd_base;
+off_t initrd_size;
+
+const struct arch_map_entry arches[] = {
+ { "aarch64", KEXEC_ARCH_ARM64 },
+ { "aarch64_be", KEXEC_ARCH_ARM64 },
+ { NULL, 0 },
+};
+
+struct file_type file_type[] = {
+ {"vmlinux", elf_arm64_probe, elf_arm64_load, elf_arm64_usage},
+ {"Image", image_arm64_probe, image_arm64_load, image_arm64_usage},
+ {"uImage", uImage_arm64_probe, uImage_arm64_load, uImage_arm64_usage},
+ {"vmlinuz", pez_arm64_probe, pez_arm64_load, pez_arm64_usage},
+};
+
+int file_types = sizeof(file_type) / sizeof(file_type[0]);
+
+/* arm64 global varables. */
+
+struct arm64_opts arm64_opts;
+struct arm64_mem arm64_mem = {
+ .phys_offset = arm64_mem_ngv,
+ .vp_offset = arm64_mem_ngv,
+};
+
+uint64_t get_phys_offset(void)
+{
+ assert(arm64_mem.phys_offset != arm64_mem_ngv);
+ return arm64_mem.phys_offset;
+}
+
+uint64_t get_vp_offset(void)
+{
+ assert(arm64_mem.vp_offset != arm64_mem_ngv);
+ return arm64_mem.vp_offset;
+}
+
+/**
+ * arm64_process_image_header - Process the arm64 image header.
+ *
+ * Make a guess that KERNEL_IMAGE_SIZE will be enough for older kernels.
+ */
+
+int arm64_process_image_header(const struct arm64_image_header *h)
+{
+#if !defined(KERNEL_IMAGE_SIZE)
+# define KERNEL_IMAGE_SIZE MiB(16)
+#endif
+
+ if (!arm64_header_check_magic(h))
+ return EFAILED;
+
+ if (h->image_size) {
+ arm64_mem.text_offset = arm64_header_text_offset(h);
+ arm64_mem.image_size = arm64_header_image_size(h);
+ } else {
+ /* For 3.16 and older kernels. */
+ arm64_mem.text_offset = 0x80000;
+ arm64_mem.image_size = KERNEL_IMAGE_SIZE;
+ fprintf(stderr,
+ "kexec: %s: Warning: Kernel image size set to %lu MiB.\n"
+ " Please verify compatability with lodaed kernel.\n",
+ __func__, KERNEL_IMAGE_SIZE / 1024UL / 1024UL);
+ }
+
+ return 0;
+}
+
+void arch_usage(void)
+{
+ printf(arm64_opts_usage);
+}
+
+int arch_process_options(int argc, char **argv)
+{
+ static const char short_options[] = KEXEC_OPT_STR "";
+ static const struct option options[] = {
+ KEXEC_ARCH_OPTIONS
+ { 0 }
+ };
+ int opt;
+ char *cmdline = NULL;
+ const char *append = NULL;
+ int do_kexec_file_syscall = 0;
+
+ for (opt = 0; opt != -1; ) {
+ opt = getopt_long(argc, argv, short_options, options, 0);
+
+ switch (opt) {
+ case OPT_APPEND:
+ append = optarg;
+ break;
+ case OPT_REUSE_CMDLINE:
+ cmdline = get_command_line();
+ break;
+ case OPT_DTB:
+ arm64_opts.dtb = optarg;
+ break;
+ case OPT_INITRD:
+ arm64_opts.initrd = optarg;
+ break;
+ case OPT_KEXEC_FILE_SYSCALL:
+ do_kexec_file_syscall = 1;
+ case OPT_SERIAL:
+ arm64_opts.console = optarg;
+ break;
+ default:
+ break; /* Ignore core and unknown options. */
+ }
+ }
+
+ arm64_opts.command_line = concat_cmdline(cmdline, append);
+
+ dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__,
+ arm64_opts.command_line);
+ dbgprintf("%s:%d: initrd: %s\n", __func__, __LINE__,
+ arm64_opts.initrd);
+ dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__,
+ (do_kexec_file_syscall && arm64_opts.dtb ? "(ignored)" :
+ arm64_opts.dtb));
+ dbgprintf("%s:%d: console: %s\n", __func__, __LINE__,
+ arm64_opts.console);
+
+ if (do_kexec_file_syscall)
+ arm64_opts.dtb = NULL;
+
+ return 0;
+}
+
+/**
+ * find_purgatory_sink - Find a sink for purgatory output.
+ */
+
+static uint64_t find_purgatory_sink(const char *console)
+{
+ int fd, ret;
+ char device[255], mem[255];
+ struct stat sb;
+ char buffer[10];
+ uint64_t iomem = 0x0;
+
+ if (!console)
+ return 0;
+
+ ret = snprintf(device, sizeof(device), "/sys/class/tty/%s", console);
+ if (ret < 0 || ret >= sizeof(device)) {
+ fprintf(stderr, "snprintf failed: %s\n", strerror(errno));
+ return 0;
+ }
+
+ if (stat(device, &sb) || !S_ISDIR(sb.st_mode)) {
+ fprintf(stderr, "kexec: %s: No valid console found for %s\n",
+ __func__, device);
+ return 0;
+ }
+
+ ret = snprintf(mem, sizeof(mem), "%s%s", device, "/iomem_base");
+ if (ret < 0 || ret >= sizeof(mem)) {
+ fprintf(stderr, "snprintf failed: %s\n", strerror(errno));
+ return 0;
+ }
+
+ printf("console memory read from %s\n", mem);
+
+ fd = open(mem, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "kexec: %s: No able to open %s\n",
+ __func__, mem);
+ return 0;
+ }
+
+ memset(buffer, '\0', sizeof(buffer));
+ ret = read(fd, buffer, sizeof(buffer));
+ if (ret < 0) {
+ fprintf(stderr, "kexec: %s: not able to read fd\n", __func__);
+ close(fd);
+ return 0;
+ }
+
+ sscanf(buffer, "%lx", &iomem);
+ printf("console memory is at %#lx\n", iomem);
+
+ close(fd);
+ return iomem;
+}
+
+/**
+ * struct dtb - Info about a binary device tree.
+ *
+ * @buf: Device tree data.
+ * @size: Device tree data size.
+ * @name: Shorthand name of this dtb for messages.
+ * @path: Filesystem path.
+ */
+
+struct dtb {
+ char *buf;
+ off_t size;
+ const char *name;
+ const char *path;
+};
+
+/**
+ * dump_reservemap - Dump the dtb's reservemap.
+ */
+
+static void dump_reservemap(const struct dtb *dtb)
+{
+ int i;
+
+ for (i = 0; ; i++) {
+ uint64_t address;
+ uint64_t size;
+
+ fdt_get_mem_rsv(dtb->buf, i, &address, &size);
+
+ if (!size)
+ break;
+
+ dbgprintf("%s: %s {%" PRIx64 ", %" PRIx64 "}\n", __func__,
+ dtb->name, address, size);
+ }
+}
+
+/**
+ * set_bootargs - Set the dtb's bootargs.
+ */
+
+static int set_bootargs(struct dtb *dtb, const char *command_line)
+{
+ int result;
+
+ if (!command_line || !command_line[0])
+ return 0;
+
+ result = dtb_set_bootargs(&dtb->buf, &dtb->size, command_line);
+
+ if (result) {
+ fprintf(stderr,
+ "kexec: Set device tree bootargs failed.\n");
+ return EFAILED;
+ }
+
+ return 0;
+}
+
+/**
+ * read_proc_dtb - Read /proc/device-tree.
+ */
+
+static int read_proc_dtb(struct dtb *dtb)
+{
+ int result;
+ struct stat s;
+ static const char path[] = "/proc/device-tree";
+
+ result = stat(path, &s);
+
+ if (result) {
+ dbgprintf("%s: %s\n", __func__, strerror(errno));
+ return EFAILED;
+ }
+
+ dtb->path = path;
+ create_flatten_tree((char **)&dtb->buf, &dtb->size, NULL);
+
+ return 0;
+}
+
+/**
+ * read_sys_dtb - Read /sys/firmware/fdt.
+ */
+
+static int read_sys_dtb(struct dtb *dtb)
+{
+ int result;
+ struct stat s;
+ static const char path[] = "/sys/firmware/fdt";
+
+ result = stat(path, &s);
+
+ if (result) {
+ dbgprintf("%s: %s\n", __func__, strerror(errno));
+ return EFAILED;
+ }
+
+ dtb->path = path;
+ dtb->buf = slurp_file(path, &dtb->size);
+
+ return 0;
+}
+
+/**
+ * read_1st_dtb - Read the 1st stage kernel's dtb.
+ */
+
+static int read_1st_dtb(struct dtb *dtb)
+{
+ int result;
+
+ dtb->name = "dtb_sys";
+ result = read_sys_dtb(dtb);
+
+ if (!result)
+ goto on_success;
+
+ dtb->name = "dtb_proc";
+ result = read_proc_dtb(dtb);
+
+ if (!result)
+ goto on_success;
+
+ dbgprintf("%s: not found\n", __func__);
+ return EFAILED;
+
+on_success:
+ dbgprintf("%s: found %s\n", __func__, dtb->path);
+ return 0;
+}
+
+static int get_cells_size(void *fdt, uint32_t *address_cells,
+ uint32_t *size_cells)
+{
+ int nodeoffset;
+ const uint32_t *prop = NULL;
+ int prop_len;
+
+ /* default values */
+ *address_cells = ROOT_NODE_ADDR_CELLS_DEFAULT;
+ *size_cells = ROOT_NODE_SIZE_CELLS_DEFAULT;
+
+ /* under root node */
+ nodeoffset = fdt_path_offset(fdt, "/");
+ if (nodeoffset < 0)
+ goto on_error;
+
+ prop = fdt_getprop(fdt, nodeoffset, PROP_ADDR_CELLS, &prop_len);
+ if (prop) {
+ if (prop_len == sizeof(*prop))
+ *address_cells = fdt32_to_cpu(*prop);
+ else
+ goto on_error;
+ }
+
+ prop = fdt_getprop(fdt, nodeoffset, PROP_SIZE_CELLS, &prop_len);
+ if (prop) {
+ if (prop_len == sizeof(*prop))
+ *size_cells = fdt32_to_cpu(*prop);
+ else
+ goto on_error;
+ }
+
+ dbgprintf("%s: #address-cells:%d #size-cells:%d\n", __func__,
+ *address_cells, *size_cells);
+ return 0;
+
+on_error:
+ return EFAILED;
+}
+
+static bool cells_size_fitted(uint32_t address_cells, uint32_t size_cells,
+ struct memory_range *range)
+{
+ dbgprintf("%s: %llx-%llx\n", __func__, range->start, range->end);
+
+ /* if *_cells >= 2, cells can hold 64-bit values anyway */
+ if ((address_cells == 1) && (range->start >= (1ULL << 32)))
+ return false;
+
+ if ((size_cells == 1) &&
+ ((range->end - range->start + 1) >= (1ULL << 32)))
+ return false;
+
+ return true;
+}
+
+static void fill_property(void *buf, uint64_t val, uint32_t cells)
+{
+ uint32_t val32;
+ int i;
+
+ if (cells == 1) {
+ val32 = cpu_to_fdt32((uint32_t)val);
+ memcpy(buf, &val32, sizeof(uint32_t));
+ } else {
+ for (i = 0;
+ i < (cells * sizeof(uint32_t) - sizeof(uint64_t)); i++)
+ *(char *)buf++ = 0;
+
+ val = cpu_to_fdt64(val);
+ memcpy(buf, &val, sizeof(uint64_t));
+ }
+}
+
+static int fdt_setprop_ranges(void *fdt, int nodeoffset, const char *name,
+ struct memory_range *ranges, int nr_ranges, bool reverse,
+ uint32_t address_cells, uint32_t size_cells)
+{
+ void *buf, *prop;
+ size_t buf_size;
+ int i, result;
+ struct memory_range *range;
+
+ buf_size = (address_cells + size_cells) * sizeof(uint32_t) * nr_ranges;
+ prop = buf = xmalloc(buf_size);
+ if (!buf)
+ return -ENOMEM;
+
+ for (i = 0; i < nr_ranges; i++) {
+ if (reverse)
+ range = ranges + (nr_ranges - 1 - i);
+ else
+ range = ranges + i;
+
+ fill_property(prop, range->start, address_cells);
+ prop += address_cells * sizeof(uint32_t);
+
+ fill_property(prop, range->end - range->start + 1, size_cells);
+ prop += size_cells * sizeof(uint32_t);
+ }
+
+ result = fdt_setprop(fdt, nodeoffset, name, buf, buf_size);
+
+ free(buf);
+
+ return result;
+}
+
+/**
+ * setup_2nd_dtb - Setup the 2nd stage kernel's dtb.
+ */
+
+static int setup_2nd_dtb(struct dtb *dtb, char *command_line, int on_crash)
+{
+ uint32_t address_cells, size_cells;
+ uint64_t fdt_val64;
+ uint64_t *prop;
+ char *new_buf = NULL;
+ int len, range_len;
+ int nodeoffset;
+ int new_size;
+ int i, result, kaslr_seed;
+
+ result = fdt_check_header(dtb->buf);
+
+ if (result) {
+ fprintf(stderr, "kexec: Invalid 2nd device tree.\n");
+ return EFAILED;
+ }
+
+ result = set_bootargs(dtb, command_line);
+ if (result) {
+ fprintf(stderr, "kexec: cannot set bootargs.\n");
+ result = -EINVAL;
+ goto on_error;
+ }
+
+ /* determine #address-cells and #size-cells */
+ result = get_cells_size(dtb->buf, &address_cells, &size_cells);
+ if (result) {
+ fprintf(stderr, "kexec: cannot determine cells-size.\n");
+ result = -EINVAL;
+ goto on_error;
+ }
+
+ if (!cells_size_fitted(address_cells, size_cells,
+ &elfcorehdr_mem)) {
+ fprintf(stderr, "kexec: elfcorehdr doesn't fit cells-size.\n");
+ result = -EINVAL;
+ goto on_error;
+ }
+
+ for (i = 0; i < usablemem_rgns.size; i++) {
+ if (!cells_size_fitted(address_cells, size_cells,
+ &crash_reserved_mem[i])) {
+ fprintf(stderr, "kexec: usable memory range doesn't fit cells-size.\n");
+ result = -EINVAL;
+ goto on_error;
+ }
+ }
+
+ /* duplicate dt blob */
+ range_len = sizeof(uint32_t) * (address_cells + size_cells);
+ new_size = fdt_totalsize(dtb->buf)
+ + fdt_prop_len(PROP_ELFCOREHDR, range_len)
+ + fdt_prop_len(PROP_USABLE_MEM_RANGE, range_len * usablemem_rgns.size);
+
+ new_buf = xmalloc(new_size);
+ result = fdt_open_into(dtb->buf, new_buf, new_size);
+ if (result) {
+ dbgprintf("%s: fdt_open_into failed: %s\n", __func__,
+ fdt_strerror(result));
+ result = -ENOSPC;
+ goto on_error;
+ }
+
+ /* fixup 'kaslr-seed' with a random value, if supported */
+ nodeoffset = fdt_path_offset(new_buf, "/chosen");
+ prop = fdt_getprop_w(new_buf, nodeoffset,
+ "kaslr-seed", &len);
+ if (!prop || len != sizeof(uint64_t)) {
+ dbgprintf("%s: no kaslr-seed found\n",
+ __func__);
+ /* for kexec warm reboot case, we don't need to fixup
+ * other dtb properties
+ */
+ if (!on_crash) {
+ dump_reservemap(dtb);
+ if (new_buf)
+ free(new_buf);
+
+ return result;
+ }
+ } else {
+ kaslr_seed = fdt64_to_cpu(*prop);
+
+ /* kaslr_seed must be wiped clean by primary
+ * kernel during boot
+ */
+ if (kaslr_seed != 0) {
+ dbgprintf("%s: kaslr-seed is not wiped to 0.\n",
+ __func__);
+ result = -EINVAL;
+ goto on_error;
+ }
+
+ /*
+ * Invoke the getrandom system call with
+ * GRND_NONBLOCK, to make sure we
+ * have a valid random seed to pass to the
+ * secondary kernel.
+ */
+ result = syscall(SYS_getrandom, &fdt_val64,
+ sizeof(fdt_val64),
+ GRND_NONBLOCK);
+
+ if(result == -1) {
+ fprintf(stderr, "%s: Reading random bytes failed.\n",
+ __func__);
+
+ /* Currently on some arm64 platforms this
+ * 'getrandom' system call fails while booting
+ * the platform.
+ *
+ * In case, this happens at best we can set
+ * the 'kaslr_seed' as 0, indicating that the
+ * 2nd kernel will be booted with a 'nokaslr'
+ * like behaviour.
+ */
+ fdt_val64 = 0UL;
+ dbgprintf("%s: Disabling KASLR in secondary kernel.\n",
+ __func__);
+ }
+
+ nodeoffset = fdt_path_offset(new_buf, "/chosen");
+ result = fdt_setprop_inplace(new_buf,
+ nodeoffset, "kaslr-seed",
+ &fdt_val64, sizeof(fdt_val64));
+ if (result) {
+ dbgprintf("%s: fdt_setprop failed: %s\n",
+ __func__, fdt_strerror(result));
+ result = -EINVAL;
+ goto on_error;
+ }
+ }
+
+ if (on_crash) {
+ /* add linux,elfcorehdr */
+ nodeoffset = fdt_path_offset(new_buf, "/chosen");
+ result = fdt_setprop_ranges(new_buf, nodeoffset,
+ PROP_ELFCOREHDR, &elfcorehdr_mem, 1, false,
+ address_cells, size_cells);
+ if (result) {
+ dbgprintf("%s: fdt_setprop failed: %s\n", __func__,
+ fdt_strerror(result));
+ result = -EINVAL;
+ goto on_error;
+ }
+
+ /*
+ * add linux,usable-memory-range
+ *
+ * crash dump kernel support one or two regions, to make
+ * compatibility with existing user-space and older kdump, the
+ * low region is always the last one.
+ */
+ nodeoffset = fdt_path_offset(new_buf, "/chosen");
+ result = fdt_setprop_ranges(new_buf, nodeoffset,
+ PROP_USABLE_MEM_RANGE,
+ usablemem_rgns.ranges, usablemem_rgns.size, true,
+ address_cells, size_cells);
+ if (result) {
+ dbgprintf("%s: fdt_setprop failed: %s\n", __func__,
+ fdt_strerror(result));
+ result = -EINVAL;
+ goto on_error;
+ }
+ }
+
+ fdt_pack(new_buf);
+ dtb->buf = new_buf;
+ dtb->size = fdt_totalsize(new_buf);
+
+ dump_reservemap(dtb);
+
+ return result;
+
+on_error:
+ fprintf(stderr, "kexec: %s failed.\n", __func__);
+ if (new_buf)
+ free(new_buf);
+
+ return result;
+}
+
+unsigned long arm64_locate_kernel_segment(struct kexec_info *info)
+{
+ unsigned long hole;
+
+ if (info->kexec_flags & KEXEC_ON_CRASH) {
+ unsigned long hole_end;
+
+ hole = (crash_reserved_mem[usablemem_rgns.size - 1].start < mem_min ?
+ mem_min : crash_reserved_mem[usablemem_rgns.size - 1].start);
+ hole = _ALIGN_UP(hole, MiB(2));
+ hole_end = hole + arm64_mem.text_offset + arm64_mem.image_size;
+
+ if ((hole_end > mem_max) ||
+ (hole_end > crash_reserved_mem[usablemem_rgns.size - 1].end)) {
+ dbgprintf("%s: Crash kernel out of range\n", __func__);
+ hole = ULONG_MAX;
+ }
+ } else {
+ hole = locate_hole(info,
+ arm64_mem.text_offset + arm64_mem.image_size,
+ MiB(2), 0, ULONG_MAX, 1);
+
+ if (hole == ULONG_MAX)
+ dbgprintf("%s: locate_hole failed\n", __func__);
+ }
+
+ return hole;
+}
+
+/**
+ * arm64_load_other_segments - Prepare the dtb, initrd and purgatory segments.
+ */
+
+int arm64_load_other_segments(struct kexec_info *info,
+ unsigned long image_base)
+{
+ int result;
+ unsigned long dtb_base;
+ unsigned long hole_min;
+ unsigned long hole_max;
+ unsigned long initrd_end;
+ uint64_t purgatory_sink;
+ char *initrd_buf = NULL;
+ struct dtb dtb;
+ char command_line[COMMAND_LINE_SIZE] = "";
+
+ if (arm64_opts.command_line) {
+ if (strlen(arm64_opts.command_line) >
+ sizeof(command_line) - 1) {
+ fprintf(stderr,
+ "Kernel command line too long for kernel!\n");
+ return EFAILED;
+ }
+
+ strncpy(command_line, arm64_opts.command_line,
+ sizeof(command_line) - 1);
+ command_line[sizeof(command_line) - 1] = 0;
+ }
+
+ purgatory_sink = find_purgatory_sink(arm64_opts.console);
+
+ dbgprintf("%s:%d: purgatory sink: 0x%" PRIx64 "\n", __func__, __LINE__,
+ purgatory_sink);
+
+ if (arm64_opts.dtb) {
+ dtb.name = "dtb_user";
+ dtb.buf = slurp_file(arm64_opts.dtb, &dtb.size);
+ } else {
+ result = read_1st_dtb(&dtb);
+
+ if (result) {
+ fprintf(stderr,
+ "kexec: Error: No device tree available.\n");
+ return EFAILED;
+ }
+ }
+
+ result = setup_2nd_dtb(&dtb, command_line,
+ info->kexec_flags & KEXEC_ON_CRASH);
+
+ if (result)
+ return EFAILED;
+
+ /* Put the other segments after the image. */
+
+ hole_min = image_base + arm64_mem.image_size;
+ if (info->kexec_flags & KEXEC_ON_CRASH)
+ hole_max = crash_reserved_mem[usablemem_rgns.size - 1].end;
+ else
+ hole_max = ULONG_MAX;
+
+ if (arm64_opts.initrd) {
+ initrd_buf = slurp_file(arm64_opts.initrd, &initrd_size);
+
+ if (!initrd_buf)
+ fprintf(stderr, "kexec: Empty ramdisk file.\n");
+ else {
+ /* Put the initrd after the kernel. */
+
+ initrd_base = add_buffer_phys_virt(info, initrd_buf,
+ initrd_size, initrd_size, 0,
+ hole_min, hole_max, 1, 0);
+
+ initrd_end = initrd_base + initrd_size;
+
+ /* Check limits as specified in booting.txt.
+ * The kernel may have as little as 32 GB of address space to map
+ * system memory and both kernel and initrd must be 1GB aligend.
+ */
+
+ if (_ALIGN_UP(initrd_end, GiB(1)) - _ALIGN_DOWN(image_base, GiB(1)) > GiB(32)) {
+ fprintf(stderr, "kexec: Error: image + initrd too big.\n");
+ return EFAILED;
+ }
+
+ dbgprintf("initrd: base %lx, size %lxh (%ld)\n",
+ initrd_base, initrd_size, initrd_size);
+
+ result = dtb_set_initrd((char **)&dtb.buf,
+ &dtb.size, initrd_base,
+ initrd_base + initrd_size);
+
+ if (result)
+ return EFAILED;
+ }
+ }
+
+ if (!initrd_buf) {
+ /* Don't reuse the initrd addresses from 1st DTB */
+ dtb_clear_initrd((char **)&dtb.buf, &dtb.size);
+ }
+
+ /* Check size limit as specified in booting.txt. */
+
+ if (dtb.size > MiB(2)) {
+ fprintf(stderr, "kexec: Error: dtb too big.\n");
+ return EFAILED;
+ }
+
+ dtb_base = add_buffer_phys_virt(info, dtb.buf, dtb.size, dtb.size,
+ 0, hole_min, hole_max, 1, 0);
+
+ /* dtb_base is valid if we got here. */
+
+ dbgprintf("dtb: base %lx, size %lxh (%ld)\n", dtb_base, dtb.size,
+ dtb.size);
+
+ elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size,
+ hole_min, hole_max, 1, 0);
+
+ info->entry = (void *)elf_rel_get_addr(&info->rhdr, "purgatory_start");
+
+ elf_rel_set_symbol(&info->rhdr, "arm64_sink", &purgatory_sink,
+ sizeof(purgatory_sink));
+
+ elf_rel_set_symbol(&info->rhdr, "arm64_kernel_entry", &image_base,
+ sizeof(image_base));
+
+ elf_rel_set_symbol(&info->rhdr, "arm64_dtb_addr", &dtb_base,
+ sizeof(dtb_base));
+
+ return 0;
+}
+
+/**
+ * virt_to_phys - For processing elf file values.
+ */
+
+unsigned long virt_to_phys(unsigned long v)
+{
+ unsigned long p;
+
+ p = v - get_vp_offset() + get_phys_offset();
+
+ return p;
+}
+
+/**
+ * phys_to_virt - For crashdump setup.
+ */
+
+unsigned long phys_to_virt(struct crash_elf_info *elf_info,
+ unsigned long long p)
+{
+ unsigned long v;
+
+ v = p - get_phys_offset() + elf_info->page_offset;
+
+ return v;
+}
+
+/**
+ * add_segment - Use virt_to_phys when loading elf files.
+ */
+
+void add_segment(struct kexec_info *info, const void *buf, size_t bufsz,
+ unsigned long base, size_t memsz)
+{
+ add_segment_phys_virt(info, buf, bufsz, base, memsz, 1);
+}
+
+static inline void set_phys_offset(int64_t v, char *set_method)
+{
+ if (arm64_mem.phys_offset == arm64_mem_ngv
+ || v < arm64_mem.phys_offset) {
+ arm64_mem.phys_offset = v;
+ dbgprintf("%s: phys_offset : %016lx (method : %s)\n",
+ __func__, arm64_mem.phys_offset,
+ set_method);
+ }
+}
+
+/**
+ * get_va_bits - Helper for getting VA_BITS
+ */
+
+static int get_va_bits(void)
+{
+ unsigned long long stext_sym_addr;
+
+ /*
+ * if already got from kcore
+ */
+ if (va_bits != -1)
+ goto out;
+
+
+ /* For kernel older than v4.19 */
+ fprintf(stderr, "Warning, can't get the VA_BITS from kcore\n");
+ stext_sym_addr = get_kernel_sym("_stext");
+
+ if (stext_sym_addr == 0) {
+ fprintf(stderr, "Can't get the symbol of _stext.\n");
+ return -1;
+ }
+
+ /* Derive va_bits as per arch/arm64/Kconfig */
+ if ((stext_sym_addr & PAGE_OFFSET_36) == PAGE_OFFSET_36) {
+ va_bits = 36;
+ } else if ((stext_sym_addr & PAGE_OFFSET_39) == PAGE_OFFSET_39) {
+ va_bits = 39;
+ } else if ((stext_sym_addr & PAGE_OFFSET_42) == PAGE_OFFSET_42) {
+ va_bits = 42;
+ } else if ((stext_sym_addr & PAGE_OFFSET_47) == PAGE_OFFSET_47) {
+ va_bits = 47;
+ } else if ((stext_sym_addr & PAGE_OFFSET_48) == PAGE_OFFSET_48) {
+ va_bits = 48;
+ } else {
+ fprintf(stderr,
+ "Cannot find a proper _stext for calculating VA_BITS\n");
+ return -1;
+ }
+
+out:
+ dbgprintf("va_bits : %d\n", va_bits);
+
+ return 0;
+}
+
+/**
+ * get_page_offset - Helper for getting PAGE_OFFSET
+ */
+
+int get_page_offset(unsigned long *page_offset)
+{
+ unsigned long long text_sym_addr, kernel_va_mid;
+ int ret;
+
+ text_sym_addr = get_kernel_sym("_text");
+ if (text_sym_addr == 0) {
+ fprintf(stderr, "Can't get the symbol of _text to calculate page_offset.\n");
+ return -1;
+ }
+
+ ret = get_va_bits();
+ if (ret < 0)
+ return ret;
+
+ /* Since kernel 5.4, kernel image is put above
+ * UINT64_MAX << (va_bits - 1)
+ */
+ kernel_va_mid = UINT64_MAX << (va_bits - 1);
+ /* older kernel */
+ if (text_sym_addr < kernel_va_mid)
+ *page_offset = UINT64_MAX << (va_bits - 1);
+ else
+ *page_offset = UINT64_MAX << va_bits;
+
+ dbgprintf("page_offset : %lx\n", *page_offset);
+
+ return 0;
+}
+
+static void arm64_scan_vmcoreinfo(char *pos)
+{
+ const char *str;
+
+ str = "NUMBER(VA_BITS)=";
+ if (memcmp(str, pos, strlen(str)) == 0)
+ va_bits = strtoul(pos + strlen(str), NULL, 10);
+}
+
+/**
+ * get_phys_offset_from_vmcoreinfo_pt_note - Helper for getting PHYS_OFFSET (and va_bits)
+ * from VMCOREINFO note inside 'kcore'.
+ */
+
+static int get_phys_offset_from_vmcoreinfo_pt_note(long *phys_offset)
+{
+ int fd, ret = 0;
+
+ if ((fd = open("/proc/kcore", O_RDONLY)) < 0) {
+ fprintf(stderr, "Can't open (%s).\n", "/proc/kcore");
+ return EFAILED;
+ }
+
+ arch_scan_vmcoreinfo = arm64_scan_vmcoreinfo;
+ ret = read_phys_offset_elf_kcore(fd, phys_offset);
+
+ close(fd);
+ return ret;
+}
+
+/**
+ * get_phys_base_from_pt_load - Helper for getting PHYS_OFFSET
+ * from PT_LOADs inside 'kcore'.
+ */
+
+int get_phys_base_from_pt_load(long *phys_offset)
+{
+ int i, fd, ret;
+ unsigned long long phys_start;
+ unsigned long long virt_start;
+
+ ret = get_page_offset(&page_offset);
+ if (ret < 0)
+ return ret;
+
+ if ((fd = open("/proc/kcore", O_RDONLY)) < 0) {
+ fprintf(stderr, "Can't open (%s).\n", "/proc/kcore");
+ return EFAILED;
+ }
+
+ read_elf(fd);
+
+ for (i = 0; get_pt_load(i,
+ &phys_start, NULL, &virt_start, NULL);
+ i++) {
+ if (virt_start != NOT_KV_ADDR
+ && virt_start >= page_offset
+ && phys_start != NOT_PADDR)
+ *phys_offset = phys_start -
+ (virt_start & ~page_offset);
+ }
+
+ close(fd);
+ return 0;
+}
+
+static bool to_be_excluded(char *str, unsigned long long start, unsigned long long end)
+{
+ if (!strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL))) {
+ uint64_t load_start, load_end;
+
+ if (!get_crash_kernel_load_range(&load_start, &load_end) &&
+ (load_start == start) && (load_end == end))
+ return false;
+
+ return true;
+ }
+
+ if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) ||
+ !strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) ||
+ !strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)))
+ return false;
+ else
+ return true;
+}
+
+/**
+ * get_memory_ranges - Try to get the memory ranges from
+ * /proc/iomem.
+ */
+int get_memory_ranges(struct memory_range **range, int *ranges,
+ unsigned long kexec_flags)
+{
+ long phys_offset = -1;
+ FILE *fp;
+ const char *iomem = proc_iomem();
+ char line[MAX_LINE], *str;
+ unsigned long long start, end;
+ int n, consumed;
+ struct memory_ranges memranges;
+ struct memory_range *last, excl_range;
+ int ret;
+
+ if (!try_read_phys_offset_from_kcore) {
+ /* Since kernel version 4.19, 'kcore' contains
+ * a new PT_NOTE which carries the VMCOREINFO
+ * information.
+ * If the same is available, one should prefer the
+ * same to retrieve 'PHYS_OFFSET' value exported by
+ * the kernel as this is now the standard interface
+ * exposed by kernel for sharing machine specific
+ * details with the userland.
+ */
+ ret = get_phys_offset_from_vmcoreinfo_pt_note(&phys_offset);
+ if (!ret) {
+ if (phys_offset != -1)
+ set_phys_offset(phys_offset,
+ "vmcoreinfo pt_note");
+ } else {
+ /* If we are running on a older kernel,
+ * try to retrieve the 'PHYS_OFFSET' value
+ * exported by the kernel in the 'kcore'
+ * file by reading the PT_LOADs and determining
+ * the correct combination.
+ */
+ ret = get_phys_base_from_pt_load(&phys_offset);
+ if (!ret)
+ if (phys_offset != -1)
+ set_phys_offset(phys_offset,
+ "pt_load");
+ }
+
+ try_read_phys_offset_from_kcore = true;
+ }
+
+ fp = fopen(iomem, "r");
+ if (!fp)
+ die("Cannot open %s\n", iomem);
+
+ memranges.ranges = NULL;
+ memranges.size = memranges.max_size = 0;
+
+ while (fgets(line, sizeof(line), fp) != 0) {
+ n = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed);
+ if (n != 2)
+ continue;
+ str = line + consumed;
+
+ if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) {
+ ret = mem_regions_alloc_and_add(&memranges,
+ start, end - start + 1, RANGE_RAM);
+ if (ret) {
+ fprintf(stderr,
+ "Cannot allocate memory for ranges\n");
+ fclose(fp);
+ return -ENOMEM;
+ }
+
+ dbgprintf("%s:+[%d] %016llx - %016llx\n", __func__,
+ memranges.size - 1,
+ memranges.ranges[memranges.size - 1].start,
+ memranges.ranges[memranges.size - 1].end);
+ } else if (to_be_excluded(str, start, end)) {
+ if (!memranges.size)
+ continue;
+
+ /*
+ * Note: mem_regions_exclude() doesn't guarantee
+ * that the ranges are sorted out, but as long as
+ * we cope with /proc/iomem, we only operate on
+ * the last entry and so it is safe.
+ */
+
+ /* The last System RAM range */
+ last = &memranges.ranges[memranges.size - 1];
+
+ if (last->end < start)
+ /* New resource outside of System RAM */
+ continue;
+ if (end < last->start)
+ /* Already excluded by parent resource */
+ continue;
+
+ excl_range.start = start;
+ excl_range.end = end;
+ ret = mem_regions_alloc_and_exclude(&memranges, &excl_range);
+ if (ret) {
+ fprintf(stderr,
+ "Cannot allocate memory for ranges (exclude)\n");
+ fclose(fp);
+ return -ENOMEM;
+ }
+ dbgprintf("%s:- %016llx - %016llx\n",
+ __func__, start, end);
+ }
+ }
+
+ fclose(fp);
+
+ *range = memranges.ranges;
+ *ranges = memranges.size;
+
+ /* As a fallback option, we can try determining the PHYS_OFFSET
+ * value from the '/proc/iomem' entries as well.
+ *
+ * But note that this can be flaky, as on certain arm64
+ * platforms, it has been noticed that due to a hole at the
+ * start of physical ram exposed to kernel
+ * (i.e. it doesn't start from address 0), the kernel still
+ * calculates the 'memstart_addr' kernel variable as 0.
+ *
+ * Whereas the SYSTEM_RAM or IOMEM_RESERVED range in
+ * '/proc/iomem' would carry a first entry whose start address
+ * is non-zero (as the physical ram exposed to the kernel
+ * starts from a non-zero address).
+ *
+ * In such cases, if we rely on '/proc/iomem' entries to
+ * calculate the phys_offset, then we will have mismatch
+ * between the user-space and kernel space 'PHYS_OFFSET'
+ * value.
+ */
+ if (memranges.size)
+ set_phys_offset(memranges.ranges[0].start, "iomem");
+
+ dbgprint_mem_range("System RAM ranges;",
+ memranges.ranges, memranges.size);
+
+ return 0;
+}
+
+int arch_compat_trampoline(struct kexec_info *info)
+{
+ return 0;
+}
+
+int machine_verify_elf_rel(struct mem_ehdr *ehdr)
+{
+ return (ehdr->e_machine == EM_AARCH64);
+}
+
+enum aarch64_rel_type {
+ R_AARCH64_NONE = 0,
+ R_AARCH64_ABS64 = 257,
+ R_AARCH64_PREL32 = 261,
+ R_AARCH64_MOVW_UABS_G0_NC = 264,
+ R_AARCH64_MOVW_UABS_G1_NC = 266,
+ R_AARCH64_MOVW_UABS_G2_NC = 268,
+ R_AARCH64_MOVW_UABS_G3 =269,
+ R_AARCH64_LD_PREL_LO19 = 273,
+ R_AARCH64_ADR_PREL_LO21 = 274,
+ R_AARCH64_ADR_PREL_PG_HI21 = 275,
+ R_AARCH64_ADD_ABS_LO12_NC = 277,
+ R_AARCH64_JUMP26 = 282,
+ R_AARCH64_CALL26 = 283,
+ R_AARCH64_LDST64_ABS_LO12_NC = 286,
+ R_AARCH64_LDST128_ABS_LO12_NC = 299
+};
+
+static uint32_t get_bits(uint32_t value, int start, int end)
+{
+ uint32_t mask = ((uint32_t)1 << (end + 1 - start)) - 1;
+ return (value >> start) & mask;
+}
+
+void machine_apply_elf_rel(struct mem_ehdr *ehdr, struct mem_sym *UNUSED(sym),
+ unsigned long r_type, void *ptr, unsigned long address,
+ unsigned long value)
+{
+ uint64_t *loc64;
+ uint32_t *loc32;
+ uint64_t *location = (uint64_t *)ptr;
+ uint64_t data = *location;
+ uint64_t imm;
+ const char *type = NULL;
+
+ switch((enum aarch64_rel_type)r_type) {
+ case R_AARCH64_ABS64:
+ type = "ABS64";
+ loc64 = ptr;
+ *loc64 = cpu_to_elf64(ehdr, value);
+ break;
+ case R_AARCH64_PREL32:
+ type = "PREL32";
+ loc32 = ptr;
+ *loc32 = cpu_to_elf32(ehdr, value - address);
+ break;
+
+ /* Set a MOV[KZ] immediate field to bits [15:0] of X. No overflow check */
+ case R_AARCH64_MOVW_UABS_G0_NC:
+ type = "MOVW_UABS_G0_NC";
+ loc32 = ptr;
+ imm = get_bits(value, 0, 15);
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
+ break;
+ /* Set a MOV[KZ] immediate field to bits [31:16] of X. No overflow check */
+ case R_AARCH64_MOVW_UABS_G1_NC:
+ type = "MOVW_UABS_G1_NC";
+ loc32 = ptr;
+ imm = get_bits(value, 16, 31);
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
+ break;
+ /* Set a MOV[KZ] immediate field to bits [47:32] of X. No overflow check */
+ case R_AARCH64_MOVW_UABS_G2_NC:
+ type = "MOVW_UABS_G2_NC";
+ loc32 = ptr;
+ imm = get_bits(value, 32, 47);
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
+ break;
+ /* Set a MOV[KZ] immediate field to bits [63:48] of X */
+ case R_AARCH64_MOVW_UABS_G3:
+ type = "MOVW_UABS_G3";
+ loc32 = ptr;
+ imm = get_bits(value, 48, 63);
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
+ break;
+
+ case R_AARCH64_LD_PREL_LO19:
+ type = "LD_PREL_LO19";
+ loc32 = ptr;
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32)
+ + (((value - address) << 3) & 0xffffe0));
+ break;
+ case R_AARCH64_ADR_PREL_LO21:
+ if (value & 3)
+ die("%s: ERROR Unaligned value: %lx\n", __func__,
+ value);
+ type = "ADR_PREL_LO21";
+ loc32 = ptr;
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32)
+ + (((value - address) << 3) & 0xffffe0));
+ break;
+ case R_AARCH64_ADR_PREL_PG_HI21:
+ type = "ADR_PREL_PG_HI21";
+ imm = ((value & ~0xfff) - (address & ~0xfff)) >> 12;
+ loc32 = ptr;
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32)
+ + ((imm & 3) << 29) + ((imm & 0x1ffffc) << (5 - 2)));
+ break;
+ case R_AARCH64_ADD_ABS_LO12_NC:
+ type = "ADD_ABS_LO12_NC";
+ loc32 = ptr;
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32)
+ + ((value & 0xfff) << 10));
+ break;
+ case R_AARCH64_JUMP26:
+ type = "JUMP26";
+ loc32 = ptr;
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32)
+ + (((value - address) >> 2) & 0x3ffffff));
+ break;
+ case R_AARCH64_CALL26:
+ type = "CALL26";
+ loc32 = ptr;
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32)
+ + (((value - address) >> 2) & 0x3ffffff));
+ break;
+ /* encode imm field with bits [11:3] of value */
+ case R_AARCH64_LDST64_ABS_LO12_NC:
+ if (value & 7)
+ die("%s: ERROR Unaligned value: %lx\n", __func__,
+ value);
+ type = "LDST64_ABS_LO12_NC";
+ loc32 = ptr;
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32)
+ + ((value & 0xff8) << (10 - 3)));
+ break;
+
+ /* encode imm field with bits [11:4] of value */
+ case R_AARCH64_LDST128_ABS_LO12_NC:
+ if (value & 15)
+ die("%s: ERROR Unaligned value: %lx\n", __func__,
+ value);
+ type = "LDST128_ABS_LO12_NC";
+ loc32 = ptr;
+ imm = value & 0xff0;
+ *loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << (10 - 4)));
+ break;
+ default:
+ die("%s: ERROR Unknown type: %lu\n", __func__, r_type);
+ break;
+ }
+
+ dbgprintf("%s: %s %016lx->%016lx\n", __func__, type, data, *location);
+}
+
+void arch_reuse_initrd(void)
+{
+ reuse_initrd = 1;
+}
+
+void arch_update_purgatory(struct kexec_info *UNUSED(info))
+{
+}