/*
 * ARM64 kexec.
 */

#define _GNU_SOURCE

/* Note: the original system header names were lost; the list below is
 * reconstructed from what this file actually uses (assert, getopt_long,
 * libfdt, PRIx64, open/read/stat, syscall/getrandom, ...). */
#include <assert.h>
#include <elf.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <libfdt.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <linux/elf-em.h>
#include <linux/random.h>

#include "kexec.h"
#include "kexec-arm64.h"
#include "crashdump.h"
#include "crashdump-arm64.h"
#include "dt-ops.h"
#include "fs2dt.h"
#include "iomem.h"
#include "kexec-syscall.h"
#include "mem_regions.h"
#include "arch/options.h"

#define ROOT_NODE_ADDR_CELLS_DEFAULT 1
#define ROOT_NODE_SIZE_CELLS_DEFAULT 1

#define PROP_ADDR_CELLS "#address-cells"
#define PROP_SIZE_CELLS "#size-cells"
#define PROP_ELFCOREHDR "linux,elfcorehdr"
#define PROP_USABLE_MEM_RANGE "linux,usable-memory-range"

#define PAGE_OFFSET_36 ((0xffffffffffffffffUL) << 36)
#define PAGE_OFFSET_39 ((0xffffffffffffffffUL) << 39)
#define PAGE_OFFSET_42 ((0xffffffffffffffffUL) << 42)
#define PAGE_OFFSET_47 ((0xffffffffffffffffUL) << 47)
#define PAGE_OFFSET_48 ((0xffffffffffffffffUL) << 48)

/* Global flag which indicates that we have tried reading
 * PHYS_OFFSET from 'kcore' already.
 */
static bool try_read_phys_offset_from_kcore = false;

/* Machine specific details. */
static int va_bits = -1;
static unsigned long page_offset;

/* Global variables the core kexec routines expect. */

unsigned char reuse_initrd;

off_t initrd_base;
off_t initrd_size;

const struct arch_map_entry arches[] = {
	{ "aarch64", KEXEC_ARCH_ARM64 },
	{ "aarch64_be", KEXEC_ARCH_ARM64 },
	{ NULL, 0 },
};

struct file_type file_type[] = {
	{"vmlinux", elf_arm64_probe, elf_arm64_load, elf_arm64_usage},
	{"Image", image_arm64_probe, image_arm64_load, image_arm64_usage},
	{"uImage", uImage_arm64_probe, uImage_arm64_load, uImage_arm64_usage},
	{"vmlinuz", pez_arm64_probe, pez_arm64_load, pez_arm64_usage},
};

int file_types = sizeof(file_type) / sizeof(file_type[0]);

/* arm64 global variables. */

struct arm64_opts arm64_opts;

struct arm64_mem arm64_mem = {
	.phys_offset = arm64_mem_ngv,
	.vp_offset = arm64_mem_ngv,
};

uint64_t get_phys_offset(void)
{
	assert(arm64_mem.phys_offset != arm64_mem_ngv);
	return arm64_mem.phys_offset;
}

uint64_t get_vp_offset(void)
{
	assert(arm64_mem.vp_offset != arm64_mem_ngv);
	return arm64_mem.vp_offset;
}

/**
 * arm64_process_image_header - Process the arm64 image header.
 *
 * Make a guess that KERNEL_IMAGE_SIZE will be enough for older kernels.
 */

int arm64_process_image_header(const struct arm64_image_header *h)
{
#if !defined(KERNEL_IMAGE_SIZE)
# define KERNEL_IMAGE_SIZE MiB(16)
#endif

	if (!arm64_header_check_magic(h))
		return EFAILED;

	if (h->image_size) {
		arm64_mem.text_offset = arm64_header_text_offset(h);
		arm64_mem.image_size = arm64_header_image_size(h);
	} else {
		/* For 3.16 and older kernels. */
		arm64_mem.text_offset = 0x80000;
		arm64_mem.image_size = KERNEL_IMAGE_SIZE;
		fprintf(stderr,
			"kexec: %s: Warning: Kernel image size set to %lu MiB.\n"
			"  Please verify compatibility with the loaded kernel.\n",
			__func__, KERNEL_IMAGE_SIZE / 1024UL / 1024UL);
	}

	return 0;
}
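/*
 * Illustrative sketch (guarded out of the normal build by the hypothetical
 * KEXEC_ARM64_EXAMPLES macro): how a loader might feed a raw arm64 Image
 * file into arm64_process_image_header() above.  The helper name and the
 * use of slurp_file() here are for illustration only, not the actual probe
 * path.
 */
#ifdef KEXEC_ARM64_EXAMPLES
static int example_probe_image_header(const char *path)
{
	off_t size;
	char *buf = slurp_file(path, &size);
	const struct arm64_image_header *h;

	if (!buf || size < (off_t)sizeof(*h))
		return EFAILED;

	h = (const struct arm64_image_header *)buf;

	/* On success this fills arm64_mem.text_offset and
	 * arm64_mem.image_size from the header (or falls back to the
	 * 3.16-and-older defaults). */
	return arm64_process_image_header(h);
}
#endif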
void arch_usage(void)
{
	printf(arm64_opts_usage);
}

int arch_process_options(int argc, char **argv)
{
	static const char short_options[] = KEXEC_OPT_STR "";
	static const struct option options[] = {
		KEXEC_ARCH_OPTIONS
		{ 0 }
	};
	int opt;
	char *cmdline = NULL;
	const char *append = NULL;
	int do_kexec_file_syscall = 0;

	for (opt = 0; opt != -1; ) {
		opt = getopt_long(argc, argv, short_options, options, 0);

		switch (opt) {
		case OPT_APPEND:
			append = optarg;
			break;
		case OPT_REUSE_CMDLINE:
			cmdline = get_command_line();
			break;
		case OPT_DTB:
			arm64_opts.dtb = optarg;
			break;
		case OPT_INITRD:
			arm64_opts.initrd = optarg;
			break;
		case OPT_KEXEC_FILE_SYSCALL:
			do_kexec_file_syscall = 1;
			break;
		case OPT_SERIAL:
			arm64_opts.console = optarg;
			break;
		default:
			break; /* Ignore core and unknown options. */
		}
	}

	arm64_opts.command_line = concat_cmdline(cmdline, append);

	dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__,
		arm64_opts.command_line);
	dbgprintf("%s:%d: initrd: %s\n", __func__, __LINE__,
		arm64_opts.initrd);
	dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__,
		(do_kexec_file_syscall && arm64_opts.dtb ?
			"(ignored)" : arm64_opts.dtb));
	dbgprintf("%s:%d: console: %s\n", __func__, __LINE__,
		arm64_opts.console);

	if (do_kexec_file_syscall)
		arm64_opts.dtb = NULL;

	return 0;
}

/**
 * find_purgatory_sink - Find a sink for purgatory output.
 */

static uint64_t find_purgatory_sink(const char *console)
{
	int fd, ret;
	char device[255], mem[255];
	struct stat sb;
	char buffer[10];
	uint64_t iomem = 0x0;

	if (!console)
		return 0;

	ret = snprintf(device, sizeof(device), "/sys/class/tty/%s", console);
	if (ret < 0 || ret >= sizeof(device)) {
		fprintf(stderr, "snprintf failed: %s\n", strerror(errno));
		return 0;
	}

	if (stat(device, &sb) || !S_ISDIR(sb.st_mode)) {
		fprintf(stderr, "kexec: %s: No valid console found for %s\n",
			__func__, device);
		return 0;
	}

	ret = snprintf(mem, sizeof(mem), "%s%s", device, "/iomem_base");
	if (ret < 0 || ret >= sizeof(mem)) {
		fprintf(stderr, "snprintf failed: %s\n", strerror(errno));
		return 0;
	}

	printf("console memory read from %s\n", mem);

	fd = open(mem, O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "kexec: %s: Not able to open %s\n",
			__func__, mem);
		return 0;
	}

	memset(buffer, '\0', sizeof(buffer));
	ret = read(fd, buffer, sizeof(buffer));
	if (ret < 0) {
		fprintf(stderr, "kexec: %s: not able to read fd\n", __func__);
		close(fd);
		return 0;
	}

	sscanf(buffer, "%lx", &iomem);
	printf("console memory is at %#lx\n", iomem);

	close(fd);

	return iomem;
}
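/*
 * Illustrative sketch (KEXEC_ARM64_EXAMPLES only): with --console=ttyAMA0,
 * find_purgatory_sink() above reads /sys/class/tty/ttyAMA0/iomem_base and
 * returns the UART MMIO base for purgatory's debug output, or 0 if the
 * sysfs node is missing.  "ttyAMA0" is just an example console name.
 */
#ifdef KEXEC_ARM64_EXAMPLES
static void example_purgatory_sink(void)
{
	uint64_t sink = find_purgatory_sink("ttyAMA0");

	printf("purgatory sink: %#" PRIx64 "\n", sink);
}
#endif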
/**
 * struct dtb - Info about a binary device tree.
 *
 * @buf: Device tree data.
 * @size: Device tree data size.
 * @name: Shorthand name of this dtb for messages.
 * @path: Filesystem path.
 */

struct dtb {
	char *buf;
	off_t size;
	const char *name;
	const char *path;
};

/**
 * dump_reservemap - Dump the dtb's reservemap.
 */

static void dump_reservemap(const struct dtb *dtb)
{
	int i;

	for (i = 0; ; i++) {
		uint64_t address;
		uint64_t size;

		fdt_get_mem_rsv(dtb->buf, i, &address, &size);

		if (!size)
			break;

		dbgprintf("%s: %s {%" PRIx64 ", %" PRIx64 "}\n", __func__,
			dtb->name, address, size);
	}
}

/**
 * set_bootargs - Set the dtb's bootargs.
 */

static int set_bootargs(struct dtb *dtb, const char *command_line)
{
	int result;

	if (!command_line || !command_line[0])
		return 0;

	result = dtb_set_bootargs(&dtb->buf, &dtb->size, command_line);

	if (result) {
		fprintf(stderr, "kexec: Set device tree bootargs failed.\n");
		return EFAILED;
	}

	return 0;
}

/**
 * read_proc_dtb - Read /proc/device-tree.
 */

static int read_proc_dtb(struct dtb *dtb)
{
	int result;
	struct stat s;
	static const char path[] = "/proc/device-tree";

	result = stat(path, &s);

	if (result) {
		dbgprintf("%s: %s\n", __func__, strerror(errno));
		return EFAILED;
	}

	dtb->path = path;
	create_flatten_tree((char **)&dtb->buf, &dtb->size, NULL);

	return 0;
}

/**
 * read_sys_dtb - Read /sys/firmware/fdt.
 */

static int read_sys_dtb(struct dtb *dtb)
{
	int result;
	struct stat s;
	static const char path[] = "/sys/firmware/fdt";

	result = stat(path, &s);

	if (result) {
		dbgprintf("%s: %s\n", __func__, strerror(errno));
		return EFAILED;
	}

	dtb->path = path;
	dtb->buf = slurp_file(path, &dtb->size);

	return 0;
}

/**
 * read_1st_dtb - Read the 1st stage kernel's dtb.
 */

static int read_1st_dtb(struct dtb *dtb)
{
	int result;

	dtb->name = "dtb_sys";
	result = read_sys_dtb(dtb);

	if (!result)
		goto on_success;

	dtb->name = "dtb_proc";
	result = read_proc_dtb(dtb);

	if (!result)
		goto on_success;

	dbgprintf("%s: not found\n", __func__);

	return EFAILED;

on_success:
	dbgprintf("%s: found %s\n", __func__, dtb->path);

	return 0;
}

static int get_cells_size(void *fdt, uint32_t *address_cells,
					uint32_t *size_cells)
{
	int nodeoffset;
	const uint32_t *prop = NULL;
	int prop_len;

	/* default values */
	*address_cells = ROOT_NODE_ADDR_CELLS_DEFAULT;
	*size_cells = ROOT_NODE_SIZE_CELLS_DEFAULT;

	/* under root node */
	nodeoffset = fdt_path_offset(fdt, "/");
	if (nodeoffset < 0)
		goto on_error;

	prop = fdt_getprop(fdt, nodeoffset, PROP_ADDR_CELLS, &prop_len);
	if (prop) {
		if (prop_len == sizeof(*prop))
			*address_cells = fdt32_to_cpu(*prop);
		else
			goto on_error;
	}

	prop = fdt_getprop(fdt, nodeoffset, PROP_SIZE_CELLS, &prop_len);
	if (prop) {
		if (prop_len == sizeof(*prop))
			*size_cells = fdt32_to_cpu(*prop);
		else
			goto on_error;
	}

	dbgprintf("%s: #address-cells:%d #size-cells:%d\n", __func__,
			*address_cells, *size_cells);
	return 0;

on_error:
	return EFAILED;
}

static bool cells_size_fitted(uint32_t address_cells, uint32_t size_cells,
						struct memory_range *range)
{
	dbgprintf("%s: %llx-%llx\n", __func__, range->start, range->end);

	/* if *_cells >= 2, cells can hold 64-bit values anyway */
	if ((address_cells == 1) && (range->start >= (1ULL << 32)))
		return false;

	if ((size_cells == 1) &&
			((range->end - range->start + 1) >= (1ULL << 32)))
		return false;

	return true;
}

static void fill_property(void *buf, uint64_t val, uint32_t cells)
{
	uint32_t val32;
	int i;

	if (cells == 1) {
		val32 = cpu_to_fdt32((uint32_t)val);
		memcpy(buf, &val32, sizeof(uint32_t));
	} else {
		for (i = 0;
		     i < (cells * sizeof(uint32_t) - sizeof(uint64_t)); i++)
			*(char *)buf++ = 0;

		val = cpu_to_fdt64(val);
		memcpy(buf, &val, sizeof(uint64_t));
	}
}

static int fdt_setprop_ranges(void *fdt, int nodeoffset, const char *name,
				struct memory_range *ranges, int nr_ranges,
				bool reverse, uint32_t address_cells,
				uint32_t size_cells)
{
	void *buf, *prop;
	size_t buf_size;
	int i, result;
	struct memory_range *range;

	buf_size = (address_cells + size_cells) * sizeof(uint32_t) * nr_ranges;
	prop = buf = xmalloc(buf_size);
	if (!buf)
		return -ENOMEM;

	for (i = 0; i < nr_ranges; i++) {
		if (reverse)
			range = ranges + (nr_ranges - 1 - i);
		else
			range = ranges + i;

		fill_property(prop, range->start, address_cells);
		prop += address_cells * sizeof(uint32_t);

		fill_property(prop, range->end - range->start + 1, size_cells);
		prop += size_cells * sizeof(uint32_t);
	}

	result = fdt_setprop(fdt, nodeoffset, name, buf, buf_size);

	free(buf);

	return result;
}
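/*
 * Illustrative sketch (KEXEC_ARM64_EXAMPLES only): how fdt_setprop_ranges()
 * above packs one {address, size} pair, assuming the usual arm64 root node
 * values of #address-cells = 2 and #size-cells = 2.  The sample address and
 * size values are made up.
 */
#ifdef KEXEC_ARM64_EXAMPLES
static void example_fill_range_cells(void)
{
	uint32_t cells[4];

	fill_property(&cells[0], 0x80000000ULL, 2);	/* address */
	fill_property(&cells[2], 0x40000000ULL, 2);	/* size */

	/* cells[] now holds both values in fdt (big-endian) byte order,
	 * ready to be handed to fdt_setprop(). */
}
#endif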
/**
 * setup_2nd_dtb - Setup the 2nd stage kernel's dtb.
 */

static int setup_2nd_dtb(struct dtb *dtb, char *command_line, int on_crash)
{
	uint32_t address_cells, size_cells;
	uint64_t fdt_val64;
	uint64_t *prop;
	uint64_t kaslr_seed;
	char *new_buf = NULL;
	int len, range_len;
	int nodeoffset;
	int new_size;
	int i, result;

	result = fdt_check_header(dtb->buf);

	if (result) {
		fprintf(stderr, "kexec: Invalid 2nd device tree.\n");
		return EFAILED;
	}

	result = set_bootargs(dtb, command_line);
	if (result) {
		fprintf(stderr, "kexec: cannot set bootargs.\n");
		result = -EINVAL;
		goto on_error;
	}

	/* determine #address-cells and #size-cells */
	result = get_cells_size(dtb->buf, &address_cells, &size_cells);
	if (result) {
		fprintf(stderr, "kexec: cannot determine cells-size.\n");
		result = -EINVAL;
		goto on_error;
	}

	if (!cells_size_fitted(address_cells, size_cells, &elfcorehdr_mem)) {
		fprintf(stderr, "kexec: elfcorehdr doesn't fit cells-size.\n");
		result = -EINVAL;
		goto on_error;
	}

	for (i = 0; i < usablemem_rgns.size; i++) {
		if (!cells_size_fitted(address_cells, size_cells,
					&crash_reserved_mem[i])) {
			fprintf(stderr,
				"kexec: usable memory range doesn't fit cells-size.\n");
			result = -EINVAL;
			goto on_error;
		}
	}

	/* duplicate dt blob */
	range_len = sizeof(uint32_t) * (address_cells + size_cells);
	new_size = fdt_totalsize(dtb->buf)
		+ fdt_prop_len(PROP_ELFCOREHDR, range_len)
		+ fdt_prop_len(PROP_USABLE_MEM_RANGE,
				range_len * usablemem_rgns.size);

	new_buf = xmalloc(new_size);
	result = fdt_open_into(dtb->buf, new_buf, new_size);
	if (result) {
		dbgprintf("%s: fdt_open_into failed: %s\n", __func__,
				fdt_strerror(result));
		result = -ENOSPC;
		goto on_error;
	}

	/* fixup 'kaslr-seed' with a random value, if supported */
	nodeoffset = fdt_path_offset(new_buf, "/chosen");
	prop = fdt_getprop_w(new_buf, nodeoffset, "kaslr-seed", &len);
	if (!prop || len != sizeof(uint64_t)) {
		dbgprintf("%s: no kaslr-seed found\n", __func__);

		/* for the kexec warm reboot case, we don't need to fix up
		 * any other dtb properties
		 */
		if (!on_crash) {
			dump_reservemap(dtb);
			if (new_buf)
				free(new_buf);

			return result;
		}

	} else {
		kaslr_seed = fdt64_to_cpu(*prop);

		/* kaslr_seed must be wiped clean by the primary
		 * kernel during boot
		 */
		if (kaslr_seed != 0) {
			dbgprintf("%s: kaslr-seed is not wiped to 0.\n",
					__func__);
			result = -EINVAL;
			goto on_error;
		}

		/*
		 * Invoke the getrandom system call with
		 * GRND_NONBLOCK, to make sure we
		 * have a valid random seed to pass to the
		 * secondary kernel.
		 */
		result = syscall(SYS_getrandom, &fdt_val64,
				sizeof(fdt_val64), GRND_NONBLOCK);

		if (result == -1) {
			fprintf(stderr, "%s: Reading random bytes failed.\n",
					__func__);

			/* Currently, on some arm64 platforms this
			 * 'getrandom' system call fails while booting
			 * the platform.
			 *
			 * In that case, the best we can do is set the
			 * 'kaslr_seed' to 0, indicating that the
			 * 2nd kernel will be booted with a 'nokaslr'
			 * like behaviour.
			 */
			fdt_val64 = 0UL;
			dbgprintf("%s: Disabling KASLR in secondary kernel.\n",
					__func__);
		}

		nodeoffset = fdt_path_offset(new_buf, "/chosen");
		result = fdt_setprop_inplace(new_buf, nodeoffset,
				"kaslr-seed", &fdt_val64, sizeof(fdt_val64));
		if (result) {
			dbgprintf("%s: fdt_setprop failed: %s\n", __func__,
					fdt_strerror(result));
			result = -EINVAL;
			goto on_error;
		}
	}

	if (on_crash) {
		/* add linux,elfcorehdr */
		nodeoffset = fdt_path_offset(new_buf, "/chosen");
		result = fdt_setprop_ranges(new_buf, nodeoffset,
				PROP_ELFCOREHDR, &elfcorehdr_mem, 1, false,
				address_cells, size_cells);
		if (result) {
			dbgprintf("%s: fdt_setprop failed: %s\n", __func__,
					fdt_strerror(result));
			result = -EINVAL;
			goto on_error;
		}

		/*
		 * add linux,usable-memory-range
		 *
		 * The crash dump kernel supports one or two regions.  For
		 * compatibility with existing user-space and older kdump,
		 * the low region is always the last one.
		 */
		nodeoffset = fdt_path_offset(new_buf, "/chosen");
		result = fdt_setprop_ranges(new_buf, nodeoffset,
				PROP_USABLE_MEM_RANGE,
				usablemem_rgns.ranges, usablemem_rgns.size,
				true, address_cells, size_cells);
		if (result) {
			dbgprintf("%s: fdt_setprop failed: %s\n", __func__,
					fdt_strerror(result));
			result = -EINVAL;
			goto on_error;
		}
	}

	fdt_pack(new_buf);
	dtb->buf = new_buf;
	dtb->size = fdt_totalsize(new_buf);

	dump_reservemap(dtb);

	return result;

on_error:
	fprintf(stderr, "kexec: %s failed.\n", __func__);
	if (new_buf)
		free(new_buf);

	return result;
}
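/*
 * Illustrative sketch (KEXEC_ARM64_EXAMPLES only): the non-blocking
 * getrandom() pattern setup_2nd_dtb() uses to refresh /chosen/kaslr-seed.
 * When no entropy is available the seed is left at 0, which mirrors the
 * "boot the secondary kernel with nokaslr-like behaviour" fallback above.
 */
#ifdef KEXEC_ARM64_EXAMPLES
static uint64_t example_fresh_kaslr_seed(void)
{
	uint64_t seed;

	if (syscall(SYS_getrandom, &seed, sizeof(seed), GRND_NONBLOCK) == -1)
		seed = 0;

	return seed;
}
#endif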
unsigned long arm64_locate_kernel_segment(struct kexec_info *info)
{
	unsigned long hole;

	if (info->kexec_flags & KEXEC_ON_CRASH) {
		unsigned long hole_end;

		hole = (crash_reserved_mem[usablemem_rgns.size - 1].start <
				mem_min ?
				mem_min :
				crash_reserved_mem[usablemem_rgns.size - 1].start);
		hole = _ALIGN_UP(hole, MiB(2));
		hole_end = hole + arm64_mem.text_offset + arm64_mem.image_size;

		if ((hole_end > mem_max) ||
		    (hole_end > crash_reserved_mem[usablemem_rgns.size - 1].end)) {
			dbgprintf("%s: Crash kernel out of range\n", __func__);
			hole = ULONG_MAX;
		}
	} else {
		hole = locate_hole(info,
			arm64_mem.text_offset + arm64_mem.image_size,
			MiB(2), 0, ULONG_MAX, 1);

		if (hole == ULONG_MAX)
			dbgprintf("%s: locate_hole failed\n", __func__);
	}

	return hole;
}
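/*
 * Illustrative sketch (KEXEC_ARM64_EXAMPLES only): a simplified version of
 * the crash-kernel placement rule in arm64_locate_kernel_segment() above.
 * The image goes at the start of the reservation rounded up to 2 MiB and
 * must fit entirely inside it; the real code additionally clamps against
 * mem_min and mem_max.
 */
#ifdef KEXEC_ARM64_EXAMPLES
static unsigned long example_crash_kernel_base(unsigned long reserved_start,
	unsigned long reserved_end)
{
	unsigned long base = _ALIGN_UP(reserved_start, MiB(2));

	if (base + arm64_mem.text_offset + arm64_mem.image_size > reserved_end)
		return ULONG_MAX;	/* does not fit */

	return base;
}
#endif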
/**
 * arm64_load_other_segments - Prepare the dtb, initrd and purgatory segments.
 */

int arm64_load_other_segments(struct kexec_info *info,
	unsigned long image_base)
{
	int result;
	unsigned long dtb_base;
	unsigned long hole_min;
	unsigned long hole_max;
	unsigned long initrd_end;
	uint64_t purgatory_sink;
	char *initrd_buf = NULL;
	struct dtb dtb;
	char command_line[COMMAND_LINE_SIZE] = "";

	if (arm64_opts.command_line) {
		if (strlen(arm64_opts.command_line) >
		    sizeof(command_line) - 1) {
			fprintf(stderr,
				"Kernel command line too long for kernel!\n");
			return EFAILED;
		}

		strncpy(command_line, arm64_opts.command_line,
			sizeof(command_line) - 1);
		command_line[sizeof(command_line) - 1] = 0;
	}

	purgatory_sink = find_purgatory_sink(arm64_opts.console);

	dbgprintf("%s:%d: purgatory sink: 0x%" PRIx64 "\n", __func__, __LINE__,
		purgatory_sink);

	if (arm64_opts.dtb) {
		dtb.name = "dtb_user";
		dtb.buf = slurp_file(arm64_opts.dtb, &dtb.size);
	} else {
		result = read_1st_dtb(&dtb);

		if (result) {
			fprintf(stderr,
				"kexec: Error: No device tree available.\n");
			return EFAILED;
		}
	}

	result = setup_2nd_dtb(&dtb, command_line,
			info->kexec_flags & KEXEC_ON_CRASH);
	if (result)
		return EFAILED;

	/* Put the other segments after the image. */

	hole_min = image_base + arm64_mem.image_size;
	if (info->kexec_flags & KEXEC_ON_CRASH)
		hole_max = crash_reserved_mem[usablemem_rgns.size - 1].end;
	else
		hole_max = ULONG_MAX;

	if (arm64_opts.initrd) {
		initrd_buf = slurp_file(arm64_opts.initrd, &initrd_size);

		if (!initrd_buf)
			fprintf(stderr, "kexec: Empty ramdisk file.\n");
		else {
			/* Put the initrd after the kernel. */
			initrd_base = add_buffer_phys_virt(info, initrd_buf,
				initrd_size, initrd_size, 0,
				hole_min, hole_max, 1, 0);

			initrd_end = initrd_base + initrd_size;

			/* Check limits as specified in booting.txt.
			 * The kernel may have as little as 32 GB of address
			 * space to map system memory, and both the kernel
			 * and the initrd must be 1 GB aligned.
			 */
			if (_ALIGN_UP(initrd_end, GiB(1)) -
			    _ALIGN_DOWN(image_base, GiB(1)) > GiB(32)) {
				fprintf(stderr,
					"kexec: Error: image + initrd too big.\n");
				return EFAILED;
			}

			dbgprintf("initrd: base %lx, size %lxh (%ld)\n",
				initrd_base, initrd_size, initrd_size);

			result = dtb_set_initrd((char **)&dtb.buf, &dtb.size,
				initrd_base, initrd_base + initrd_size);

			if (result)
				return EFAILED;
		}
	}

	if (!initrd_buf) {
		/* Don't reuse the initrd addresses from the 1st DTB */
		dtb_clear_initrd((char **)&dtb.buf, &dtb.size);
	}

	/* Check size limit as specified in booting.txt. */
	if (dtb.size > MiB(2)) {
		fprintf(stderr, "kexec: Error: dtb too big.\n");
		return EFAILED;
	}

	dtb_base = add_buffer_phys_virt(info, dtb.buf, dtb.size, dtb.size,
		0, hole_min, hole_max, 1, 0);

	/* dtb_base is valid if we got here. */

	dbgprintf("dtb: base %lx, size %lxh (%ld)\n", dtb_base,
		dtb.size, dtb.size);

	elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size,
		hole_min, hole_max, 1, 0);

	info->entry = (void *)elf_rel_get_addr(&info->rhdr, "purgatory_start");

	elf_rel_set_symbol(&info->rhdr, "arm64_sink", &purgatory_sink,
		sizeof(purgatory_sink));

	elf_rel_set_symbol(&info->rhdr, "arm64_kernel_entry", &image_base,
		sizeof(image_base));

	elf_rel_set_symbol(&info->rhdr, "arm64_dtb_addr", &dtb_base,
		sizeof(dtb_base));

	return 0;
}

/**
 * virt_to_phys - For processing elf file values.
 */

unsigned long virt_to_phys(unsigned long v)
{
	unsigned long p;

	p = v - get_vp_offset() + get_phys_offset();

	return p;
}

/**
 * phys_to_virt - For crashdump setup.
 */

unsigned long phys_to_virt(struct crash_elf_info *elf_info,
	unsigned long long p)
{
	unsigned long v;

	v = p - get_phys_offset() + elf_info->page_offset;

	return v;
}
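/*
 * Illustrative sketch (KEXEC_ARM64_EXAMPLES only): virt_to_phys() and
 * phys_to_virt() above are inverses once phys_offset, vp_offset and the
 * crashdump page_offset are known.  elf_info stands for an already
 * initialised crash_elf_info.
 */
#ifdef KEXEC_ARM64_EXAMPLES
static void example_address_round_trip(struct crash_elf_info *elf_info,
	unsigned long vaddr)
{
	unsigned long paddr = virt_to_phys(vaddr);

	/* p = v - vp_offset + phys_offset, so converting back with the
	 * crashdump page_offset lands at v - vp_offset + page_offset. */
	assert(phys_to_virt(elf_info, paddr) ==
	       vaddr - get_vp_offset() + elf_info->page_offset);
}
#endif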
/**
 * add_segment - Use virt_to_phys when loading elf files.
 */

void add_segment(struct kexec_info *info, const void *buf, size_t bufsz,
	unsigned long base, size_t memsz)
{
	add_segment_phys_virt(info, buf, bufsz, base, memsz, 1);
}

static inline void set_phys_offset(int64_t v, char *set_method)
{
	if (arm64_mem.phys_offset == arm64_mem_ngv
		|| v < arm64_mem.phys_offset) {
		arm64_mem.phys_offset = v;
		dbgprintf("%s: phys_offset : %016lx (method : %s)\n",
				__func__, arm64_mem.phys_offset, set_method);
	}
}

/**
 * get_va_bits - Helper for getting VA_BITS
 */

static int get_va_bits(void)
{
	unsigned long long stext_sym_addr;

	/* If we already got va_bits from 'kcore', use it. */
	if (va_bits != -1)
		goto out;

	/* For kernels older than v4.19 */
	fprintf(stderr, "Warning, can't get the VA_BITS from kcore\n");
	stext_sym_addr = get_kernel_sym("_stext");

	if (stext_sym_addr == 0) {
		fprintf(stderr, "Can't get the symbol of _stext.\n");
		return -1;
	}

	/* Derive va_bits as per arch/arm64/Kconfig */
	if ((stext_sym_addr & PAGE_OFFSET_36) == PAGE_OFFSET_36) {
		va_bits = 36;
	} else if ((stext_sym_addr & PAGE_OFFSET_39) == PAGE_OFFSET_39) {
		va_bits = 39;
	} else if ((stext_sym_addr & PAGE_OFFSET_42) == PAGE_OFFSET_42) {
		va_bits = 42;
	} else if ((stext_sym_addr & PAGE_OFFSET_47) == PAGE_OFFSET_47) {
		va_bits = 47;
	} else if ((stext_sym_addr & PAGE_OFFSET_48) == PAGE_OFFSET_48) {
		va_bits = 48;
	} else {
		fprintf(stderr,
			"Cannot find a proper _stext for calculating VA_BITS\n");
		return -1;
	}

out:
	dbgprintf("va_bits : %d\n", va_bits);

	return 0;
}

/**
 * get_page_offset - Helper for getting PAGE_OFFSET
 */

int get_page_offset(unsigned long *page_offset)
{
	unsigned long long text_sym_addr, kernel_va_mid;
	int ret;

	text_sym_addr = get_kernel_sym("_text");
	if (text_sym_addr == 0) {
		fprintf(stderr,
			"Can't get the symbol of _text to calculate page_offset.\n");
		return -1;
	}

	ret = get_va_bits();
	if (ret < 0)
		return ret;

	/* Since kernel 5.4, the kernel image is put above
	 * UINT64_MAX << (va_bits - 1)
	 */
	kernel_va_mid = UINT64_MAX << (va_bits - 1);
	/* older kernel */
	if (text_sym_addr < kernel_va_mid)
		*page_offset = UINT64_MAX << (va_bits - 1);
	else
		*page_offset = UINT64_MAX << va_bits;

	dbgprintf("page_offset : %lx\n", *page_offset);

	return 0;
}

static void arm64_scan_vmcoreinfo(char *pos)
{
	const char *str;

	str = "NUMBER(VA_BITS)=";
	if (memcmp(str, pos, strlen(str)) == 0)
		va_bits = strtoul(pos + strlen(str), NULL, 10);
}

/**
 * get_phys_offset_from_vmcoreinfo_pt_note - Helper for getting PHYS_OFFSET
 * (and va_bits) from the VMCOREINFO note inside 'kcore'.
 */

static int get_phys_offset_from_vmcoreinfo_pt_note(long *phys_offset)
{
	int fd, ret = 0;

	if ((fd = open("/proc/kcore", O_RDONLY)) < 0) {
		fprintf(stderr, "Can't open (%s).\n", "/proc/kcore");
		return EFAILED;
	}

	arch_scan_vmcoreinfo = arm64_scan_vmcoreinfo;
	ret = read_phys_offset_elf_kcore(fd, phys_offset);

	close(fd);

	return ret;
}
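/*
 * Illustrative sketch (KEXEC_ARM64_EXAMPLES only): the PAGE_OFFSET
 * arithmetic used by get_page_offset() above.  With VA_BITS = 48, a kernel
 * whose _text lies in the upper half of the kernel VA space (v5.4 and
 * later) gets 0xffff000000000000, while an older kernel gets
 * 0xffff800000000000.
 */
#ifdef KEXEC_ARM64_EXAMPLES
static unsigned long example_page_offset(int bits, bool text_in_upper_half)
{
	return text_in_upper_half ?
		(unsigned long)(UINT64_MAX << bits) :
		(unsigned long)(UINT64_MAX << (bits - 1));
}
#endif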
/**
 * get_phys_base_from_pt_load - Helper for getting PHYS_OFFSET
 * from the PT_LOADs inside 'kcore'.
 */

int get_phys_base_from_pt_load(long *phys_offset)
{
	int i, fd, ret;
	unsigned long long phys_start;
	unsigned long long virt_start;

	ret = get_page_offset(&page_offset);
	if (ret < 0)
		return ret;

	if ((fd = open("/proc/kcore", O_RDONLY)) < 0) {
		fprintf(stderr, "Can't open (%s).\n", "/proc/kcore");
		return EFAILED;
	}

	read_elf(fd);

	for (i = 0; get_pt_load(i, &phys_start, NULL, &virt_start, NULL);
			i++) {
		if (virt_start != NOT_KV_ADDR
				&& virt_start >= page_offset
				&& phys_start != NOT_PADDR)
			*phys_offset = phys_start -
				(virt_start & ~page_offset);
	}

	close(fd);

	return 0;
}

static bool to_be_excluded(char *str, unsigned long long start,
			   unsigned long long end)
{
	if (!strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL))) {
		uint64_t load_start, load_end;

		if (!get_crash_kernel_load_range(&load_start, &load_end) &&
		    (load_start == start) && (load_end == end))
			return false;

		return true;
	}

	if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) ||
	    !strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) ||
	    !strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)))
		return false;
	else
		return true;
}
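/*
 * Illustrative sketch (KEXEC_ARM64_EXAMPLES only): the /proc/iomem line
 * format that get_memory_ranges() below parses, e.g.
 * "40000000-bfffffff : System RAM".  The %n conversion records where the
 * resource name starts; the sample line is made up.
 */
#ifdef KEXEC_ARM64_EXAMPLES
static void example_parse_iomem_line(void)
{
	const char line[] = "40000000-bfffffff : System RAM";
	unsigned long long start, end;
	int consumed;

	if (sscanf(line, "%llx-%llx : %n", &start, &end, &consumed) == 2)
		printf("'%s' spans %#llx-%#llx\n", line + consumed,
			start, end);
}
#endif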
/**
 * get_memory_ranges - Try to get the memory ranges from
 * /proc/iomem.
 */

int get_memory_ranges(struct memory_range **range, int *ranges,
	unsigned long kexec_flags)
{
	long phys_offset = -1;
	FILE *fp;
	const char *iomem = proc_iomem();
	char line[MAX_LINE], *str;
	unsigned long long start, end;
	int n, consumed;
	struct memory_ranges memranges;
	struct memory_range *last, excl_range;
	int ret;

	if (!try_read_phys_offset_from_kcore) {
		/* Since kernel version 4.19, 'kcore' contains
		 * a new PT_NOTE which carries the VMCOREINFO
		 * information.
		 * If it is available, prefer it for retrieving the
		 * 'PHYS_OFFSET' value exported by the kernel, as this
		 * is now the standard interface exposed by the kernel
		 * for sharing machine specific details with userland.
		 */
		ret = get_phys_offset_from_vmcoreinfo_pt_note(&phys_offset);
		if (!ret) {
			if (phys_offset != -1)
				set_phys_offset(phys_offset,
						"vmcoreinfo pt_note");
		} else {
			/* If we are running on an older kernel,
			 * try to retrieve the 'PHYS_OFFSET' value
			 * exported by the kernel in the 'kcore'
			 * file by reading the PT_LOADs and determining
			 * the correct combination.
			 */
			ret = get_phys_base_from_pt_load(&phys_offset);
			if (!ret)
				if (phys_offset != -1)
					set_phys_offset(phys_offset,
							"pt_load");
		}

		try_read_phys_offset_from_kcore = true;
	}

	fp = fopen(iomem, "r");
	if (!fp)
		die("Cannot open %s\n", iomem);

	memranges.ranges = NULL;
	memranges.size = memranges.max_size = 0;

	while (fgets(line, sizeof(line), fp) != 0) {
		n = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed);
		if (n != 2)
			continue;
		str = line + consumed;

		if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) {
			ret = mem_regions_alloc_and_add(&memranges, start,
					end - start + 1, RANGE_RAM);
			if (ret) {
				fprintf(stderr,
					"Cannot allocate memory for ranges\n");
				fclose(fp);
				return -ENOMEM;
			}

			dbgprintf("%s:+[%d] %016llx - %016llx\n",
				__func__, memranges.size - 1,
				memranges.ranges[memranges.size - 1].start,
				memranges.ranges[memranges.size - 1].end);
		} else if (to_be_excluded(str, start, end)) {
			if (!memranges.size)
				continue;

			/*
			 * Note: mem_regions_exclude() doesn't guarantee
			 * that the ranges are sorted out, but as long as
			 * we cope with /proc/iomem, we only operate on
			 * the last entry and so it is safe.
			 */

			/* The last System RAM range */
			last = &memranges.ranges[memranges.size - 1];

			if (last->end < start)
				/* New resource outside of System RAM */
				continue;
			if (end < last->start)
				/* Already excluded by parent resource */
				continue;

			excl_range.start = start;
			excl_range.end = end;
			ret = mem_regions_alloc_and_exclude(&memranges,
							    &excl_range);
			if (ret) {
				fprintf(stderr,
					"Cannot allocate memory for ranges (exclude)\n");
				fclose(fp);
				return -ENOMEM;
			}
			dbgprintf("%s:- %016llx - %016llx\n",
					__func__, start, end);
		}
	}

	fclose(fp);

	*range = memranges.ranges;
	*ranges = memranges.size;

	/* As a fallback option, we can try determining the PHYS_OFFSET
	 * value from the '/proc/iomem' entries as well.
	 *
	 * But note that this can be flaky, as on certain arm64
	 * platforms it has been noticed that, due to a hole at the
	 * start of the physical RAM exposed to the kernel
	 * (i.e. it doesn't start from address 0), the kernel still
	 * calculates the 'memstart_addr' kernel variable as 0.
	 *
	 * Whereas the SYSTEM_RAM or IOMEM_RESERVED range in
	 * '/proc/iomem' would carry a first entry whose start address
	 * is non-zero (as the physical RAM exposed to the kernel
	 * starts from a non-zero address).
	 *
	 * In such cases, if we rely on '/proc/iomem' entries to
	 * calculate the phys_offset, then we will have a mismatch
	 * between the user-space and kernel-space 'PHYS_OFFSET'
	 * values.
	 */
	if (memranges.size)
		set_phys_offset(memranges.ranges[0].start, "iomem");

	dbgprint_mem_range("System RAM ranges;",
				memranges.ranges, memranges.size);

	return 0;
}

int arch_compat_trampoline(struct kexec_info *info)
{
	return 0;
}

int machine_verify_elf_rel(struct mem_ehdr *ehdr)
{
	return (ehdr->e_machine == EM_AARCH64);
}

enum aarch64_rel_type {
	R_AARCH64_NONE = 0,
	R_AARCH64_ABS64 = 257,
	R_AARCH64_PREL32 = 261,
	R_AARCH64_MOVW_UABS_G0_NC = 264,
	R_AARCH64_MOVW_UABS_G1_NC = 266,
	R_AARCH64_MOVW_UABS_G2_NC = 268,
	R_AARCH64_MOVW_UABS_G3 = 269,
	R_AARCH64_LD_PREL_LO19 = 273,
	R_AARCH64_ADR_PREL_LO21 = 274,
	R_AARCH64_ADR_PREL_PG_HI21 = 275,
	R_AARCH64_ADD_ABS_LO12_NC = 277,
	R_AARCH64_JUMP26 = 282,
	R_AARCH64_CALL26 = 283,
	R_AARCH64_LDST64_ABS_LO12_NC = 286,
	R_AARCH64_LDST128_ABS_LO12_NC = 299
};

/* Extract the inclusive bit range [start, end] of a value.  The relocation
 * cases below use ranges up to [48, 63], so both the value and the mask
 * must be 64 bits wide. */
static uint64_t get_bits(uint64_t value, int start, int end)
{
	uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1;

	return (value >> start) & mask;
}
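/*
 * Illustrative sketch (KEXEC_ARM64_EXAMPLES only): get_bits() extracts an
 * inclusive bit range, which is exactly what the MOVW_UABS_G* relocation
 * cases below need, e.g. bits [47:32] for the MOVK that carries the third
 * 16-bit chunk of a 64-bit address.
 */
#ifdef KEXEC_ARM64_EXAMPLES
static void example_get_bits(void)
{
	assert(get_bits(0x12345678, 0, 15) == 0x5678);
	assert(get_bits(0x0000dead00000000ULL, 32, 47) == 0xdead);
}
#endif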
void machine_apply_elf_rel(struct mem_ehdr *ehdr, struct mem_sym *UNUSED(sym),
	unsigned long r_type, void *ptr, unsigned long address,
	unsigned long value)
{
	uint64_t *loc64;
	uint32_t *loc32;
	uint64_t *location = (uint64_t *)ptr;
	uint64_t data = *location;
	uint64_t imm;
	const char *type = NULL;

	switch ((enum aarch64_rel_type)r_type) {
	case R_AARCH64_ABS64:
		type = "ABS64";
		loc64 = ptr;
		*loc64 = cpu_to_elf64(ehdr, value);
		break;
	case R_AARCH64_PREL32:
		type = "PREL32";
		loc32 = ptr;
		*loc32 = cpu_to_elf32(ehdr, value - address);
		break;
	/* Set a MOV[KZ] immediate field to bits [15:0] of X.
	 * No overflow check. */
	case R_AARCH64_MOVW_UABS_G0_NC:
		type = "MOVW_UABS_G0_NC";
		loc32 = ptr;
		imm = get_bits(value, 0, 15);
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
		break;
	/* Set a MOV[KZ] immediate field to bits [31:16] of X.
	 * No overflow check. */
	case R_AARCH64_MOVW_UABS_G1_NC:
		type = "MOVW_UABS_G1_NC";
		loc32 = ptr;
		imm = get_bits(value, 16, 31);
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
		break;
	/* Set a MOV[KZ] immediate field to bits [47:32] of X.
	 * No overflow check. */
	case R_AARCH64_MOVW_UABS_G2_NC:
		type = "MOVW_UABS_G2_NC";
		loc32 = ptr;
		imm = get_bits(value, 32, 47);
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
		break;
	/* Set a MOV[KZ] immediate field to bits [63:48] of X. */
	case R_AARCH64_MOVW_UABS_G3:
		type = "MOVW_UABS_G3";
		loc32 = ptr;
		imm = get_bits(value, 48, 63);
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << 5));
		break;
	case R_AARCH64_LD_PREL_LO19:
		type = "LD_PREL_LO19";
		loc32 = ptr;
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ (((value - address) << 3) & 0xffffe0));
		break;
	case R_AARCH64_ADR_PREL_LO21:
		if (value & 3)
			die("%s: ERROR Unaligned value: %lx\n", __func__,
				value);
		type = "ADR_PREL_LO21";
		loc32 = ptr;
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ (((value - address) << 3) & 0xffffe0));
		break;
	case R_AARCH64_ADR_PREL_PG_HI21:
		type = "ADR_PREL_PG_HI21";
		imm = ((value & ~0xfff) - (address & ~0xfff)) >> 12;
		loc32 = ptr;
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ ((imm & 3) << 29) + ((imm & 0x1ffffc) << (5 - 2)));
		break;
	case R_AARCH64_ADD_ABS_LO12_NC:
		type = "ADD_ABS_LO12_NC";
		loc32 = ptr;
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ ((value & 0xfff) << 10));
		break;
	case R_AARCH64_JUMP26:
		type = "JUMP26";
		loc32 = ptr;
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ (((value - address) >> 2) & 0x3ffffff));
		break;
	case R_AARCH64_CALL26:
		type = "CALL26";
		loc32 = ptr;
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ (((value - address) >> 2) & 0x3ffffff));
		break;
	/* encode imm field with bits [11:3] of value */
	case R_AARCH64_LDST64_ABS_LO12_NC:
		if (value & 7)
			die("%s: ERROR Unaligned value: %lx\n", __func__,
				value);
		type = "LDST64_ABS_LO12_NC";
		loc32 = ptr;
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32)
			+ ((value & 0xff8) << (10 - 3)));
		break;
	/* encode imm field with bits [11:4] of value */
	case R_AARCH64_LDST128_ABS_LO12_NC:
		if (value & 15)
			die("%s: ERROR Unaligned value: %lx\n", __func__,
				value);
		type = "LDST128_ABS_LO12_NC";
		loc32 = ptr;
		imm = value & 0xff0;
		*loc32 = cpu_to_le32(le32_to_cpu(*loc32) + (imm << (10 - 4)));
		break;
	default:
		die("%s: ERROR Unknown type: %lu\n", __func__, r_type);
		break;
	}

	dbgprintf("%s: %s %016lx->%016lx\n", __func__, type, data, *location);
}

void arch_reuse_initrd(void)
{
	reuse_initrd = 1;
}

void arch_update_purgatory(struct kexec_info *UNUSED(info))
{
}
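/*
 * Illustrative sketch (KEXEC_ARM64_EXAMPLES only): the imm26 computation
 * used by the JUMP26/CALL26 cases in machine_apply_elf_rel() above -- a
 * PC-relative byte offset divided by 4 and masked to 26 bits, i.e. the
 * field that gets added into a B/BL instruction.
 */
#ifdef KEXEC_ARM64_EXAMPLES
static uint32_t example_branch_imm26(uint64_t value, uint64_t address)
{
	return (uint32_t)(((value - address) >> 2) & 0x3ffffff);
}
#endif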