summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kexec
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:17:46 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:17:46 +0000
commit7f3a4257159dea8e7ef66d1a539dc6df708b8ed3 (patch)
treebcc69b5f4609f348fac49e2f59e210b29eaea783 /arch/powerpc/kexec
parentAdding upstream version 6.9.12. (diff)
downloadlinux-7f3a4257159dea8e7ef66d1a539dc6df708b8ed3.tar.xz
linux-7f3a4257159dea8e7ef66d1a539dc6df708b8ed3.zip
Adding upstream version 6.10.3.upstream/6.10.3
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/powerpc/kexec')
-rw-r--r--arch/powerpc/kexec/Makefile4
-rw-r--r--arch/powerpc/kexec/core_64.c112
-rw-r--r--arch/powerpc/kexec/crash.c195
-rw-r--r--arch/powerpc/kexec/elf_64.c3
-rw-r--r--arch/powerpc/kexec/file_load_64.c314
-rw-r--r--arch/powerpc/kexec/ranges.c312
6 files changed, 650 insertions, 290 deletions
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 8e469c4da3..470eb0453e 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -3,11 +3,11 @@
# Makefile for the linux kernel.
#
-obj-y += core.o core_$(BITS).o
+obj-y += core.o core_$(BITS).o ranges.o
obj-$(CONFIG_PPC32) += relocate_32.o
-obj-$(CONFIG_KEXEC_FILE) += file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o
+obj-$(CONFIG_KEXEC_FILE) += file_load.o file_load_$(BITS).o elf_$(BITS).o
obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
obj-$(CONFIG_CRASH_DUMP) += crash.o
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 27254624f6..222aa326da 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -17,6 +17,7 @@
#include <linux/cpu.h>
#include <linux/hardirq.h>
#include <linux/of.h>
+#include <linux/libfdt.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -31,6 +32,7 @@
#include <asm/hw_breakpoint.h>
#include <asm/svm.h>
#include <asm/ultravisor.h>
+#include <asm/crashdump-ppc64.h>
int machine_kexec_prepare(struct kimage *image)
{
@@ -430,3 +432,113 @@ static int __init export_htab_values(void)
}
late_initcall(export_htab_values);
#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
+/**
+ * add_node_props - Reads node properties from device node structure and add
+ * them to fdt.
+ * @fdt: Flattened device tree of the kernel
+ * @node_offset: offset of the node to add a property at
+ * @dn: device node pointer
+ *
+ * Every property of @dn is written to the FDT node with fdt_setprop(). The
+ * first failure is logged and ends the copy; properties written before the
+ * failure are left in place.
+ *
+ * Returns 0 on success, -EINVAL if @dn is NULL, or the negative value
+ * returned by fdt_setprop() on failure.
+ */
+static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
+{
+ int ret = 0;
+ struct property *pp;
+
+ if (!dn)
+ return -EINVAL;
+
+ for_each_property_of_node(dn, pp) {
+ ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
+ if (ret < 0) {
+ pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
+ return ret;
+ }
+ }
+ return ret;
+}
+
+/**
+ * update_cpus_node - Update cpus node of flattened device tree using of_root
+ * device node.
+ * @fdt: Flattened device tree of the kernel.
+ *
+ * First deletes every sub-node of /cpus in @fdt whose device_type is "cpu",
+ * then adds one sub-node (with all of its properties) for each child of the
+ * /cpus device node returned by of_find_node_by_path() whose device_type is
+ * "cpu". Sub-nodes of other types are left alone.
+ *
+ * Returns 0 on success. On failure returns -EINVAL or the negative value
+ * reported by the failing libfdt call.
+ *
+ * Note: expecting no subnodes under /cpus/<node> with device_type == "cpu".
+ * If this changes, update this function to include them.
+ */
+int update_cpus_node(void *fdt)
+{
+ int prev_node_offset;
+ const char *device_type;
+ const struct fdt_property *prop;
+ struct device_node *cpus_node, *dn;
+ int cpus_offset, cpus_subnode_offset, ret = 0;
+
+ cpus_offset = fdt_path_offset(fdt, "/cpus");
+ /*
+ * NOTE(review): -FDT_ERR_NOTFOUND is not treated as an error here, so
+ * cpus_offset can still be negative when it is used below.
+ */
+ if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
+ pr_err("Malformed device tree: error reading /cpus node: %s\n",
+ fdt_strerror(cpus_offset));
+ return cpus_offset;
+ }
+
+ /*
+ * prev_node_offset tracks the last sub-node that was kept. It starts out
+ * as the /cpus node itself, meaning "no sub-node kept yet".
+ */
+ prev_node_offset = cpus_offset;
+ /* Delete sub-nodes of /cpus node with device_type == "cpu" */
+ for (cpus_subnode_offset = fdt_first_subnode(fdt, cpus_offset); cpus_subnode_offset >= 0;) {
+ /* Ignore nodes that do not have a device_type property or device_type != "cpu" */
+ prop = fdt_get_property(fdt, cpus_subnode_offset, "device_type", NULL);
+ if (!prop || strcmp(prop->data, "cpu")) {
+ prev_node_offset = cpus_subnode_offset;
+ goto next_node;
+ }
+
+ ret = fdt_del_node(fdt, cpus_subnode_offset);
+ if (ret < 0) {
+ pr_err("Failed to delete a cpus sub-node: %s\n", fdt_strerror(ret));
+ return ret;
+ }
+next_node:
+ /*
+ * Advance from the last kept sub-node rather than from the current
+ * offset, which may refer to a node that was just deleted. If nothing
+ * has been kept yet, restart from the first sub-node of /cpus.
+ */
+ if (prev_node_offset == cpus_offset)
+ cpus_subnode_offset = fdt_first_subnode(fdt, cpus_offset);
+ else
+ cpus_subnode_offset = fdt_next_subnode(fdt, prev_node_offset);
+ }
+
+ cpus_node = of_find_node_by_path("/cpus");
+ /* Fail here to avoid kexec/kdump kernel boot hung */
+ if (!cpus_node) {
+ pr_err("No /cpus node found\n");
+ return -EINVAL;
+ }
+
+ /* Add all /cpus sub-nodes of device_type == "cpu" to FDT */
+ for_each_child_of_node(cpus_node, dn) {
+ /* Ignore device nodes that do not have a device_type property
+ * or device_type != "cpu".
+ */
+ device_type = of_get_property(dn, "device_type", NULL);
+ if (!device_type || strcmp(device_type, "cpu"))
+ continue;
+
+ cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
+ if (cpus_subnode_offset < 0) {
+ pr_err("Unable to add %s subnode: %s\n", dn->full_name,
+ fdt_strerror(cpus_subnode_offset));
+ ret = cpus_subnode_offset;
+ goto out;
+ }
+
+ ret = add_node_props(fdt, cpus_subnode_offset, dn);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ of_node_put(cpus_node);
+ /*
+ * NOTE(review): presumably dn is NULL when the loop above ran to
+ * completion, so this only drops the reference still held after an
+ * early exit from the loop - confirm against for_each_child_of_node().
+ */
+ of_node_put(dn);
+ return ret;
+}
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index ef5c2d25ec..9ac3266e49 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -16,6 +16,8 @@
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/types.h>
+#include <linux/libfdt.h>
+#include <linux/memory.h>
#include <asm/processor.h>
#include <asm/machdep.h>
@@ -24,6 +26,7 @@
#include <asm/setjmp.h>
#include <asm/debug.h>
#include <asm/interrupt.h>
+#include <asm/kexec_ranges.h>
/*
* The primary CPU waits a while for all secondary CPUs to enter. This is to
@@ -392,3 +395,195 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(1, 0);
}
+
+#ifdef CONFIG_CRASH_HOTPLUG
+#undef pr_fmt
+#define pr_fmt(fmt) "crash hp: " fmt
+
+/*
+ * Report the preferred elfcorehdr size. It is advertised to userspace
+ * through the /sys/kernel/crash_elfcorehdr_size sysfs interface.
+ *
+ * The size covers one ELF header plus a program header for every possible
+ * CPU, one for vmcoreinfo and, when memory hotplug is enabled,
+ * CONFIG_CRASH_MAX_MEMORY_RANGES further program headers.
+ */
+unsigned int arch_crash_get_elfcorehdr_size(void)
+{
+	/* One program header per possible CPU, plus one for vmcoreinfo */
+	unsigned long nr_phdrs = num_possible_cpus() + 1;
+
+	/* Leave room for the configured number of memory ranges as well */
+	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+		nr_phdrs += CONFIG_CRASH_MAX_MEMORY_RANGES;
+
+	return sizeof(struct elfhdr) + nr_phdrs * sizeof(Elf64_Phdr);
+}
+
+/**
+ * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace the old
+ * elfcorehdr in the kexec segment array with it.
+ * @image: the active struct kimage
+ * @mn: struct memory_notify data handler; only read when image->hp_action is
+ * KEXEC_CRASH_HP_REMOVE_MEMORY, to find the range that is going away
+ *
+ * Rebuilds the crash memory range list, drops the hot-unplugged range from it
+ * when handling a memory remove, regenerates the ELF core header and copies it
+ * over the existing elfcorehdr segment. Every failure is logged and leaves the
+ * existing elfcorehdr untouched.
+ */
+static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn)
+{
+	int ret;
+	struct crash_mem *cmem = NULL;
+	struct kexec_segment *ksegment;
+	void *ptr, *mem, *elfbuf = NULL;
+	unsigned long elfsz, memsz, base_addr, size;
+
+	ksegment = &image->segment[image->elfcorehdr_index];
+	mem = (void *) ksegment->mem;
+	memsz = ksegment->memsz;
+
+	ret = get_crash_memory_ranges(&cmem);
+	if (ret) {
+		pr_err("Failed to get crash mem range\n");
+		/*
+		 * The range list may have been partially built before the
+		 * failure, so release it through the common exit path.
+		 */
+		goto out;
+	}
+
+	/*
+	 * The hot unplugged memory is part of crash memory ranges,
+	 * remove it here.
+	 */
+	if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
+		base_addr = PFN_PHYS(mn->start_pfn);
+		size = mn->nr_pages * PAGE_SIZE;
+		ret = remove_mem_range(&cmem, base_addr, size);
+		if (ret) {
+			pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n");
+			goto out;
+		}
+	}
+
+	ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz);
+	if (ret) {
+		pr_err("Failed to prepare elf header\n");
+		goto out;
+	}
+
+	/*
+	 * It is unlikely that the kernel hits this, because the elfcorehdr
+	 * kexec segment (memsz) is built with additional space to accommodate
+	 * a growing number of crash memory ranges while loading the kdump
+	 * kernel. The check is just here to guard against unforeseen cases.
+	 */
+	if (elfsz > memsz) {
+		pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu\n", elfsz, memsz);
+		goto out;
+	}
+
+	ptr = __va(mem);
+	if (ptr) {
+		/* Temporarily invalidate the crash image while it is replaced */
+		xchg(&kexec_crash_image, NULL);
+
+		/* Replace the old elfcorehdr with newly prepared elfcorehdr */
+		memcpy(ptr, elfbuf, elfsz);
+
+		/* The crash image is now valid once again */
+		xchg(&kexec_crash_image, image);
+	}
+out:
+	/* Both pointers start out NULL, so this is safe on every path */
+	kvfree(cmem);
+	kvfree(elfbuf);
+}
+
+/**
+ * get_fdt_index - Find the FDT segment in the kexec segment array.
+ * @image: a pointer to kexec_crash_image
+ *
+ * Walks the segments in order and picks the first one whose contents start
+ * with the FDT magic number.
+ *
+ * Returns the index of that segment in the kexec segment array, or -1 if no
+ * segment matches.
+ */
+static int get_fdt_index(struct kimage *image)
+{
+	int seg;
+
+	for (seg = 0; seg < image->nr_segments; seg++) {
+		unsigned long paddr = image->segment[seg].mem;
+		void *vaddr = __va(paddr);
+
+		if (!vaddr)
+			continue;
+
+		/* The first segment carrying the FDT magic wins */
+		if (fdt_magic(vaddr) == FDT_MAGIC)
+			return seg;
+	}
+
+	return -1;
+}
+
+/**
+ * update_crash_fdt - updates the cpus node of the crash FDT.
+ *
+ * @image: a pointer to kexec_crash_image
+ *
+ * Locates the FDT segment of @image and refreshes it in place with
+ * update_cpus_node(). The crash image pointer is cleared for the duration of
+ * the update and restored afterwards, whether or not the update succeeded.
+ * Failures are logged only.
+ */
+static void update_crash_fdt(struct kimage *image)
+{
+	void *fdt;
+	int fdt_index;
+
+	fdt_index = get_fdt_index(image);
+	if (fdt_index < 0) {
+		pr_err("Unable to locate FDT segment.\n");
+		return;
+	}
+
+	fdt = __va((void *)image->segment[fdt_index].mem);
+
+	/* Temporarily invalidate the crash image while it is replaced */
+	xchg(&kexec_crash_image, NULL);
+
+	/* update FDT to reflect changes in CPU resources */
+	if (update_cpus_node(fdt))
+		pr_err("Failed to update crash FDT\n");
+
+	/* The crash image is now valid once again */
+	xchg(&kexec_crash_image, image);
+}
+
+/**
+ * arch_crash_hotplug_support - Tell the caller whether crash hotplug handling
+ * applies to the given image.
+ * @image: the kexec image being queried
+ * @kexec_flags: kexec flags to test for KEXEC_CRASH_HOTPLUG_SUPPORT
+ *
+ * Returns 1 when CONFIG_KEXEC_FILE is enabled and @image has file_mode set.
+ * Otherwise returns @kexec_flags masked with KEXEC_CRASH_HOTPLUG_SUPPORT,
+ * which is non-zero only when that flag is set.
+ */
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags)
+{
+#ifdef CONFIG_KEXEC_FILE
+ if (image->file_mode)
+ return 1;
+#endif
+ return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT;
+}
+
+/**
+ * arch_crash_handle_hotplug_event - Handle crash CPU/Memory hotplug events to update the
+ * necessary kexec segments based on the hotplug event.
+ * @image: a pointer to kexec_crash_image
+ * @arg: struct memory_notify handler for memory hotplug case and NULL for CPU hotplug case.
+ *
+ * Update the kdump image based on the type of hotplug event, represented by image->hp_action.
+ * CPU add: Update the FDT segment to include the newly added CPU.
+ * CPU remove: No action is needed, with the assumption that it's okay to have offline CPUs
+ * part of the FDT.
+ * Memory add/remove: Rebuild the elfcorehdr segment via update_crash_elfcorehdr(), passing
+ * @arg on as the struct memory_notify.
+ * Any other action: Log a one-time warning and do nothing else.
+ */
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
+{
+ struct memory_notify *mn;
+
+ switch (image->hp_action) {
+ case KEXEC_CRASH_HP_REMOVE_CPU:
+ return;
+
+ case KEXEC_CRASH_HP_ADD_CPU:
+ update_crash_fdt(image);
+ break;
+
+ case KEXEC_CRASH_HP_REMOVE_MEMORY:
+ case KEXEC_CRASH_HP_ADD_MEMORY:
+ mn = (struct memory_notify *)arg;
+ update_crash_elfcorehdr(image, mn);
+ return;
+ default:
+ pr_warn_once("Unknown hotplug action\n");
+ }
+}
+#endif /* CONFIG_CRASH_HOTPLUG */
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 6d8951e8e9..214c071c58 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -116,7 +116,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
if (ret)
goto out_free_fdt;
- fdt_pack(fdt);
+ if (!IS_ENABLED(CONFIG_CRASH_HOTPLUG) || image->type != KEXEC_TYPE_CRASH)
+ fdt_pack(fdt);
kbuf.buffer = fdt;
kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 1bc65de617..925a69ad24 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -30,6 +30,7 @@
#include <asm/iommu.h>
#include <asm/prom.h>
#include <asm/plpks.h>
+#include <asm/cputhreads.h>
struct umem_info {
__be64 *buf; /* data buffer for usable-memory property */
@@ -48,83 +49,6 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
};
/**
- * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
- * regions like opal/rtas, tce-table, initrd,
- * kernel, htab which should be avoided while
- * setting up kexec load segments.
- * @mem_ranges: Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
-{
- int ret;
-
- ret = add_tce_mem_ranges(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_initrd_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_htab_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_kernel_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_rtas_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_opal_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_reserved_mem_ranges(mem_ranges);
- if (ret)
- goto out;
-
- /* exclude memory ranges should be sorted for easy lookup */
- sort_memory_ranges(*mem_ranges, true);
-out:
- if (ret)
- pr_err("Failed to setup exclude memory ranges\n");
- return ret;
-}
-
-/**
- * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
- * memory regions that should be added to the
- * memory reserve map to ensure the region is
- * protected from any mischief.
- * @mem_ranges: Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
-{
- int ret;
-
- ret = add_rtas_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_tce_mem_ranges(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_reserved_mem_ranges(mem_ranges);
-out:
- if (ret)
- pr_err("Failed to setup reserved memory ranges\n");
- return ret;
-}
-
-/**
* __locate_mem_hole_top_down - Looks top down for a large enough memory hole
* in the memory regions between buf_min & buf_max
* for the buffer. If found, sets kbuf->mem.
@@ -323,119 +247,6 @@ static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
#ifdef CONFIG_CRASH_DUMP
/**
- * get_usable_memory_ranges - Get usable memory ranges. This list includes
- * regions like crashkernel, opal/rtas & tce-table,
- * that kdump kernel could use.
- * @mem_ranges: Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
-{
- int ret;
-
- /*
- * Early boot failure observed on guests when low memory (first memory
- * block?) is not added to usable memory. So, add [0, crashk_res.end]
- * instead of [crashk_res.start, crashk_res.end] to workaround it.
- * Also, crashed kernel's memory must be added to reserve map to
- * avoid kdump kernel from using it.
- */
- ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
- if (ret)
- goto out;
-
- ret = add_rtas_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_opal_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_tce_mem_ranges(mem_ranges);
-out:
- if (ret)
- pr_err("Failed to setup usable memory ranges\n");
- return ret;
-}
-
-/**
- * get_crash_memory_ranges - Get crash memory ranges. This list includes
- * first/crashing kernel's memory regions that
- * would be exported via an elfcore.
- * @mem_ranges: Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
-{
- phys_addr_t base, end;
- struct crash_mem *tmem;
- u64 i;
- int ret;
-
- for_each_mem_range(i, &base, &end) {
- u64 size = end - base;
-
- /* Skip backup memory region, which needs a separate entry */
- if (base == BACKUP_SRC_START) {
- if (size > BACKUP_SRC_SIZE) {
- base = BACKUP_SRC_END + 1;
- size -= BACKUP_SRC_SIZE;
- } else
- continue;
- }
-
- ret = add_mem_range(mem_ranges, base, size);
- if (ret)
- goto out;
-
- /* Try merging adjacent ranges before reallocation attempt */
- if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
- sort_memory_ranges(*mem_ranges, true);
- }
-
- /* Reallocate memory ranges if there is no space to split ranges */
- tmem = *mem_ranges;
- if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
- tmem = realloc_mem_ranges(mem_ranges);
- if (!tmem)
- goto out;
- }
-
- /* Exclude crashkernel region */
- ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
- if (ret)
- goto out;
-
- /*
- * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
- * regions are exported to save their context at the time of
- * crash, they should actually be backed up just like the
- * first 64K bytes of memory.
- */
- ret = add_rtas_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_opal_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- /* create a separate program header for the backup region */
- ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
- if (ret)
- goto out;
-
- sort_memory_ranges(*mem_ranges, false);
-out:
- if (ret)
- pr_err("Failed to setup crash memory ranges\n");
- return ret;
-}
-
-/**
* check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries
* @um_info: Usable memory buffer and ranges info.
* @cnt: No. of entries to accommodate.
@@ -784,6 +595,23 @@ static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr)
}
}
+/**
+ * kdump_extra_elfcorehdr_size - Extra bytes to reserve beyond the generated
+ * elfcorehdr.
+ * @cmem: crash memory ranges; only nr_ranges is read
+ *
+ * With both CONFIG_CRASH_HOTPLUG and CONFIG_MEMORY_HOTPLUG enabled, returns the
+ * size of the Elf64_Phdr entries that are still unused out of
+ * CONFIG_CRASH_MAX_MEMORY_RANGES. A warning is logged and 0 is returned when
+ * that limit exceeds PN_XNUM or when @cmem already holds at least that many
+ * ranges. Returns 0 when either config option is disabled.
+ */
+static unsigned int kdump_extra_elfcorehdr_size(struct crash_mem *cmem)
+{
+#if defined(CONFIG_CRASH_HOTPLUG) && defined(CONFIG_MEMORY_HOTPLUG)
+ unsigned int extra_sz = 0;
+
+ if (CONFIG_CRASH_MAX_MEMORY_RANGES > (unsigned int)PN_XNUM)
+ pr_warn("Number of Phdrs %u exceeds max\n", CONFIG_CRASH_MAX_MEMORY_RANGES);
+ else if (cmem->nr_ranges >= CONFIG_CRASH_MAX_MEMORY_RANGES)
+ pr_warn("Configured crash mem ranges may not be enough\n");
+ else
+ extra_sz = (CONFIG_CRASH_MAX_MEMORY_RANGES - cmem->nr_ranges) * sizeof(Elf64_Phdr);
+
+ return extra_sz;
+#endif
+ /* Only reached when the block above is compiled out */
+ return 0;
+}
+
/**
* load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr
* segment needed to load kdump kernel.
@@ -815,7 +643,8 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
kbuf->buffer = headers;
kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf->bufsz = kbuf->memsz = headers_sz;
+ kbuf->bufsz = headers_sz;
+ kbuf->memsz = headers_sz + kdump_extra_elfcorehdr_size(cmem);
kbuf->top_down = false;
ret = kexec_add_buffer(kbuf);
@@ -979,6 +808,9 @@ static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image)
unsigned int cpu_nodes, extra_size = 0;
struct device_node *dn;
u64 usm_entries;
+#ifdef CONFIG_CRASH_HOTPLUG
+ unsigned int possible_cpu_nodes;
+#endif
if (!IS_ENABLED(CONFIG_CRASH_DUMP) || image->type != KEXEC_TYPE_CRASH)
return 0;
@@ -1006,6 +838,19 @@ static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image)
if (cpu_nodes > boot_cpu_node_count)
extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size();
+#ifdef CONFIG_CRASH_HOTPLUG
+ /*
+ * Make sure enough space is reserved to accommodate possible CPU nodes
+ * in the crash FDT. This allows packing possible CPU nodes which are
+ * not yet present in the system without regenerating the entire FDT.
+ */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ possible_cpu_nodes = num_possible_cpus() / threads_per_core;
+ if (possible_cpu_nodes > cpu_nodes)
+ extra_size += (possible_cpu_nodes - cpu_nodes) * cpu_node_size();
+ }
+#endif
+
return extra_size;
}
@@ -1028,93 +873,6 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
return extra_size + kdump_extra_fdt_size_ppc64(image);
}
-/**
- * add_node_props - Reads node properties from device node structure and add
- * them to fdt.
- * @fdt: Flattened device tree of the kernel
- * @node_offset: offset of the node to add a property at
- * @dn: device node pointer
- *
- * Returns 0 on success, negative errno on error.
- */
-static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
-{
- int ret = 0;
- struct property *pp;
-
- if (!dn)
- return -EINVAL;
-
- for_each_property_of_node(dn, pp) {
- ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
- if (ret < 0) {
- pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
- return ret;
- }
- }
- return ret;
-}
-
-/**
- * update_cpus_node - Update cpus node of flattened device tree using of_root
- * device node.
- * @fdt: Flattened device tree of the kernel.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int update_cpus_node(void *fdt)
-{
- struct device_node *cpus_node, *dn;
- int cpus_offset, cpus_subnode_offset, ret = 0;
-
- cpus_offset = fdt_path_offset(fdt, "/cpus");
- if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
- pr_err("Malformed device tree: error reading /cpus node: %s\n",
- fdt_strerror(cpus_offset));
- return cpus_offset;
- }
-
- if (cpus_offset > 0) {
- ret = fdt_del_node(fdt, cpus_offset);
- if (ret < 0) {
- pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
- return -EINVAL;
- }
- }
-
- /* Add cpus node to fdt */
- cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
- if (cpus_offset < 0) {
- pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
- return -EINVAL;
- }
-
- /* Add cpus node properties */
- cpus_node = of_find_node_by_path("/cpus");
- ret = add_node_props(fdt, cpus_offset, cpus_node);
- of_node_put(cpus_node);
- if (ret < 0)
- return ret;
-
- /* Loop through all subnodes of cpus and add them to fdt */
- for_each_node_by_type(dn, "cpu") {
- cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
- if (cpus_subnode_offset < 0) {
- pr_err("Unable to add %s subnode: %s\n", dn->full_name,
- fdt_strerror(cpus_subnode_offset));
- ret = cpus_subnode_offset;
- goto out;
- }
-
- ret = add_node_props(fdt, cpus_subnode_offset, dn);
- if (ret < 0)
- goto out;
- }
-out:
- of_node_put(dn);
- return ret;
-}
-
static int copy_property(void *fdt, int node_offset, const struct device_node *dn,
const char *propname)
{
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
index 33b780049a..3702b0bdab 100644
--- a/arch/powerpc/kexec/ranges.c
+++ b/arch/powerpc/kexec/ranges.c
@@ -20,9 +20,13 @@
#include <linux/kexec.h>
#include <linux/of.h>
#include <linux/slab.h>
+#include <linux/memblock.h>
+#include <linux/crash_core.h>
#include <asm/sections.h>
#include <asm/kexec_ranges.h>
+#include <asm/crashdump-ppc64.h>
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
/**
* get_max_nr_ranges - Get the max no. of ranges crash_mem structure
* could hold, given the size allocated for it.
@@ -234,13 +238,16 @@ int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
return __add_mem_range(mem_ranges, base, size);
}
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
+
+#ifdef CONFIG_KEXEC_FILE
/**
* add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.
* @mem_ranges: Range list to add the memory range(s) to.
*
* Returns 0 on success, negative errno on error.
*/
-int add_tce_mem_ranges(struct crash_mem **mem_ranges)
+static int add_tce_mem_ranges(struct crash_mem **mem_ranges)
{
struct device_node *dn = NULL;
int ret = 0;
@@ -279,7 +286,7 @@ int add_tce_mem_ranges(struct crash_mem **mem_ranges)
*
* Returns 0 on success, negative errno on error.
*/
-int add_initrd_mem_range(struct crash_mem **mem_ranges)
+static int add_initrd_mem_range(struct crash_mem **mem_ranges)
{
u64 base, end;
int ret;
@@ -296,7 +303,6 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges)
return ret;
}
-#ifdef CONFIG_PPC_64S_HASH_MMU
/**
* add_htab_mem_range - Adds htab range to the given memory ranges list,
* if it exists
@@ -304,14 +310,18 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges)
*
* Returns 0 on success, negative errno on error.
*/
-int add_htab_mem_range(struct crash_mem **mem_ranges)
+static int add_htab_mem_range(struct crash_mem **mem_ranges)
{
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!htab_address)
return 0;
return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes);
-}
+#else
+ return 0;
#endif
+}
/**
* add_kernel_mem_range - Adds kernel text region to the given
@@ -320,18 +330,20 @@ int add_htab_mem_range(struct crash_mem **mem_ranges)
*
* Returns 0 on success, negative errno on error.
*/
-int add_kernel_mem_range(struct crash_mem **mem_ranges)
+static int add_kernel_mem_range(struct crash_mem **mem_ranges)
{
return add_mem_range(mem_ranges, 0, __pa(_end));
}
+#endif /* CONFIG_KEXEC_FILE */
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
/**
* add_rtas_mem_range - Adds RTAS region to the given memory ranges list.
* @mem_ranges: Range list to add the memory range to.
*
* Returns 0 on success, negative errno on error.
*/
-int add_rtas_mem_range(struct crash_mem **mem_ranges)
+static int add_rtas_mem_range(struct crash_mem **mem_ranges)
{
struct device_node *dn;
u32 base, size;
@@ -356,7 +368,7 @@ int add_rtas_mem_range(struct crash_mem **mem_ranges)
*
* Returns 0 on success, negative errno on error.
*/
-int add_opal_mem_range(struct crash_mem **mem_ranges)
+static int add_opal_mem_range(struct crash_mem **mem_ranges)
{
struct device_node *dn;
u64 base, size;
@@ -374,7 +386,9 @@ int add_opal_mem_range(struct crash_mem **mem_ranges)
of_node_put(dn);
return ret;
}
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
+#ifdef CONFIG_KEXEC_FILE
/**
* add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w
* to the given memory ranges list.
@@ -382,7 +396,7 @@ int add_opal_mem_range(struct crash_mem **mem_ranges)
*
* Returns 0 on success, negative errno on error.
*/
-int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
+static int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
{
int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0;
struct device_node *root = of_find_node_by_path("/");
@@ -412,3 +426,283 @@ int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
return ret;
}
+
+/**
+ * get_reserved_memory_ranges - Collect the memory ranges that must go into
+ * the memory reserve map so that they stay
+ * protected from any mischief.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Adds the RTAS, TCE-table and "/reserved-ranges" regions, in that order,
+ * stopping at the first failure.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret = add_rtas_mem_range(mem_ranges);
+
+	if (!ret)
+		ret = add_tce_mem_ranges(mem_ranges);
+	if (!ret)
+		ret = add_reserved_mem_ranges(mem_ranges);
+
+	if (ret)
+		pr_err("Failed to setup reserved memory ranges\n");
+	return ret;
+}
+
+/**
+ * get_exclude_memory_ranges - Collect the memory ranges that kexec load
+ * segments must stay clear of: tce-table,
+ * initrd, htab, kernel, opal/rtas and the
+ * firmware reserved ranges.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * The ranges are added in a fixed order and collection stops at the first
+ * failure. The list is sorted only once every range has been added.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret = add_tce_mem_ranges(mem_ranges);
+
+	if (!ret)
+		ret = add_initrd_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_htab_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_kernel_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_rtas_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_opal_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_reserved_mem_ranges(mem_ranges);
+
+	if (ret) {
+		pr_err("Failed to setup exclude memory ranges\n");
+		return ret;
+	}
+
+	/* Keep the exclude list sorted so that lookups stay easy */
+	sort_memory_ranges(*mem_ranges, true);
+	return 0;
+}
+
+#ifdef CONFIG_CRASH_DUMP
+/**
+ * get_usable_memory_ranges - Collect the memory ranges the kdump kernel may
+ * use: the crashkernel region plus opal/rtas and
+ * tce-table.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_usable_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	/*
+	 * Guests were seen to fail early in boot when low memory (first
+	 * memory block?) was missing from usable memory. As a workaround,
+	 * [0, crashk_res.end] is added rather than
+	 * [crashk_res.start, crashk_res.end]. The crashed kernel's memory
+	 * must also be added to the reserve map so that the kdump kernel
+	 * does not use it.
+	 */
+	ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
+	if (!ret)
+		ret = add_rtas_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_opal_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_tce_mem_ranges(mem_ranges);
+
+	if (ret)
+		pr_err("Failed to setup usable memory ranges\n");
+	return ret;
+}
+#endif /* CONFIG_CRASH_DUMP */
+#endif /* CONFIG_KEXEC_FILE */
+
+#ifdef CONFIG_CRASH_DUMP
+/**
+ * get_crash_memory_ranges - Get crash memory ranges. This list includes
+ * first/crashing kernel's memory regions that
+ * would be exported via an elfcore.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Adds every memory range reported by for_each_mem_range() (minus the backup
+ * source region), excludes the crashkernel region, then adds the RTAS and
+ * OPAL regions and a separate entry for the backup region. The final list is
+ * sorted.
+ *
+ * The caller owns *@mem_ranges and must free it, including when an error is
+ * returned, because the list may have been partially built by then.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_crash_memory_ranges(struct crash_mem **mem_ranges)
+{
+	phys_addr_t base, end;
+	struct crash_mem *tmem;
+	u64 i;
+	int ret;
+
+	for_each_mem_range(i, &base, &end) {
+		u64 size = end - base;
+
+		/* Skip backup memory region, which needs a separate entry */
+		if (base == BACKUP_SRC_START) {
+			if (size > BACKUP_SRC_SIZE) {
+				base = BACKUP_SRC_END + 1;
+				size -= BACKUP_SRC_SIZE;
+			} else
+				continue;
+		}
+
+		ret = add_mem_range(mem_ranges, base, size);
+		if (ret)
+			goto out;
+
+		/* Try merging adjacent ranges before reallocation attempt */
+		if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
+			sort_memory_ranges(*mem_ranges, true);
+	}
+
+	/* Reallocate memory ranges if there is no space to split ranges */
+	tmem = *mem_ranges;
+	if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
+		tmem = realloc_mem_ranges(mem_ranges);
+		if (!tmem) {
+			/*
+			 * ret still holds 0 from the last successful
+			 * add_mem_range() call; report the failure instead
+			 * of returning success.
+			 */
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	/* Exclude crashkernel region */
+	ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
+	if (ret)
+		goto out;
+
+	/*
+	 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
+	 * regions are exported to save their context at the time of
+	 * crash, they should actually be backed up just like the
+	 * first 64K bytes of memory.
+	 */
+	ret = add_rtas_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_opal_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	/* create a separate program header for the backup region */
+	ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
+	if (ret)
+		goto out;
+
+	sort_memory_ranges(*mem_ranges, false);
+out:
+	if (ret)
+		pr_err("Failed to setup crash memory ranges\n");
+	return ret;
+}
+
+/**
+ * remove_mem_range - Removes the given memory range from the range list.
+ * @mem_ranges: Range list to remove the memory range from.
+ * @base: Base address of the range to remove.
+ * @size: Size of the memory range to remove.
+ *
+ * Only the first list entry that fully contains [@base, @base + @size - 1] is
+ * modified. A range that is not fully contained in a single entry is left
+ * untouched. Removing from the middle of an entry splits it in two, which
+ * (re)allocates memory if needed.
+ *
+ * Returns 0 on success (including when nothing was removed), negative errno
+ * on error.
+ */
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+	u64 end;
+	int ret = 0;
+	unsigned int i;
+	u64 mstart, mend;
+	struct crash_mem *mem_rngs = *mem_ranges;
+
+	if (!mem_rngs || !size)
+		return 0;
+
+	/*
+	 * Memory range are stored as start and end address, use
+	 * the same format to do remove operation.
+	 */
+	end = base + size - 1;
+
+	for (i = 0; i < mem_rngs->nr_ranges; i++) {
+		mstart = mem_rngs->ranges[i].start;
+		mend = mem_rngs->ranges[i].end;
+
+		/*
+		 * Memory range to remove is not part of this range entry
+		 * in the memory range list
+		 */
+		if (!(base >= mstart && end <= mend))
+			continue;
+
+		if (base == mstart && end == mend) {
+			/*
+			 * Memory range to remove is equivalent to this entry
+			 * in the memory range list. Remove the range entry
+			 * from the list by shifting the later entries down.
+			 */
+			for (; i < mem_rngs->nr_ranges - 1; i++) {
+				mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start;
+				mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end;
+			}
+			mem_rngs->nr_ranges--;
+		} else if (base == mstart) {
+			/*
+			 * The range to remove sits at the start of this
+			 * entry. Just move the entry's start to end + 1.
+			 */
+			mem_rngs->ranges[i].start = end + 1;
+		} else if (end == mend) {
+			/*
+			 * The range to remove sits at the end of this entry.
+			 * Just move the entry's end to base - 1.
+			 */
+			mem_rngs->ranges[i].end = base - 1;
+		} else {
+			/*
+			 * The range to remove is in the middle of this entry.
+			 * Split the entry in two: shrink it to
+			 * [mstart, base - 1] and add [end + 1, mend] as a new
+			 * range. The new range's size is taken from the
+			 * entry's original end (mend), not from the end that
+			 * was just overwritten.
+			 */
+			mem_rngs->ranges[i].end = base - 1;
+			ret = add_mem_range(mem_ranges, end + 1, mend - end);
+		}
+
+		/*
+		 * The containing entry has been handled. Stop here: after
+		 * add_mem_range() the list may have been reallocated, so
+		 * mem_rngs must not be dereferenced again.
+		 */
+		break;
+	}
+
+	return ret;
+}
+#endif /* CONFIG_CRASH_DUMP */