// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
/*
 * skiboot C entry point
 *
 * Copyright 2013-2019 IBM Corp.
 */

#include <skiboot.h>
#include <slw.h>
#include <psi.h>
#include <chiptod.h>
#include <nx.h>
#include <cpu.h>
#include <processor.h>
#include <xscom.h>
#include <opal.h>
#include <opal-msg.h>
#include <elf.h>
#include <elf-abi.h>
#include <io.h>
#include <cec.h>
#include <device.h>
#include <pci.h>
#include <lpc.h>
#include <i2c.h>
#include <chip.h>
#include <interrupts.h>
#include <mem_region.h>
#include <trace.h>
#include <console.h>
#include <fsi-master.h>
#include <centaur.h>
#include <ocmb.h>
#include <libfdt/libfdt.h>
#include <timer.h>
#include <ipmi.h>
#include <pldm.h>
#include <sensor.h>
#include <xive.h>
#include <nvram.h>
#include <vas.h>
#include <libstb/secureboot.h>
#include <libstb/trustedboot.h>
#include <phys-map.h>
#include <imc.h>
#include <dts.h>
#include <dio-p9.h>
#include <sbe-p9.h>
#include <debug_descriptor.h>
#include <occ.h>
#include <opal-dump.h>
#include <xscom-p9-regs.h>
#include <xscom-p10-regs.h>

enum proc_gen proc_gen;
unsigned int pcie_max_link_speed;
bool pci_tracing;
bool verbose_eeh;
extern const char version[];

static uint64_t kernel_entry;
static size_t kernel_size;
static bool kernel_32bit;

/* We back up the previous vectors here before copying our own */
static uint8_t old_vectors[EXCEPTION_VECTORS_END];

#ifdef DEBUG
#define DEBUG_STR "-debug"
#else
#define DEBUG_STR ""
#endif

#ifdef SKIBOOT_GCOV
void skiboot_gcov_done(void);
#endif

struct debug_descriptor debug_descriptor = {
	.eye_catcher = "OPALdbug",
	.version = CPU_TO_BE32(DEBUG_DESC_VERSION),
	.state_flags = 0,
	.memcons_phys = 0, /* cpu_to_be64(&memcons) can't init constant */
	.trace_mask = 0, /* All traces disabled by default */
	/* console log level:
	 * high 4 bits in memory, low 4 bits driver (e.g. uart). */
#ifdef DEBUG
	.console_log_levels = (PR_TRACE << 4) | PR_DEBUG,
#else
	.console_log_levels = (PR_DEBUG << 4) | PR_NOTICE,
#endif
};

static void checksum_romem(void);

static bool try_load_elf64_le(struct elf_hdr *header)
{
	struct elf64le_hdr *kh = (struct elf64le_hdr *)header;
	uint64_t load_base = (uint64_t)kh;
	struct elf64le_phdr *ph;
	unsigned int i;

	printf("INIT: 64-bit LE kernel discovered\n");

	/* Look for a loadable program header that has our entry in it
	 *
	 * Note that we execute the kernel in-place, we don't actually
	 * obey the load information in the headers. This is expected
	 * to work for the Linux kernel because it's a fairly dumb ELF
	 * but it will not work for arbitrary ELF binaries.
	 */
	ph = (struct elf64le_phdr *)(load_base + le64_to_cpu(kh->e_phoff));
	for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
		if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
			continue;
		if (le64_to_cpu(ph->p_vaddr) > le64_to_cpu(kh->e_entry) ||
		    (le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_memsz)) <
		    le64_to_cpu(kh->e_entry))
			continue;

		/* Get our entry */
		kernel_entry = le64_to_cpu(kh->e_entry) -
			le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_offset);
		break;
	}

	if (!kernel_entry) {
		prerror("INIT: Failed to find kernel entry!\n");
		return false;
	}
	kernel_entry += load_base;
	kernel_32bit = false;

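	/*
	 * Note: the image size is approximated as the end of the section
	 * header table (e_shoff + e_shentsize * e_shnum), which normally
	 * sits at the very end of an ELF file.
	 */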
	kernel_size = le64_to_cpu(kh->e_shoff) +
		((uint32_t)le16_to_cpu(kh->e_shentsize) *
		 (uint32_t)le16_to_cpu(kh->e_shnum));

	prlog(PR_DEBUG, "INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n",
	      kernel_entry, kernel_size);

	return true;
}

static bool try_load_elf64(struct elf_hdr *header)
{
	struct elf64be_hdr *kh = (struct elf64be_hdr *)header;
	struct elf64le_hdr *khle = (struct elf64le_hdr *)header;
	uint64_t load_base = (uint64_t)kh;
	struct elf64be_phdr *ph;
	struct elf64be_shdr *sh;
	unsigned int i;

	/* Check it's a ppc64 LE ELF */
	if (khle->ei_ident == ELF_IDENT &&
	    khle->ei_data == ELF_DATA_LSB &&
	    le16_to_cpu(khle->e_machine) == ELF_MACH_PPC64) {
		return try_load_elf64_le(header);
	}

	/* Check it's a ppc64 ELF */
	if (kh->ei_ident != ELF_IDENT ||
	    kh->ei_data != ELF_DATA_MSB ||
	    be16_to_cpu(kh->e_machine) != ELF_MACH_PPC64) {
		prerror("INIT: Kernel doesn't look like a ppc64 ELF\n");
		return false;
	}

	/* Look for a loadable program header that has our entry in it
	 *
	 * Note that we execute the kernel in-place, we don't actually
	 * obey the load information in the headers. This is expected
	 * to work for the Linux kernel because it's a fairly dumb ELF
	 * but it will not work for arbitrary ELF binaries.
	 */
	ph = (struct elf64be_phdr *)(load_base + be64_to_cpu(kh->e_phoff));
	for (i = 0; i < be16_to_cpu(kh->e_phnum); i++, ph++) {
		if (be32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
			continue;
		if (be64_to_cpu(ph->p_vaddr) > be64_to_cpu(kh->e_entry) ||
		    (be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_memsz)) <
		    be64_to_cpu(kh->e_entry))
			continue;

		/* Get our entry */
		kernel_entry = be64_to_cpu(kh->e_entry) -
			be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_offset);
		break;
	}

	if (!kernel_entry) {
		prerror("INIT: Failed to find kernel entry!\n");
		return false;
	}

	/* For the normal big-endian ELF ABI, the kernel entry points
	 * to a function descriptor in the data section. Linux instead
	 * has it point directly to code. Test whether it is pointing
	 * into an executable section or not to figure this out. Default
	 * to assuming it obeys the ABI.
	 */
	sh = (struct elf64be_shdr *)(load_base + be64_to_cpu(kh->e_shoff));
	for (i = 0; i < be16_to_cpu(kh->e_shnum); i++, sh++) {
		if (be64_to_cpu(sh->sh_addr) <= be64_to_cpu(kh->e_entry) &&
		    (be64_to_cpu(sh->sh_addr) + be64_to_cpu(sh->sh_size)) >
		    be64_to_cpu(kh->e_entry))
			break;
	}

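	/*
	 * No executable section contains the entry: treat it as an ABI v1
	 * function descriptor. Its first doubleword is the real entry
	 * address, which we translate back to a file offset using the
	 * program header found above.
	 */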
	if (i == be16_to_cpu(kh->e_shnum) ||
	    !(be64_to_cpu(sh->sh_flags) & ELF_SFLAGS_X)) {
		kernel_entry = *(uint64_t *)(kernel_entry + load_base);
		kernel_entry = kernel_entry -
			be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_offset);
	}

	kernel_entry += load_base;
	kernel_32bit = false;

	kernel_size = be64_to_cpu(kh->e_shoff) +
		((uint32_t)be16_to_cpu(kh->e_shentsize) *
		 (uint32_t)be16_to_cpu(kh->e_shnum));

	printf("INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n",
	       kernel_entry, kernel_size);

	return true;
}

static bool try_load_elf32_le(struct elf_hdr *header)
{
	struct elf32le_hdr *kh = (struct elf32le_hdr *)header;
	uint64_t load_base = (uint64_t)kh;
	struct elf32le_phdr *ph;
	unsigned int i;

	printf("INIT: 32-bit LE kernel discovered\n");

	/* Look for a loadable program header that has our entry in it
	 *
	 * Note that we execute the kernel in-place, we don't actually
	 * obey the load information in the headers. This is expected
	 * to work for the Linux kernel because it's a fairly dumb ELF
	 * but it will not work for arbitrary ELF binaries.
	 */
	ph = (struct elf32le_phdr *)(load_base + le32_to_cpu(kh->e_phoff));
	for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
		if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
			continue;
		if (le32_to_cpu(ph->p_vaddr) > le32_to_cpu(kh->e_entry) ||
		    (le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_memsz)) <
		    le32_to_cpu(kh->e_entry))
			continue;

		/* Get our entry */
		kernel_entry = le32_to_cpu(kh->e_entry) -
			le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_offset);
		break;
	}

	if (!kernel_entry) {
		prerror("INIT: Failed to find kernel entry!\n");
		return false;
	}

	kernel_entry += load_base;
	kernel_32bit = true;

	printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);

	return true;
}

static bool try_load_elf32(struct elf_hdr *header)
{
	struct elf32be_hdr *kh = (struct elf32be_hdr *)header;
	struct elf32le_hdr *khle = (struct elf32le_hdr *)header;
	uint64_t load_base = (uint64_t)kh;
	struct elf32be_phdr *ph;
	unsigned int i;

	/* Check it's a ppc32 LE ELF */
	if (khle->ei_ident == ELF_IDENT &&
	    khle->ei_data == ELF_DATA_LSB &&
	    le16_to_cpu(khle->e_machine) == ELF_MACH_PPC32) {
		return try_load_elf32_le(header);
	}

	/* Check it's a ppc32 ELF */
	if (kh->ei_ident != ELF_IDENT ||
	    kh->ei_data != ELF_DATA_MSB ||
	    be16_to_cpu(kh->e_machine) != ELF_MACH_PPC32) {
		prerror("INIT: Kernel doesn't look like a ppc32 ELF\n");
		return false;
	}

	/* Look for a loadable program header that has our entry in it
	 *
	 * Note that we execute the kernel in-place, we don't actually
	 * obey the load information in the headers. This is expected
	 * to work for the Linux kernel because it's a fairly dumb ELF
	 * but it will not work for arbitrary ELF binaries.
	 */
	ph = (struct elf32be_phdr *)(load_base + be32_to_cpu(kh->e_phoff));
	for (i = 0; i < be16_to_cpu(kh->e_phnum); i++, ph++) {
		if (be32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
			continue;
		if (be32_to_cpu(ph->p_vaddr) > be32_to_cpu(kh->e_entry) ||
		    (be32_to_cpu(ph->p_vaddr) + be32_to_cpu(ph->p_memsz)) <
		    be32_to_cpu(kh->e_entry))
			continue;

		/* Get our entry */
		kernel_entry = be32_to_cpu(kh->e_entry) -
			be32_to_cpu(ph->p_vaddr) + be32_to_cpu(ph->p_offset);
		break;
	}

	if (!kernel_entry) {
		prerror("INIT: Failed to find kernel entry!\n");
		return false;
	}

	kernel_entry += load_base;
	kernel_32bit = true;

	printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);

	return true;
}

extern char __builtin_kernel_start[];
extern char __builtin_kernel_end[];
extern uint64_t boot_offset;

static size_t initramfs_size;

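/*
 * Kick off the asynchronous loads of the kernel and initramfs images.
 * They are only waited on later, in load_kernel() and load_initramfs(),
 * so the rest of boot can proceed in the meantime.
 */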
bool start_preload_kernel(void)
{
	int loaded;

	/* Try to load an external kernel payload through the platform hooks */
	kernel_size = KERNEL_LOAD_SIZE;
	loaded = start_preload_resource(RESOURCE_ID_KERNEL,
					RESOURCE_SUBID_NONE,
					KERNEL_LOAD_BASE,
					&kernel_size);
	if (loaded != OPAL_SUCCESS) {
		printf("INIT: platform start load kernel failed\n");
		kernel_size = 0;
		return false;
	}

	initramfs_size = INITRAMFS_LOAD_SIZE;
	loaded = start_preload_resource(RESOURCE_ID_INITRAMFS,
					RESOURCE_SUBID_NONE,
					INITRAMFS_LOAD_BASE, &initramfs_size);
	if (loaded != OPAL_SUCCESS) {
		printf("INIT: platform start load initramfs failed\n");
		initramfs_size = 0;
		return false;
	}

	return true;
}

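/*
 * Find the kernel image: wait for any platform preload started in
 * start_preload_kernel(), fall back to the built-in payload if there
 * is one, then locate the ELF header (possibly inside a secure-boot
 * container) and parse it.
 */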
static bool load_kernel(void)
{
	void *stb_container = NULL;
	struct elf_hdr *kh;
	int loaded;

	prlog(PR_NOTICE, "INIT: Waiting for kernel...\n");

	loaded = wait_for_resource_loaded(RESOURCE_ID_KERNEL,
					  RESOURCE_SUBID_NONE);

	if (loaded != OPAL_SUCCESS) {
		printf("INIT: platform wait for kernel load failed\n");
		kernel_size = 0;
	}

	/* Try embedded kernel payload */
	if (!kernel_size) {
		kernel_size = __builtin_kernel_end - __builtin_kernel_start;
		if (kernel_size) {
			/* Move the built-in kernel up */
			uint64_t builtin_base =
				((uint64_t)__builtin_kernel_start) -
				SKIBOOT_BASE + boot_offset;
			printf("Using built-in kernel\n");
			memmove(KERNEL_LOAD_BASE, (void*)builtin_base,
				kernel_size);
		}
	}

	if (dt_has_node_property(dt_chosen, "kernel-base-address", NULL)) {
		kernel_entry = dt_prop_get_u64(dt_chosen,
					       "kernel-base-address");
		prlog(PR_DEBUG, "INIT: Kernel image at 0x%llx\n", kernel_entry);
		kh = (struct elf_hdr *)kernel_entry;
		/*
		 * If the kernel is at 0, restore it as it was overwritten
		 * by our vectors.
		 */
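		/* (old_vectors was saved early in main_cpu_entry()) */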
		if (kernel_entry < EXCEPTION_VECTORS_END) {
			cpu_set_sreset_enable(false);
			memcpy_null(NULL, old_vectors, EXCEPTION_VECTORS_END);
			sync_icache();
		} else {
			/* Hack for STB in Mambo, assume at least 4kb in mem */
			if (!kernel_size)
				kernel_size = SECURE_BOOT_HEADERS_SIZE;
			if (stb_is_container((void*)kernel_entry, kernel_size)) {
				stb_container = (void*)kernel_entry;
				kh = (struct elf_hdr *) (kernel_entry + SECURE_BOOT_HEADERS_SIZE);
			} else
				kh = (struct elf_hdr *) (kernel_entry);
		}
	} else {
		if (!kernel_size) {
			printf("INIT: Assuming kernel at %p\n",
			       KERNEL_LOAD_BASE);
			/* Hack for STB in Mambo, assume at least 4kb in mem */
			kernel_size = SECURE_BOOT_HEADERS_SIZE;
			kernel_entry = (uint64_t)KERNEL_LOAD_BASE;
		}
		if (stb_is_container(KERNEL_LOAD_BASE, kernel_size)) {
			stb_container = KERNEL_LOAD_BASE;
			kh = (struct elf_hdr *) (KERNEL_LOAD_BASE + SECURE_BOOT_HEADERS_SIZE);
		} else
			kh = (struct elf_hdr *) (KERNEL_LOAD_BASE);

	}

	prlog(PR_DEBUG,
	      "INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\n",
	      kernel_size);

	if (kh->ei_ident != ELF_IDENT) {
		prerror("INIT: ELF header not found. Assuming raw binary.\n");
		return true;
	}

	if (kh->ei_class == ELF_CLASS_64) {
		if (!try_load_elf64(kh))
			return false;
	} else if (kh->ei_class == ELF_CLASS_32) {
		if (!try_load_elf32(kh))
			return false;
	} else {
		prerror("INIT: Neither ELF32 nor ELF64?\n");
		return false;
	}

	if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
		secureboot_verify(RESOURCE_ID_KERNEL,
				  stb_container,
				  SECURE_BOOT_HEADERS_SIZE + kernel_size);
		trustedboot_measure(RESOURCE_ID_KERNEL,
				    stb_container,
				    SECURE_BOOT_HEADERS_SIZE + kernel_size);
	}

	return true;
}

static void load_initramfs(void)
{
	uint64_t *initramfs_start;
	void *stb_container = NULL;
	int loaded;

	loaded = wait_for_resource_loaded(RESOURCE_ID_INITRAMFS,
					  RESOURCE_SUBID_NONE);

	if (loaded != OPAL_SUCCESS || !initramfs_size)
		return;

	if (stb_is_container(INITRAMFS_LOAD_BASE, initramfs_size)) {
		stb_container = INITRAMFS_LOAD_BASE;
		initramfs_start = INITRAMFS_LOAD_BASE + SECURE_BOOT_HEADERS_SIZE;
	} else {
		initramfs_start = INITRAMFS_LOAD_BASE;
	}

	dt_check_del_prop(dt_chosen, "linux,initrd-start");
	dt_check_del_prop(dt_chosen, "linux,initrd-end");

	printf("INIT: Initramfs loaded, size: %zu bytes\n", initramfs_size);

	dt_add_property_u64(dt_chosen, "linux,initrd-start",
			    (uint64_t)initramfs_start);
	dt_add_property_u64(dt_chosen, "linux,initrd-end",
			    (uint64_t)initramfs_start + initramfs_size);

	if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
		secureboot_verify(RESOURCE_ID_INITRAMFS,
				  stb_container,
				  SECURE_BOOT_HEADERS_SIZE + initramfs_size);
		trustedboot_measure(RESOURCE_ID_INITRAMFS,
				    stb_container,
				    SECURE_BOOT_HEADERS_SIZE + initramfs_size);
	}
}

static void cpu_disable_ME_RI_one(void *param __unused)
{
	disable_machine_check();
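	/* The L=1 form of mtmsrd updates only MSR[EE] and MSR[RI], so
	 * writing 0 clears RI (and EE) without touching other MSR bits. */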
	mtmsrd(0, 1);
}

static int64_t cpu_disable_ME_RI_all(void)
{
	struct cpu_thread *cpu;
	struct cpu_job **jobs;

	jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
	assert(jobs);

	for_each_available_cpu(cpu) {
		if (cpu == this_cpu())
			continue;
		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_disable_ME_RI",
					       cpu_disable_ME_RI_one, NULL);
	}

	/* this cpu */
	cpu_disable_ME_RI_one(NULL);

	for_each_available_cpu(cpu) {
		if (jobs[cpu->pir])
			cpu_wait_job(jobs[cpu->pir], true);
	}

	free(jobs);

	return OPAL_SUCCESS;
}

static void *fdt;

void __noreturn load_and_boot_kernel(bool is_reboot)
{
	const struct dt_property *memprop;
	const char *cmdline, *stdoutp;
	uint64_t mem_top;

	memprop = dt_find_property(dt_root, DT_PRIVATE "maxmem");
	if (memprop)
		mem_top = (u64)dt_property_get_cell(memprop, 0) << 32
			| dt_property_get_cell(memprop, 1);
	else /* XXX HB hack, might want to calc it */
		mem_top = 0x40000000;

	op_display(OP_LOG, OP_MOD_INIT, 0x000A);

	/* Load kernel LID */
	if (!load_kernel()) {
		op_display(OP_FATAL, OP_MOD_INIT, 1);
		abort();
	}

	load_initramfs();

	trustedboot_exit_boot_services();

#ifdef CONFIG_PLDM
	pldm_platform_send_progress_state_change(
		PLDM_STATE_SET_BOOT_PROG_STATE_STARTING_OP_SYS);
#else
	ipmi_set_fw_progress_sensor(IPMI_FW_OS_BOOT);
#endif

	if (!is_reboot) {
		/* We wait for the nvram read to complete here so we can
		 * grab stuff from there such as the kernel arguments
		 */
		nvram_wait_for_load();

		if (!occ_sensors_init())
			dts_sensor_create_nodes(sensor_node);

		opal_mpipl_init();

	} else {
		/* fdt will be rebuilt */
		free(fdt);
		fdt = NULL;

		nvram_reinit();
		occ_pstates_init();
	}

	/* Use nvram bootargs over device tree */
	cmdline = nvram_query_safe("bootargs");
	if (cmdline) {
		dt_check_del_prop(dt_chosen, "bootargs");
		dt_add_property_string(dt_chosen, "bootargs", cmdline);
		prlog(PR_DEBUG, "INIT: Command line from NVRAM: %s\n",
		      cmdline);
	}

	op_display(OP_LOG, OP_MOD_INIT, 0x000B);

	add_fast_reboot_dt_entries();

	if (platform.finalise_dt)
		platform.finalise_dt(is_reboot);

	/* Create the device tree blob to boot OS. */
	fdt = create_dtb(dt_root, false);
	if (!fdt) {
		op_display(OP_FATAL, OP_MOD_INIT, 2);
		abort();
	}

	op_display(OP_LOG, OP_MOD_INIT, 0x000C);

	mem_dump_free();

	/* Dump the selected console */
	stdoutp = dt_prop_get_def(dt_chosen, "linux,stdout-path", NULL);
	prlog(PR_DEBUG, "INIT: stdout-path: %s\n", stdoutp ? stdoutp : "");

	fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);

	/* Check there is something there before we branch to it */
	if (*(uint32_t *)kernel_entry == 0) {
		prlog(PR_EMERG, "FATAL: Kernel is zeros, can't execute!\n");
		assert(0);
	}

	if (platform.exit)
		platform.exit();

	/* Take processors out of nap */
	cpu_set_sreset_enable(false);
	cpu_set_ipi_enable(false);

	printf("INIT: Starting kernel at 0x%llx, fdt at %p %u bytes\n",
	       kernel_entry, fdt, fdt_totalsize(fdt));

	/* Disable machine checks on all */
	cpu_disable_ME_RI_all();

	patch_traps(false);
	cpu_set_hile_mode(false); /* Clear HILE on all CPUs */

	checksum_romem();

	debug_descriptor.state_flags |= OPAL_BOOT_COMPLETE;

	cpu_give_self_os();

	if (kernel_32bit)
		start_kernel32(kernel_entry, fdt, mem_top);
	start_kernel(kernel_entry, fdt, mem_top);
}

static void storage_keys_fixup(void)
{
	struct dt_node *cpus, *n;

	cpus = dt_find_by_path(dt_root, "/cpus");
	assert(cpus);

	if (proc_gen == proc_gen_unknown)
		return;

	dt_for_each_child(cpus, n) {
		/* There may be cache nodes in /cpus. */
		if (!dt_has_node_property(n, "device_type", "cpu") ||
		    dt_has_node_property(n, "ibm,processor-storage-keys", NULL))
			continue;

		/*
		 * skiboot supports p8 & p9, both of which support the IAMR, and
		 * both of which support 32 keys. So advertise 32 keys for data
		 * accesses and 32 for instruction accesses.
		 */
		dt_add_property_cells(n, "ibm,processor-storage-keys", 32, 32);
	}
}

static void dt_fixups(void)
{
	struct dt_node *n;
	struct dt_node *primary_lpc = NULL;

	/* lpc node missing #address/size cells. Also pick one as
	 * primary for now (TBD: How to convey that from HB)
	 */
	dt_for_each_compatible(dt_root, n, "ibm,power8-lpc") {
		if (!primary_lpc || dt_has_node_property(n, "primary", NULL))
			primary_lpc = n;
		if (dt_has_node_property(n, "#address-cells", NULL))
			break;
		dt_add_property_cells(n, "#address-cells", 2);
		dt_add_property_cells(n, "#size-cells", 1);
		dt_add_property_strings(n, "status", "ok");
	}

	/* Missing "primary" property in LPC bus */
	if (primary_lpc && !dt_has_node_property(primary_lpc, "primary", NULL))
		dt_add_property(primary_lpc, "primary", NULL, 0);

	/* Missing "scom-controller" */
	dt_for_each_compatible(dt_root, n, "ibm,xscom") {
		if (!dt_has_node_property(n, "scom-controller", NULL))
			dt_add_property(n, "scom-controller", NULL, 0);
	}

	storage_keys_fixup();
}

static void add_arch_vector(void)
{
	/**
	 * vec5 = a PVR-list : Number-of-option-vectors :
	 *	  option-vectors[Number-of-option-vectors + 1]
	 */
	uint8_t vec5[] = {0x05, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00};

	if (dt_has_node_property(dt_chosen, "ibm,architecture-vec-5", NULL))
		return;

	dt_add_property(dt_chosen, "ibm,architecture-vec-5",
			vec5, sizeof(vec5));
}

static void dt_init_misc(void)
{
	/* Check if there's a /chosen node, if not, add one */
	dt_chosen = dt_find_by_path(dt_root, "/chosen");
	if (!dt_chosen)
		dt_chosen = dt_new(dt_root, "chosen");
	assert(dt_chosen);

	/* Add IBM architecture vectors if needed */
	add_arch_vector();

	/* Add the "OPAL virtual" ICS */
	add_ics_node();

	/* Additional fixups. TODO: Move into platform */
	dt_fixups();
}

static u8 console_get_level(const char *s)
{
	if (strcmp(s, "emerg") == 0)
		return PR_EMERG;
	if (strcmp(s, "alert") == 0)
		return PR_ALERT;
	if (strcmp(s, "crit") == 0)
		return PR_CRIT;
	if (strcmp(s, "err") == 0)
		return PR_ERR;
	if (strcmp(s, "warning") == 0)
		return PR_WARNING;
	if (strcmp(s, "notice") == 0)
		return PR_NOTICE;
	if (strcmp(s, "printf") == 0)
		return PR_PRINTF;
	if (strcmp(s, "info") == 0)
		return PR_INFO;
	if (strcmp(s, "debug") == 0)
		return PR_DEBUG;
	if (strcmp(s, "trace") == 0)
		return PR_TRACE;
	if (strcmp(s, "insane") == 0)
		return PR_INSANE;
	/* Assume it's a number instead */
	return atoi(s);
}

static void console_log_level(void)
{
	const char *s;
	u8 level;

	/* console log level:
	 * high 4 bits in memory, low 4 bits driver (e.g. uart). */
	s = nvram_query_safe("log-level-driver");
	if (s) {
		level = console_get_level(s);
		debug_descriptor.console_log_levels =
			(debug_descriptor.console_log_levels & 0xf0) |
			(level & 0x0f);
		prlog(PR_NOTICE, "console: Setting driver log level to %i\n",
		      level & 0x0f);
	}
	s = nvram_query_safe("log-level-memory");
	if (s) {
		level = console_get_level(s);
		debug_descriptor.console_log_levels =
			(debug_descriptor.console_log_levels & 0x0f) |
			((level & 0x0f) << 4);
		prlog(PR_NOTICE, "console: Setting memory log level to %i\n",
		      level & 0x0f);
	}
}

typedef void (*ctorcall_t)(void);

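/*
 * __ctors_start/__ctors_end delimit the array of constructor entry
 * points collected by the linker script; call each one in turn.
 */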
static void __nomcount do_ctors(void)
{
	extern ctorcall_t __ctors_start[], __ctors_end[];
	ctorcall_t *call;

	for (call = __ctors_start; call < __ctors_end; call++)
		(*call)();
}

#ifdef ELF_ABI_v2
static void setup_branch_null_catcher(void)
{
	asm volatile(					\
		".section .rodata"		"\n\t"	\
		"3: .string \"branch to NULL\""	"\n\t"	\
		".previous"			"\n\t"	\
		".section .trap_table,\"aw\""	"\n\t"	\
		".llong 0"			"\n\t"	\
		".llong 3b"			"\n\t"	\
		".previous"			"\n\t"	\
	);
}
#else
static void branch_null(void)
{
	assert(0);
}

static void setup_branch_null_catcher(void)
{
	void (*bn)(void) = branch_null;

	/*
	 * FIXME: This copies the function descriptor (16 bytes) for
	 * ABI v1 (ie. big endian). This will be broken if we ever
	 * move to ABI v2 (ie little endian)
	 */
	memcpy_null((void *)0, bn, 16);
}
#endif

void copy_sreset_vector(void)
{
	uint32_t *src, *dst;

	/* Copy the reset code over the entry point. */
	src = &reset_patch_start;
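	/* 0x100 is the system reset interrupt vector, where CPUs coming
	 * out of nap/sreset begin executing. */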
	dst = (uint32_t *)0x100;
	while (src < &reset_patch_end)
		*(dst++) = *(src++);
	sync_icache();
}

void copy_sreset_vector_fast_reboot(void)
{
	uint32_t *src, *dst;

	/* Copy the reset code over the entry point. */
	src = &reset_fast_reboot_patch_start;
	dst = (uint32_t *)0x100;
	while (src < &reset_fast_reboot_patch_end)
		*(dst++) = *(src++);
	sync_icache();
}

void copy_exception_vectors(void)
{
	/* Copy from 0x100 to EXCEPTION_VECTORS_END, avoid below 0x100 as
	 * this is the boot flag used by CPUs still potentially entering
	 * skiboot.
	 */
	void *skiboot_constant_addr exception_vectors_start_addr = (void *)(SKIBOOT_BASE + 0x100);
	void *skiboot_constant_addr dst = (void *)0x100;

	memcpy(dst, exception_vectors_start_addr,
	       EXCEPTION_VECTORS_END - 0x100);
	sync_icache();
}

/*
 * When skiboot owns the exception vectors, patch in 'trap' for assert fails.
 * Otherwise use assert_fail().
 */
void patch_traps(bool enable)
{
	struct trap_table_entry *tte;

	for (tte = __trap_table_start; tte < __trap_table_end; tte++) {
		uint32_t *insn;

		insn = (uint32_t *)tte->address;
		if (enable) {
			*insn = PPC_INST_TRAP;
		} else {
			*insn = PPC_INST_NOP;
		}
	}

	sync_icache();
}

static void per_thread_sanity_checks(void)
{
	struct cpu_thread *cpu = this_cpu();

	/**
	 * @fwts-label NonZeroHRMOR
	 * @fwts-advice The contents of the hypervisor real mode offset register
	 * (HRMOR) is bitwise ORed with the address of any hypervisor real mode
	 * (i.e. skiboot) memory accesses. Skiboot does not support operating
	 * with a non-zero HRMOR and setting it will break some things (e.g.
	 * XSCOMs) in hard-to-debug ways.
	 */
	assert(mfspr(SPR_HRMOR) == 0);

	/**
	 * @fwts-label UnknownSecondary
	 * @fwts-advice The boot CPU attempted to call in a secondary thread
	 * without initialising the corresponding cpu_thread structure. This may
	 * happen if the HDAT or devicetree reports too few threads or cores for
	 * this processor.
	 */
	assert(cpu->state != cpu_state_no_cpu);
}

void pci_nvram_init(void)
{
	const char *nvram_speed;

	verbose_eeh = nvram_query_eq_safe("pci-eeh-verbose", "true");
	if (verbose_eeh)
		prlog(PR_INFO, "PHB: Verbose EEH enabled\n");

	pcie_max_link_speed = 0;

	nvram_speed = nvram_query_dangerous("pcie-max-link-speed");
	if (nvram_speed) {
		pcie_max_link_speed = atoi(nvram_speed);
		prlog(PR_NOTICE, "PHB: NVRAM set max link speed to GEN%i\n",
		      pcie_max_link_speed);
	}

	pci_tracing = nvram_query_eq_safe("pci-tracing", "true");
}

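/*
 * Fletcher-style checksum over 32-bit words: v1 accumulates the words,
 * v2 accumulates the running v1, which makes the result sensitive to
 * word order as well as content.
 */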
static uint32_t mem_csum(void *_p, void *_e)
{
	size_t len = _e - _p;
	uint32_t *p = _p;
	uint32_t v1 = 0, v2 = 0;
	uint32_t csum;
	unsigned int i;

	for (i = 0; i < len; i += 4) {
		uint32_t v = *p++;
		v1 += v;
		v2 += v1;
	}

	csum = v1 ^ v2;

	return csum;
}

static uint32_t romem_csum;

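/*
 * Checksum the regions that should stay read-only while skiboot runs:
 * the head/boot code, the text and rodata (romem) area, and the
 * built-in kernel image. verify_romem() recomputes this later (e.g.
 * on the fast reboot path) to detect whether the OS has overwritten us.
 */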
static void checksum_romem(void)
{
	uint32_t csum;

	romem_csum = 0;
	if (chip_quirk(QUIRK_SLOW_SIM))
		return;

	csum = mem_csum(_start, _head_end);
	romem_csum ^= csum;

	csum = mem_csum(_stext, _romem_end);
	romem_csum ^= csum;

	csum = mem_csum(__builtin_kernel_start, __builtin_kernel_end);
	romem_csum ^= csum;
}

bool verify_romem(void)
{
	uint32_t old = romem_csum;
	checksum_romem();
	if (old != romem_csum) {
		romem_csum = old;
		prlog(PR_NOTICE, "OPAL checksums did not match\n");
		return false;
	}
	return true;
}

static void mask_pc_system_xstop(void)
{
	struct cpu_thread *cpu;
	uint32_t chip_id, core_id;
	int rc;

	if (proc_gen != proc_gen_p10 && proc_gen != proc_gen_p11)
		return;

	if (chip_quirk(QUIRK_MAMBO_CALLOUTS) || chip_quirk(QUIRK_AWAN))
		return;

	/*
	 * On P10, mask PC system checkstop (bit 28). This is needed
	 * for HW570622. We keep processor recovery disabled via
	 * HID[5] and mask the checkstop that it can cause. CME does
	 * the recovery handling for us.
	 */
	for_each_cpu(cpu) {
		chip_id = cpu->chip_id;
		core_id = pir_to_core_id(cpu->pir);

		rc = xscom_write(chip_id,
				 XSCOM_ADDR_P10_EC(core_id, P10_CORE_FIRMASK_OR),
				 PPC_BIT(28));
		if (rc)
			prerror("Error setting FIR MASK rc:%d on PIR:%x\n",
				rc, cpu->pir);
	}
}

bool lpar_per_core = false;

static void probe_lpar_per_core(void)
{
	struct cpu_thread *cpu = this_cpu();
	uint32_t chip_id = pir_to_chip_id(cpu->pir);
	uint32_t core_id = pir_to_core_id(cpu->pir);
	uint64_t addr;
	uint64_t core_thread_state;
	int rc;

	if (chip_quirk(QUIRK_MAMBO_CALLOUTS) || chip_quirk(QUIRK_AWAN))
		return;

	if (proc_gen == proc_gen_p9)
		addr = XSCOM_ADDR_P9_EC(core_id, P9_CORE_THREAD_STATE);
	else if (proc_gen == proc_gen_p10 || proc_gen == proc_gen_p11)
		addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_CORE_THREAD_STATE);
	else
		return;

	rc = xscom_read(chip_id, addr, &core_thread_state);
	if (rc) {
		prerror("Error reading CORE_THREAD_STATE rc:%d on PIR:%x\n",
			rc, cpu->pir);
		return;
	}

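	/* Bit 62 of the core thread state register flags LPAR-per-core mode */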
	if (core_thread_state & PPC_BIT(62)) {
		lpar_per_core = true;
		prlog(PR_WARNING, "LPAR-per-core mode detected. KVM may not be usable.\n");
	}
}

/* Called from head.S, thus no prototype. */
void __noreturn __nomcount main_cpu_entry(const void *fdt);

void __noreturn __nomcount main_cpu_entry(const void *fdt)
{
	/*
	 * WARNING: At this point, the timebases have
	 * *not* been synchronized yet. Do not use any timebase
	 * related functions for timeouts etc... unless you can cope
	 * with the speed being some random core clock divider and
	 * the value jumping backward when the synchronization actually
	 * happens (in chiptod_init() below).
	 *
	 * Also the current cpu_thread() struct is not initialized
	 * either so we need to clear it out first thing first (without
	 * putting any other useful info in there just yet) otherwise
	 * printf and locks are going to play funny games with "con_suspend"
	 */
	pre_init_boot_cpu();

	/*
	 * Point to our mem console
	 */
	debug_descriptor.memcons_phys = cpu_to_be64((uint64_t)&memcons);

	/*
	 * Before first printk, ensure console buffer is clear or
	 * reading tools might think it has wrapped
	 */
	clear_console();

	/* Back up the previous vectors as this could contain a kernel
	 * image.
	 */
	memcpy_null(old_vectors, NULL, EXCEPTION_VECTORS_END);

	/*
	 * Some boot firmwares enter OPAL with MSR[ME]=1, as they presumably
	 * handle machine checks until we take over. As we overwrite the
	 * previous exception vectors with our own handlers, disable MSR[ME].
	 * This could be done atomically by patching in a branch then patching
	 * it out last, but that's a lot of effort.
	 */
	disable_machine_check();

	/* Copy all vectors down to 0 */
	copy_exception_vectors();

	/* Enable trap based asserts */
	patch_traps(true);

	/*
	 * Enable MSR[ME] bit so we can take MCEs. We don't currently
	 * recover, but we print some useful information.
	 */
	enable_machine_check();
	mtmsrd(MSR_RI, 1);

	/* Setup a NULL catcher to catch accidental NULL ptr calls */
	setup_branch_null_catcher();

	/* Call library constructors */
	do_ctors();

prlog(PR_NOTICE, "OPAL %s%s starting...\n", version, DEBUG_STR);
|
|
|
|
prlog(PR_DEBUG, "initial console log level: memory %d, driver %d\n",
|
|
(debug_descriptor.console_log_levels >> 4),
|
|
(debug_descriptor.console_log_levels & 0x0f));
|
|
prlog(PR_TRACE, "OPAL is Powered By Linked-List Technology.\n");
|
|
|
|
#ifdef SKIBOOT_GCOV
|
|
skiboot_gcov_done();
|
|
#endif
|
|
|
|
/* Initialize boot cpu's cpu_thread struct */
|
|
init_boot_cpu();
|
|
|
|
/* Now locks can be used */
|
|
init_locks();
|
|
|
|
/* Create the OPAL call table early on, entries can be overridden
|
|
* later on (FSP console code for example)
|
|
*/
|
|
opal_table_init();
|
|
|
|
/* Init the physical map table so we can start mapping things */
|
|
phys_map_init(mfspr(SPR_PVR));
|
|
|
|
/*
|
|
* If we are coming in with a flat device-tree, we expand it
|
|
* now. Else look for HDAT and create a device-tree from them
|
|
*
|
|
* Hack alert: When entering via the OPAL entry point, fdt
|
|
* is set to -1, we record that and pass it to parse_hdat
|
|
*/
|
|
|
|
dt_root = dt_new_root("");
|
|
|
|
if (fdt == (void *)-1ul) {
|
|
if (parse_hdat(true) < 0)
|
|
abort();
|
|
} else if (fdt == NULL) {
|
|
if (parse_hdat(false) < 0)
|
|
abort();
|
|
} else {
|
|
dt_expand(fdt);
|
|
}
|
|
dt_add_cpufeatures(dt_root);
|
|
|
|
/* Now that we have a full devicetree, verify that we aren't on fire. */
|
|
per_thread_sanity_checks();
|
|
|
|
	/*
	 * From there, we follow a fairly strict initialization order.
	 *
	 * First we need to build up our chip data structures and initialize
	 * XSCOM which will be needed for a number of subsequent things.
	 *
	 * We want XSCOM available as early as the platform probe in case the
	 * probe requires some HW accesses.
	 *
	 * We also initialize the FSI master at that point in case we need
	 * to access chips via that path early on.
	 */
	init_chips();

	xscom_init();
	mfsi_init();

	/*
	 * The direct controls facility provides some control over CPUs
	 * using scoms.
	 */
	direct_controls_init();

	/*
	 * Put various bits & pieces in device-tree that might not
	 * already be there such as the /chosen node if not there yet,
	 * the ICS node, etc... This can potentially use XSCOM
	 */
	dt_init_misc();

	/*
	 * Initialize LPC (P8 and beyond) so we can get to the UART, BMC and
	 * other system controllers. This is done before probe_platform
	 * so that the platform probing code can access an external
	 * BMC if needed.
	 */
	lpc_init();

	/*
	 * This should be done before mem_region_init, so the stack
	 * region length can be set according to the maximum PIR.
	 */
	init_cpu_max_pir();

	/*
	 * Now, we init our memory map from the device-tree, and immediately
	 * reserve areas which we know might contain data coming from
	 * HostBoot. We need to do these things before we start doing
	 * allocations outside of our heap, such as chip local allocs,
	 * otherwise we might clobber those data.
	 */
	mem_region_init();

	/*
	 * Reserve memory required to capture OPAL dump. This should be done
	 * immediately after mem_region_init to avoid any clash with local
	 * memory allocation.
	 */
	opal_mpipl_reserve_mem();

	/* Reserve HOMER and OCC area */
	homer_init();

	/* Initialize the rest of the cpu thread structs */
	init_all_cpus();
	if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10 || proc_gen == proc_gen_p11)
		cpu_set_ipi_enable(true);

	/* Once all CPUs are up, apply this workaround */
	mask_pc_system_xstop();

	/* P9/10 may be in LPAR-per-core mode, which is incompatible with KVM */
	probe_lpar_per_core();

	/* Add the /opal node to the device-tree */
	add_opal_node();

	/*
	 * We probe the platform now. This means the platform probe gets
	 * the opportunity to reserve additional areas of memory if needed.
	 *
	 * Note: Timebases still not synchronized.
	 */
	probe_platform();

	/* Allocate our split trace buffers now. Depends on add_opal_node() */
	init_trace_buffers();

	/* On P8, get the ICPs and make sure they are in a sane state */
	init_interrupts();
	if (proc_gen == proc_gen_p8)
		cpu_set_ipi_enable(true);

	/* On P9 and P10, initialize XIVE */
	if (proc_gen == proc_gen_p9)
		init_xive();
	else if (proc_gen == proc_gen_p10 || proc_gen == proc_gen_p11)
		xive2_init();

	/* Grab centaurs from device-tree if present (only on FSP-less) */
	centaur_init();

	/* Initialize ocmb scom-controller */
	ocmb_init();

	/* Initialize PSI (depends on probe_platform being called) */
	psi_init();

	/* Initialize/enable LPC interrupts. This must be done after the
	 * PSI interface has been initialized since it serves as an interrupt
	 * source for LPC interrupts.
	 */
	lpc_init_interrupts();

	/* Call in secondary CPUs */
	cpu_bringup();

	/* We can now overwrite the 0x100 vector as we are no longer being
	 * entered there.
	 */
	copy_sreset_vector();

	/* We can now do NAP mode */
	cpu_set_sreset_enable(true);

	/*
	 * Synchronize time bases. Prior to chiptod_init() the timebase
	 * is free-running at a frequency based on the core clock rather
	 * than being synchronised to the ChipTOD network. This means
	 * that the timestamps in early boot might be a little off compared
	 * to wall clock time.
	 */
	chiptod_init();

	/* Initialize P9 DIO */
	p9_dio_init();

	/*
	 * The SBE uses the TB value for scheduling its timer, hence
	 * initialize it after chiptod init.
	 */
	p9_sbe_init();

	/* Initialize i2c */
	p8_i2c_init();

	/* Register routine to dispatch and read sensors */
	sensor_init();

	/*
	 * Initialize the OPAL messaging layer before platform.init, as we
	 * may be asked to queue an OCC load message when host services gets
	 * an OCC load request from the FSP.
	 */
	opal_init_msg();

	/*
	 * We have initialized the basic HW, we can now call into the
	 * platform to perform subsequent inits, such as establishing
	 * communication with the FSP or starting IPMI.
	 */
	if (platform.init)
		platform.init();

	/* Read in NVRAM and set it up */
	nvram_init();

	/* Set the console level */
	console_log_level();

	/* Secure/Trusted Boot init. We look for /ibm,secureboot in DT */
	secureboot_init();
	trustedboot_init();

	/* Secure variables init, handled by platform */
	if (platform.secvar_init && is_fw_secureboot())
		platform.secvar_init();

	/*
	 * BMC platforms load version information from flash after
	 * secure/trustedboot init.
	 */
	if (platform.bmc)
		flash_fw_version_preload();

	/* Preload the IMC catalog dtb */
	imc_catalog_preload();

	/* Install the OPAL Console handlers */
	init_opal_console();

	/*
	 * Some platforms set a flag to wait for SBE validation to be
	 * performed by the BMC. If this occurs it leaves the SBE in a
	 * bad state and the system will reboot at this point.
	 */
	if (platform.seeprom_update)
		platform.seeprom_update();

	/* Init SLW related stuff, including fastsleep */
	slw_init();

	op_display(OP_LOG, OP_MOD_INIT, 0x0002);

	/*
	 * On some POWER9 BMC systems, we need to initialise the OCC
	 * before the NPU to facilitate NVLink/OpenCAPI presence
	 * detection, so we set it up as early as possible. On FSP
	 * systems, Hostboot starts booting the OCC later, so we delay
	 * OCC initialisation as late as possible to give it the
	 * maximum time to boot up.
	 */
	if (platform.bmc)
		occ_pstates_init();

	pci_nvram_init();

	preload_capp_ucode();
	start_preload_kernel();

	/* Catalog decompression routine */
	imc_decompress_catalog();

	/* Probe all HWPROBE hardware we have code linked for */
	probe_hardware();

	/* Initialize PCI */
	pci_init_slots();

	/* Add OPAL timer related properties */
	late_init_timers();

	/* Setup ibm,firmware-versions if able */
	if (platform.bmc) {
		flash_dt_add_fw_version();
#ifdef CONFIG_PLDM
		pldm_fru_dt_add_bmc_version();
#else
		ipmi_dt_add_bmc_info();
#endif
	}

#ifdef CONFIG_PLDM
	pldm_platform_send_progress_state_change(
		PLDM_STATE_SET_BOOT_PROG_STATE_PCI_RESORUCE_CONFIG);
#else
	ipmi_set_fw_progress_sensor(IPMI_FW_PCI_INIT);
#endif

	/*
	 * These last few things must be done as late as possible
	 * because they rely on various other things having been setup,
	 * for example, add_opal_interrupts() will add all the interrupt
	 * sources that are going to the firmware. We can't add a new one
	 * after that call. Similarly, the mem_region calls will construct
	 * the reserve maps in the DT so we shouldn't affect the memory
	 * regions after that
	 */

	/* Create the LPC bus interrupt-map on P9 */
	lpc_finalize_interrupts();

	/* Add the list of interrupts going to OPAL */
	add_opal_interrupts();

	/* Init In-Memory Collection related stuff (load the IMC dtb into memory) */
	imc_init();

	/* Disable protected execution facility in BML */
	cpu_disable_pef();

	/* Export the trace buffers */
	trace_add_dt_props();

	/* Now release parts of memory nodes we haven't used ourselves... */
	mem_region_release_unused();

	/* ... and add remaining reservations to the DT */
	mem_region_add_dt_reserved();

	/*
	 * Update /ibm,secureboot/ibm,cvc/memory-region to point to
	 * /reserved-memory/secure-crypt-algo-code instead of
	 * /ibm,hostboot/reserved-memory/secure-crypt-algo-code.
	 */
	cvc_update_reserved_memory_phandle();

	prd_register_reserved_memory();

	load_and_boot_kernel(false);
}

void __noreturn __secondary_cpu_entry(void)
{
	struct cpu_thread *cpu = this_cpu();

	/* Secondary CPU called in */
	cpu_callin(cpu);

	enable_machine_check();
	mtmsrd(MSR_RI, 1);

	/* Some XIVE setup */
	if (proc_gen == proc_gen_p9)
		xive_cpu_callin(cpu);
	else if (proc_gen == proc_gen_p10 || proc_gen == proc_gen_p11)
		xive2_cpu_callin(cpu);

	/* Wait for work to do */
	while (true) {
		if (cpu_check_jobs(cpu))
			cpu_process_jobs();
		else
			cpu_idle_job();
	}
}

/* Called from head.S, thus no prototype. */
void __noreturn __nomcount secondary_cpu_entry(void);

void __noreturn __nomcount secondary_cpu_entry(void)
{
	struct cpu_thread *cpu = this_cpu();

	per_thread_sanity_checks();

	prlog(PR_DEBUG, "INIT: CPU PIR 0x%04x called in\n", cpu->pir);

	__secondary_cpu_entry();
}