// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
/*
 * Code to manage and manipulate CPUs
 *
 * Copyright 2013-2019 IBM Corp.
 */

#include <skiboot.h>
#include <cpu.h>
#include <device.h>
#include <mem_region.h>
#include <opal.h>
#include <stack.h>
#include <trace.h>
#include <affinity.h>
#include <chip.h>
#include <timebase.h>
#include <interrupts.h>
#include <ccan/str/str.h>
#include <ccan/container_of/container_of.h>
#include <xscom.h>

/* The cpu_threads array is static and indexed by PIR in
 * order to speed up lookup from asm entry points
 */
struct cpu_stack {
	union {
		uint8_t	stack[STACK_SIZE];
		struct cpu_thread cpu;
	};
} __align(STACK_SIZE);

static struct cpu_stack * const cpu_stacks = (struct cpu_stack *)CPU_STACKS_BASE;
static unsigned int cpu_threads_max;
unsigned int cpu_thread_count;
unsigned int cpu_max_pir;
struct cpu_thread *boot_cpu;
static struct lock reinit_lock = LOCK_UNLOCKED;
static bool radix_supported;
static unsigned long hid0_hile;
static unsigned long hid0_attn;
static unsigned long hid0_icache;
static bool reconfigure_idle = false;
static bool sreset_enabled;
static bool ipi_enabled;
static bool pm_enabled;
static bool current_hile_mode = HAVE_LITTLE_ENDIAN;
static bool current_radix_mode = true;
static bool tm_suspend_enabled;

unsigned long cpu_secondary_start __force_data = 0;

struct cpu_job {
	struct list_node	link;
	void			(*func)(void *data);
	void			*data;
	const char		*name;
	bool			complete;
	bool			no_return;
};

/* attribute const as cpu_stacks is constant. */
unsigned long __attrconst cpu_stack_bottom(unsigned int pir)
{
	return ((unsigned long)&cpu_stacks[pir]) +
		sizeof(struct cpu_thread) + STACK_SAFETY_GAP;
}

unsigned long __attrconst cpu_stack_top(unsigned int pir)
{
	/* This is the top of the normal stack. */
	return ((unsigned long)&cpu_stacks[pir]) +
		NORMAL_STACK_SIZE - STACK_TOP_GAP;
}

unsigned long __attrconst cpu_emergency_stack_top(unsigned int pir)
{
	/* This is the top of the emergency stack, above the normal stack. */
	return ((unsigned long)&cpu_stacks[pir]) +
		NORMAL_STACK_SIZE + EMERGENCY_STACK_SIZE - STACK_TOP_GAP;
}

void __nomcount cpu_relax(void)
{
	if ((mfspr(SPR_PPR32) >> 18) != 0x4) {
		printf("cpu_relax called when not at medium SMT priority: "
		       "PPR[PRI]=0x%lx\n", mfspr(SPR_PPR32) >> 18);
		backtrace();
	}

	/* Relax a bit to give sibling threads some breathing space */
	smt_lowest();
	asm volatile("nop; nop; nop; nop;\n"
		     "nop; nop; nop; nop;\n"
		     "nop; nop; nop; nop;\n"
		     "nop; nop; nop; nop;\n");
	smt_medium();
	barrier();
}
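
/*
 * Poke a CPU so it re-evaluates its wake conditions: an ICP (XICS) IPI
 * on P8, a hypervisor doorbell on P9 and later.
 */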
static void cpu_send_ipi(struct cpu_thread *cpu)
{
	if (proc_gen == proc_gen_p8) {
		/* Poke IPI */
		icp_kick_cpu(cpu);
	} else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10 ||
		   proc_gen == proc_gen_p11) {
		p9_dbell_send(cpu->pir);
	}
}

/*
 * If chip_id is >= 0, schedule the job on that node.
 * Otherwise schedule the job anywhere.
 */
static struct cpu_thread *cpu_find_job_target(int32_t chip_id)
{
	struct cpu_thread *cpu, *best, *me = this_cpu();
	uint32_t best_count;

	/* We try to find a target to run a job. We need to avoid
	 * a CPU that has a "no return" job on its queue as it might
	 * never be able to process anything.
	 *
	 * Additionally we don't check the list but the job count
	 * on the target CPUs, since that is decremented *after*
	 * a job has been completed.
	 */

	/* First we scan all available primary threads
	 */
	for_each_available_cpu(cpu) {
		if (chip_id >= 0 && cpu->chip_id != chip_id)
			continue;
		if (cpu == me || !cpu_is_thread0(cpu) || cpu->job_has_no_return)
			continue;
		if (cpu->job_count)
			continue;
		lock(&cpu->job_lock);
		if (!cpu->job_count)
			return cpu;
		unlock(&cpu->job_lock);
	}

	/* Now try again with secondary threads included and keep
	 * track of the one with the fewest jobs queued up. This is
	 * done in a racy way, but it's just an optimization in case
	 * we are overcommitted on jobs. We could also just pick
	 * a random one...
	 */
	best = NULL;
	best_count = -1u;
	for_each_available_cpu(cpu) {
		if (chip_id >= 0 && cpu->chip_id != chip_id)
			continue;
		if (cpu == me || cpu->job_has_no_return)
			continue;
		if (!best || cpu->job_count < best_count) {
			best = cpu;
			best_count = cpu->job_count;
		}
		if (cpu->job_count)
			continue;
		lock(&cpu->job_lock);
		if (!cpu->job_count)
			return cpu;
		unlock(&cpu->job_lock);
	}

	/* We haven't found anybody, do we have a bestie ? */
	if (best) {
		lock(&best->job_lock);
		return best;
	}

	/* Go away */
	return NULL;
}

/* job_lock is held, returns with it released */
static void queue_job_on_cpu(struct cpu_thread *cpu, struct cpu_job *job)
{
	/* That's bad, the job will never run */
	if (cpu->job_has_no_return) {
		prlog(PR_WARNING, "WARNING ! Job %s scheduled on CPU 0x%x"
		      " which has a no-return job on its queue !\n",
		      job->name, cpu->pir);
		backtrace();
	}
	list_add_tail(&cpu->job_queue, &job->link);
	if (job->no_return)
		cpu->job_has_no_return = true;
	else
		cpu->job_count++;
	unlock(&cpu->job_lock);

	/* Is it idle waiting for jobs? If so, must send an IPI. */
	sync();
	if (cpu->in_job_sleep)
		cpu_send_ipi(cpu);
}
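
/*
 * Allocate and queue a job. If no target CPU is given, pick one with
 * cpu_find_job_target(); if the job cannot be queued remotely (or the
 * target would be the calling CPU) it is run synchronously before
 * returning.
 */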
struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
				const char *name,
				void (*func)(void *data), void *data,
				bool no_return)
{
	struct cpu_job *job;

#ifdef DEBUG_SERIALIZE_CPU_JOBS
	if (cpu == NULL)
		cpu = this_cpu();
#endif

	if (cpu && !cpu_is_available(cpu)) {
		prerror("CPU: Tried to queue job on unavailable CPU 0x%04x\n",
			cpu->pir);
		return NULL;
	}

	job = zalloc(sizeof(struct cpu_job));
	if (!job)
		return NULL;
	job->func = func;
	job->data = data;
	job->name = name;
	job->complete = false;
	job->no_return = no_return;

	/* Pick a candidate. Returns with target queue locked */
	if (cpu == NULL)
		cpu = cpu_find_job_target(-1);
	else if (cpu != this_cpu())
		lock(&cpu->job_lock);
	else
		cpu = NULL;

	/* Can't be scheduled, run it now */
	if (cpu == NULL) {
		if (!this_cpu()->job_has_no_return)
			this_cpu()->job_has_no_return = no_return;
		func(data);
		job->complete = true;
		return job;
	}

	queue_job_on_cpu(cpu, job);

	return job;
}

struct cpu_job *cpu_queue_job_on_node(uint32_t chip_id,
				       const char *name,
				       void (*func)(void *data), void *data)
{
	struct cpu_thread *cpu;
	struct cpu_job *job;

	job = zalloc(sizeof(struct cpu_job));
	if (!job)
		return NULL;
	job->func = func;
	job->data = data;
	job->name = name;
	job->complete = false;
	job->no_return = false;

	/* Pick a candidate. Returns with target queue locked */
	cpu = cpu_find_job_target(chip_id);

	/* Can't be scheduled... */
	if (cpu == NULL) {
		cpu = this_cpu();
		if (cpu->chip_id == chip_id) {
			/* Run it now if we're the right node. */
			func(data);
			job->complete = true;
			return job;
		}
		/* Otherwise fail. */
		free(job);
		return NULL;
	}

	queue_job_on_cpu(cpu, job);

	return job;
}

bool cpu_poll_job(struct cpu_job *job)
{
	lwsync();
	return job->complete;
}

void cpu_wait_job(struct cpu_job *job, bool free_it)
{
	unsigned long time_waited = 0;

	if (!job)
		return;

	while (!job->complete) {
		/* This will call OPAL pollers for us */
		time_wait_ms(10);
		time_waited += 10;
		lwsync();
		if ((time_waited % 30000) == 0) {
			prlog(PR_INFO, "cpu_wait_job(%s) for %lums\n",
			      job->name, time_waited);
			backtrace();
		}
	}
	lwsync();

	if (time_waited > 1000)
		prlog(PR_DEBUG, "cpu_wait_job(%s) for %lums\n",
		      job->name, time_waited);

	if (free_it)
		free(job);
}

bool cpu_check_jobs(struct cpu_thread *cpu)
{
	return !list_empty_nocheck(&cpu->job_queue);
}
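
/*
 * Run the jobs queued on the calling CPU. A no-return job is freed
 * before its function is invoked; for normal jobs, job_count is
 * decremented and job->complete is set once the function returns.
 */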
void cpu_process_jobs(void)
{
	struct cpu_thread *cpu = this_cpu();
	struct cpu_job *job = NULL;
	void (*func)(void *);
	void *data;

	sync();
	if (!cpu_check_jobs(cpu))
		return;

	lock(&cpu->job_lock);
	while (true) {
		bool no_return;

		job = list_pop(&cpu->job_queue, struct cpu_job, link);
		if (!job)
			break;

		func = job->func;
		data = job->data;
		no_return = job->no_return;
		unlock(&cpu->job_lock);
		prlog(PR_TRACE, "running job %s on %x\n", job->name, cpu->pir);
		if (no_return)
			free(job);
		func(data);
		if (!list_empty(&cpu->locks_held)) {
			if (no_return)
				prlog(PR_ERR, "OPAL no-return job returned with"
				      " locks held!\n");
			else
				prlog(PR_ERR, "OPAL job %s returning with locks held\n",
				      job->name);
			drop_my_locks(true);
		}
		lock(&cpu->job_lock);
		if (!no_return) {
			cpu->job_count--;
			lwsync();
			job->complete = true;
		}
	}
	unlock(&cpu->job_lock);
}

enum cpu_wake_cause {
	cpu_wake_on_job,
	cpu_wake_on_dec,
};
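
/*
 * Processor-specific idle entry points. P8 uses nap with the wake cause
 * selected via the LPCR PECE bits; P9 and later use stop, either with
 * ESL/EC set (woken via sreset) or the lite variant when sreset support
 * is not available.
 */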
static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)
{
	uint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE;
	unsigned int vec;

	/* Clean up ICP, be ready for IPIs */
	icp_prep_for_pm();

	/* Setup wakeup cause in LPCR: EE (for IPI) */
	lpcr |= SPR_LPCR_P8_PECE2;
	if (wake_on == cpu_wake_on_dec)
		lpcr |= SPR_LPCR_P8_PECE3; /* DEC */
	mtspr(SPR_LPCR, lpcr);
	isync();

	/* Enter nap */
	vec = enter_p8_pm_state(false);

	reset_cpu_icp();

	return vec;
}

static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
{
	uint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P9_PECE;
	uint64_t psscr;
	unsigned int vec;

	lpcr |= SPR_LPCR_P9_PECEL1; /* HV DBELL for IPI */
	if (wake_on == cpu_wake_on_dec)
		lpcr |= SPR_LPCR_P9_PECEL3; /* DEC */
	mtspr(SPR_LPCR, lpcr);
	isync();

	if (sreset_enabled) {
		/* stop with EC=1 (sreset) and ESL=1 (enable thread switch). */
		/* PSSCR SD=0 ESL=1 EC=1 PSSL=0 TR=3 MTL=0 RL=1 */
		psscr = PPC_BIT(42) | PPC_BIT(43) |
			PPC_BITMASK(54, 55) | PPC_BIT(63);
		vec = enter_p9_pm_state(psscr);
	} else {
		/* stop with EC=0 (resumes) which does not require sreset. */
		/* PSSCR SD=0 ESL=0 EC=0 PSSL=0 TR=3 MTL=0 RL=1 */
		psscr = PPC_BITMASK(54, 55) | PPC_BIT(63);
		enter_p9_pm_lite_state(psscr);
		vec = 0;
	}

	/* Clear doorbell */
	p9_dbell_receive();

	return vec;
}

static void cpu_idle_pm(enum cpu_wake_cause wake_on)
{
	struct cpu_thread *cpu = this_cpu();
	unsigned int vec;

	if (!pm_enabled) {
		prlog_once(PR_DEBUG, "cpu_idle_pm called with PM disabled\n");
		return;
	}

	/*
	 * Mark ourselves in sleep so other CPUs know to send an IPI,
	 * then re-check the wake conditions. This is ordered against
	 * queue_job_on_cpu() and reconfigure_idle_start() which first
	 * set the wake conditions (either queue a job or set
	 * reconfigure_idle = true), issue a sync(), then test if the
	 * target is in_sleep / in_job_sleep.
	 */
	cpu->in_sleep = true;
	if (wake_on == cpu_wake_on_job)
		cpu->in_job_sleep = true;
	sync();
	if (reconfigure_idle)
		goto skip_sleep;
	if (wake_on == cpu_wake_on_job && cpu_check_jobs(cpu))
		goto skip_sleep;

	if (proc_gen == proc_gen_p8)
		vec = cpu_idle_p8(wake_on);
	else
		vec = cpu_idle_p9(wake_on);

	if (vec == 0x100) {
		unsigned long srr1 = mfspr(SPR_SRR1);

		switch (srr1 & SPR_SRR1_PM_WAKE_MASK) {
		case SPR_SRR1_PM_WAKE_SRESET:
			exception_entry_pm_sreset();
			break;
		default:
			break;
		}
		mtmsrd(MSR_RI, 1);

	} else if (vec == 0x200) {
		exception_entry_pm_mce();
		enable_machine_check();
		mtmsrd(MSR_RI, 1);
	}

skip_sleep:
	sync();
	cpu->in_sleep = false;
	if (wake_on == cpu_wake_on_job)
		cpu->in_job_sleep = false;
}
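
/*
 * idle_lock and nr_cpus_idle track how many CPUs are currently inside
 * the idle loop, so reconfigure_idle_start() can kick them out and wait
 * for them all to leave before the idle/PM configuration is changed.
 */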
static struct lock idle_lock = LOCK_UNLOCKED;
static int nr_cpus_idle = 0;

static void enter_idle(void)
{
	struct cpu_thread *cpu = this_cpu();

	assert(!cpu->in_idle);
	assert(!cpu->in_sleep);
	assert(!cpu->in_job_sleep);

	for (;;) {
		lock(&idle_lock);
		if (!reconfigure_idle) {
			nr_cpus_idle++;
			cpu->in_idle = true;
			break;
		}
		unlock(&idle_lock);

		/* Another CPU is reconfiguring idle */
		smt_lowest();
		while (reconfigure_idle)
			barrier();
		smt_medium();
	}

	unlock(&idle_lock);
}

static void exit_idle(void)
{
	struct cpu_thread *cpu = this_cpu();

	assert(cpu->in_idle);
	assert(!cpu->in_sleep);
	assert(!cpu->in_job_sleep);

	lock(&idle_lock);
	assert(nr_cpus_idle > 0);
	nr_cpus_idle--;
	cpu->in_idle = false;
	unlock(&idle_lock);
}

static void reconfigure_idle_start(void)
{
	struct cpu_thread *cpu;

	/*
	 * First, make sure we are exclusive in reconfiguring by taking
	 * reconfigure_idle from false to true.
	 */
	for (;;) {
		lock(&idle_lock);
		if (!reconfigure_idle) {
			reconfigure_idle = true;
			break;
		}
		unlock(&idle_lock);

		/* Someone else is reconfiguring */
		smt_lowest();
		while (reconfigure_idle)
			barrier();
		smt_medium();
	}

	unlock(&idle_lock);

	/*
	 * Then kick everyone out of idle.
	 */

	/*
	 * Order earlier store to reconfigure_idle=true vs load from
	 * cpu->in_sleep.
	 */
	sync();

	for_each_available_cpu(cpu) {
		if (cpu->in_sleep)
			cpu_send_ipi(cpu);
	}

	/*
	 * Then wait for all other CPUs to leave idle. Now they will see
	 * reconfigure_idle==true and not re-enter idle.
	 */
	smt_lowest();
	while (nr_cpus_idle != 0)
		barrier();
	smt_medium();

	/*
	 * Order load of nr_cpus_idle with later loads of data that other
	 * CPUs might have stored-to before coming out of idle.
	 */
	lwsync();
}

static void reconfigure_idle_end(void)
{
	assert(reconfigure_idle);
	lock(&idle_lock);
	reconfigure_idle = false;
	unlock(&idle_lock);
}
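
/*
 * Idle loop for a CPU with nothing to do: sleep via cpu_idle_pm() when
 * power management is enabled, otherwise spin at low SMT priority until
 * a job is queued or idle is being reconfigured.
 */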
void cpu_idle_job(void)
{
	struct cpu_thread *cpu = this_cpu();

	do {
		enter_idle();

		if (pm_enabled) {
			cpu_idle_pm(cpu_wake_on_job);
		} else {
			smt_lowest();
			for (;;) {
				if (cpu_check_jobs(cpu))
					break;
				if (reconfigure_idle)
					break;
				barrier();
			}
			smt_medium();
		}

		exit_idle();

	} while (!cpu_check_jobs(cpu));
}

void cpu_idle_delay(unsigned long delay)
{
	unsigned long now = mftb();
	unsigned long end = now + delay;
	unsigned long min_pm = usecs_to_tb(10);

	do {
		enter_idle();

		delay = end - now;

		if (pm_enabled && delay > min_pm) {
			if (delay >= 0x7fffffff)
				delay = 0x7fffffff;
			mtspr(SPR_DEC, delay);

			cpu_idle_pm(cpu_wake_on_dec);
		} else {
			smt_lowest();
			for (;;) {
				if (tb_compare(mftb(), end) == TB_AAFTERB)
					break;
				if (reconfigure_idle)
					break;
				barrier();
			}
			smt_medium();
		}

		exit_idle();

		now = mftb();

	} while (tb_compare(now, end) != TB_AAFTERB);
}
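
/*
 * PM states can only be used once an IPI mechanism is available to wake
 * sleeping threads; P8 nap additionally requires sreset support.
 */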
static void recalc_pm_enabled(void)
{
	if (chip_quirk(QUIRK_AWAN))
		return;

	if (proc_gen == proc_gen_p8)
		pm_enabled = ipi_enabled && sreset_enabled;
	else
		pm_enabled = ipi_enabled;
}

void cpu_set_sreset_enable(bool enabled)
{
	if (sreset_enabled == enabled)
		return;

	if (proc_gen == proc_gen_p8) {
		/* Public P8 Mambo has broken NAP */
		if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
			return;
	}

	reconfigure_idle_start();

	sreset_enabled = enabled;

	recalc_pm_enabled();

	reconfigure_idle_end();
}

void cpu_set_ipi_enable(bool enabled)
{
	if (ipi_enabled == enabled)
		return;

	reconfigure_idle_start();

	ipi_enabled = enabled;

	recalc_pm_enabled();

	reconfigure_idle_end();
}

void cpu_process_local_jobs(void)
{
	struct cpu_thread *cpu = first_available_cpu();

	while (cpu) {
		if (cpu != this_cpu())
			return;

		cpu = next_available_cpu(cpu);
	}

	if (!cpu)
		cpu = first_available_cpu();

	/* No CPU to run on, just run synchro */
	if (cpu == this_cpu()) {
		prlog_once(PR_DEBUG, "Processing jobs synchronously\n");
		cpu_process_jobs();
		opal_run_pollers();
	}
}

struct dt_node *get_cpu_node(u32 pir)
{
	struct cpu_thread *t = find_cpu_by_pir(pir);

	return t ? t->node : NULL;
}

/* This only covers primary, active cpus */
struct cpu_thread *find_cpu_by_chip_id(u32 chip_id)
{
	struct cpu_thread *t;

	for_each_available_cpu(t) {
		if (t->is_secondary)
			continue;
		if (t->chip_id == chip_id)
			return t;
	}
	return NULL;
}

struct cpu_thread *find_cpu_by_node(struct dt_node *cpu)
{
	struct cpu_thread *t;

	for_each_available_cpu(t) {
		if (t->node == cpu)
			return t;
	}
	return NULL;
}

struct cpu_thread *find_cpu_by_pir(u32 pir)
{
	if (pir > cpu_max_pir)
		return NULL;
	return &cpu_stacks[pir].cpu;
}

struct cpu_thread __nomcount *find_cpu_by_pir_nomcount(u32 pir)
{
	if (pir > cpu_max_pir)
		return NULL;
	return &cpu_stacks[pir].cpu;
}

struct cpu_thread *find_cpu_by_server(u32 server_no)
{
	struct cpu_thread *t;

	for_each_cpu(t) {
		if (t->server_no == server_no)
			return t;
	}
	return NULL;
}
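
/* Iterate over all populated cpu_thread structures, in PIR order. */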
struct cpu_thread *next_cpu(struct cpu_thread *cpu)
{
	struct cpu_stack *s;
	unsigned int index = 0;

	if (cpu != NULL) {
		s = container_of(cpu, struct cpu_stack, cpu);
		index = s - cpu_stacks + 1;
	}
	for (; index <= cpu_max_pir; index++) {
		cpu = &cpu_stacks[index].cpu;
		if (cpu->state != cpu_state_no_cpu)
			return cpu;
	}
	return NULL;
}

struct cpu_thread *first_cpu(void)
{
	return next_cpu(NULL);
}

struct cpu_thread *next_available_cpu(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && !cpu_is_available(cpu));

	return cpu;
}

struct cpu_thread *first_available_cpu(void)
{
	return next_available_cpu(NULL);
}

struct cpu_thread *next_present_cpu(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && !cpu_is_present(cpu));

	return cpu;
}

struct cpu_thread *first_present_cpu(void)
{
	return next_present_cpu(NULL);
}

struct cpu_thread *next_ungarded_cpu(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && cpu->state == cpu_state_unavailable);

	return cpu;
}

struct cpu_thread *first_ungarded_cpu(void)
{
	return next_ungarded_cpu(NULL);
}

struct cpu_thread *next_ungarded_primary(struct cpu_thread *cpu)
{
	do {
		cpu = next_ungarded_cpu(cpu);
	} while (cpu && !(cpu == cpu->primary || cpu == cpu->ec_primary));

	return cpu;
}

struct cpu_thread *first_ungarded_primary(void)
{
	return next_ungarded_primary(NULL);
}

u8 get_available_nr_cores_in_chip(u32 chip_id)
{
	struct cpu_thread *core;
	u8 nr_cores = 0;

	for_each_available_core_in_chip(core, chip_id)
		nr_cores++;

	return nr_cores;
}

struct cpu_thread *next_available_core_in_chip(struct cpu_thread *core,
					       u32 chip_id)
{
	do {
		core = next_cpu(core);
	} while(core && (!cpu_is_available(core) ||
			 core->chip_id != chip_id ||
			 core->is_secondary));
	return core;
}

struct cpu_thread *first_available_core_in_chip(u32 chip_id)
{
	return next_available_core_in_chip(NULL, chip_id);
}

uint32_t cpu_get_core_index(struct cpu_thread *cpu)
{
	return pir_to_fused_core_id(cpu->pir);
}

void cpu_remove_node(const struct cpu_thread *t)
{
	struct dt_node *i;

	/* Find this cpu node */
	dt_for_each_node(dt_root, i) {
		const struct dt_property *p;

		if (!dt_has_node_property(i, "device_type", "cpu"))
			continue;
		p = dt_find_property(i, "ibm,pir");
		if (!p)
			continue;
		if (dt_property_get_cell(p, 0) == t->pir) {
			dt_free(i);
			return;
		}
	}
	prerror("CPU: Could not find cpu node %i to remove!\n", t->pir);
	abort();
}

void cpu_disable_all_threads(struct cpu_thread *cpu)
{
	unsigned int i;
	struct dt_property *p;

	for (i = 0; i <= cpu_max_pir; i++) {
		struct cpu_thread *t = &cpu_stacks[i].cpu;

		if (t->primary == cpu->primary)
			t->state = cpu_state_disabled;
	}

	/* Mark this core as bad so that the Linux kernel doesn't use this CPU. */
	prlog(PR_DEBUG, "CPU: Mark CPU bad (PIR 0x%04x)...\n", cpu->pir);
	p = __dt_find_property(cpu->node, "status");
	if (p)
		dt_del_property(cpu->node, p);

	dt_add_property_string(cpu->node, "status", "bad");

	/* XXX Do something to actually stop the core */
}
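
/*
 * (Re)initialise a cpu_thread: clear everything past the stack guard,
 * then set up its locks, job queue and identity fields.
 */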
static void init_cpu_thread(struct cpu_thread *t,
			    enum cpu_thread_state state,
			    unsigned int pir)
{
	/* offset within cpu_thread to prevent stack_guard clobber */
	const size_t guard_skip = container_off_var(t, stack_guard) +
				  sizeof(t->stack_guard);

	memset(((void *)t) + guard_skip, 0, sizeof(struct cpu_thread) - guard_skip);
	init_lock(&t->dctl_lock);
	init_lock(&t->job_lock);
	list_head_init(&t->job_queue);
	list_head_init(&t->locks_held);
	t->stack_guard = STACK_CHECK_GUARD_BASE ^ pir;
	t->state = state;
	t->pir = pir;
#ifdef STACK_CHECK_ENABLED
	t->stack_bot_mark = LONG_MAX;
#endif
	t->is_fused_core = is_fused_core(mfspr(SPR_PVR));
	assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks);
}

static void enable_attn(void)
{
	unsigned long hid0;

	hid0 = mfspr(SPR_HID0);
	hid0 |= hid0_attn;
	set_hid0(hid0);
	if (hid0_icache) {
		if (hid0 & hid0_icache) {
			prlog(PR_WARNING, "enable_attn found hid0_icache bit set unexpectedly\n");
			hid0 &= ~hid0_icache;
		}
		/* icache is flushed on hid0_icache 0->1 */
		set_hid0(hid0 | hid0_icache);
		set_hid0(hid0);
	}
}

static void disable_attn(void)
{
	unsigned long hid0;

	hid0 = mfspr(SPR_HID0);
	hid0 &= ~hid0_attn;
	set_hid0(hid0);
	if (hid0_icache) {
		if (hid0 & hid0_icache) {
			prlog(PR_WARNING, "disable_attn found hid0_icache bit set unexpectedly\n");
			hid0 &= ~hid0_icache;
		}
		/* icache is flushed on hid0_icache 0->1 */
		set_hid0(hid0 | hid0_icache);
		set_hid0(hid0);
	}
}

extern void __trigger_attn(void);
void trigger_attn(void)
{
	enable_attn();
	__trigger_attn();
}

static void init_hid(void)
{
	/* attn is enabled even when HV=0, so make sure it's off */
	disable_attn();
}

void __nomcount pre_init_boot_cpu(void)
{
	struct cpu_thread *cpu = this_cpu();

	/* We skip the stack guard ! */
	memset(((void *)cpu) + 8, 0, sizeof(struct cpu_thread) - 8);
}
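
/*
 * Identify the boot processor from its PVR: set proc_gen, the
 * generation-specific HID0 masks and the maximum threads per core, then
 * initialise the boot CPU's cpu_thread and stack.
 */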
void init_boot_cpu(void)
{
	unsigned int pir, pvr;

	pir = mfspr(SPR_PIR);
	pvr = mfspr(SPR_PVR);

	/* Get CPU family and other flags based on PVR */
	switch(PVR_TYPE(pvr)) {
	case PVR_TYPE_P8E:
	case PVR_TYPE_P8:
		proc_gen = proc_gen_p8;
		hid0_hile = SPR_HID0_POWER8_HILE;
		hid0_attn = SPR_HID0_POWER8_ENABLE_ATTN;
		break;
	case PVR_TYPE_P8NVL:
		proc_gen = proc_gen_p8;
		hid0_hile = SPR_HID0_POWER8_HILE;
		hid0_attn = SPR_HID0_POWER8_ENABLE_ATTN;
		break;
	case PVR_TYPE_P9:
	case PVR_TYPE_P9P:
		proc_gen = proc_gen_p9;
		radix_supported = true;
		hid0_hile = SPR_HID0_POWER9_HILE;
		hid0_attn = SPR_HID0_POWER9_ENABLE_ATTN;
		hid0_icache = SPR_HID0_POWER9_FLUSH_ICACHE;
		break;
	case PVR_TYPE_P10:
		proc_gen = proc_gen_p10;
		radix_supported = true;
		hid0_hile = SPR_HID0_POWER10_HILE;
		hid0_attn = SPR_HID0_POWER10_ENABLE_ATTN;
		hid0_icache = SPR_HID0_POWER10_FLUSH_ICACHE;
		break;
	case PVR_TYPE_P11:
		proc_gen = proc_gen_p11;
		radix_supported = true;
		hid0_hile = SPR_HID0_POWER10_HILE;
		hid0_attn = SPR_HID0_POWER10_ENABLE_ATTN;
		hid0_icache = SPR_HID0_POWER10_FLUSH_ICACHE;
		break;
	default:
		proc_gen = proc_gen_unknown;
	}

	/* Get a CPU thread count based on family */
	switch(proc_gen) {
	case proc_gen_p8:
		cpu_threads_max = 8;
		prlog(PR_INFO, "CPU: P8 generation processor"
		      " (max %d threads/core)\n", cpu_threads_max);
		break;
	case proc_gen_p9:
		if (is_fused_core(pvr))
			cpu_threads_max = 8;
		else
			cpu_threads_max = 4;
		prlog(PR_INFO, "CPU: P9 generation processor"
		      " (max %d threads/core)\n", cpu_threads_max);
		break;
	case proc_gen_p10:
		if (is_fused_core(pvr))
			cpu_threads_max = 8;
		else
			cpu_threads_max = 4;
		prlog(PR_INFO, "CPU: P10 generation processor"
		      " (max %d threads/core)\n", cpu_threads_max);
		break;
	case proc_gen_p11:
		if (is_fused_core(pvr))
			cpu_threads_max = 8;
		else
			cpu_threads_max = 4;
		prlog(PR_INFO, "CPU: Power11 generation processor"
		      " (max %d threads/core)\n", cpu_threads_max);
		break;
	default:
		prerror("CPU: Unknown PVR, assuming 1 thread\n");
		cpu_threads_max = 1;
	}

	if (proc_gen == proc_gen_p8) {
#ifdef CONFIG_P8
		if (PVR_VERS_MAJ(mfspr(SPR_PVR)) == 1) {
			prerror("CPU: POWER8 DD1 is not supported\n");
			abort();
		}
#else
		prerror("CPU: POWER8 detected but CONFIG_P8 not set\n");
		abort();
#endif
	}

	if (is_power9n(pvr) && (PVR_VERS_MAJ(pvr) == 1)) {
		prerror("CPU: POWER9N DD1 is not supported\n");
		abort();
	}

	prlog(PR_DEBUG, "CPU: Boot CPU PIR is 0x%04x PVR is 0x%08x\n",
	      pir, pvr);

	/*
	 * Adjust top of RAM to include the boot CPU stack. If we have less
	 * RAM than this, it's not possible to boot.
	 */
	cpu_max_pir = pir;
	top_of_ram += (cpu_max_pir + 1) * STACK_SIZE;

	/* Setup boot CPU state */
	boot_cpu = &cpu_stacks[pir].cpu;
	init_cpu_thread(boot_cpu, cpu_state_active, pir);
	init_boot_tracebuf(boot_cpu);
	assert(this_cpu() == boot_cpu);
	init_hid();
}

static void enable_large_dec(bool on)
{
	u64 lpcr = mfspr(SPR_LPCR);

	if (on)
		lpcr |= SPR_LPCR_P9_LD;
	else
		lpcr &= ~SPR_LPCR_P9_LD;

	mtspr(SPR_LPCR, lpcr);
	isync();
}

#define HIGH_BIT (1ull << 63)

static int find_dec_bits(void)
{
	int bits = 65; /* we always decrement once */
	u64 mask = ~0ull;

	if (proc_gen < proc_gen_p9)
		return 32;

	/* The ISA doesn't specify the width of the decrementer register so we
	 * need to discover it. When in large mode (LPCR.LD = 1) reads from the
	 * DEC SPR are sign extended to 64 bits and writes are truncated to the
	 * physical register width. We can use this behaviour to detect the
	 * width by starting from an all 1s value and right shifting until we
	 * read a value from the DEC with its high bit cleared.
	 */

	enable_large_dec(true);

	do {
		bits--;
		mask = mask >> 1;
		mtspr(SPR_DEC, mask);
	} while (mfspr(SPR_DEC) & HIGH_BIT);

	enable_large_dec(false);

	prlog(PR_DEBUG, "CPU: decrementer bits %d\n", bits);
	return bits;
}

static void init_tm_suspend_mode_property(void)
{
	struct dt_node *node;

	/* If we don't find anything, assume TM suspend is enabled */
	tm_suspend_enabled = true;

	node = dt_find_by_path(dt_root, "/ibm,opal/fw-features/tm-suspend-mode");
	if (!node)
		return;

	if (dt_find_property(node, "disabled"))
		tm_suspend_enabled = false;
}
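
/*
 * Scan /cpus in the device-tree to compute cpu_max_pir and the common
 * per-core thread count (cpu_thread_count).
 */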
void init_cpu_max_pir(void)
{
	struct dt_node *cpus, *cpu;

	cpus = dt_find_by_path(dt_root, "/cpus");
	assert(cpus);

	/* Iterate all CPUs in the device-tree */
	dt_for_each_child(cpus, cpu) {
		unsigned int pir, server_no, threads;
		const struct dt_property *p;

		/* Skip cache nodes */
		if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
			continue;

		server_no = dt_prop_get_u32(cpu, "reg");

		/* If PIR property is absent, assume it's the same as the
		 * server number
		 */
		pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);

		p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
		if (!p)
			continue;
		threads = p->len / 4;
		assert(threads > 0);
		if (threads > cpu_threads_max) {
			prlog(PR_WARNING, "CPU: Threads out of range for PIR 0x%04x"
			      " threads=%d max=%d\n",
			      pir, threads, cpu_threads_max);
			threads = cpu_threads_max;
		}
		if (!cpu_thread_count) {
			cpu_thread_count = threads;
		} else {
			/* Do not support asymmetric SMT topologies */
			assert(cpu_thread_count == threads);
		}

		if (cpu_max_pir < pir + threads - 1)
			cpu_max_pir = pir + threads - 1;
	}

	prlog(PR_DEBUG, "CPU: New max PIR set to 0x%x\n", cpu_max_pir);
}

/*
 * Set cpu->state to cpu_state_no_cpu for all secondaries, before the dt is
 * parsed and they will be flipped to present as populated CPUs are found.
 *
 * Some configurations (e.g., with memory encryption) will not zero system
 * memory at boot, so can't rely on cpu->state to be zero (== cpu_state_no_cpu).
 */
static void mark_all_secondary_cpus_absent(void)
{
	unsigned int pir;
	struct cpu_thread *cpu;

	for (pir = 0; pir <= cpu_max_pir; pir++) {
		cpu = &cpu_stacks[pir].cpu;
		if (cpu == boot_cpu)
			continue;
		cpu->state = cpu_state_no_cpu;
	}
}

void init_all_cpus(void)
{
	struct dt_node *cpus, *cpu;
	unsigned int pir, thread;
	int dec_bits = find_dec_bits();

	cpus = dt_find_by_path(dt_root, "/cpus");
	assert(cpus);

	init_tm_suspend_mode_property();

	mark_all_secondary_cpus_absent();

	/* Iterate all CPUs in the device-tree */
	dt_for_each_child(cpus, cpu) {
		unsigned int server_no, chip_id, threads;
		enum cpu_thread_state state;
		const struct dt_property *p;
		struct cpu_thread *t, *pt0, *pt1;

		/* Skip cache nodes */
		if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
			continue;

		server_no = dt_prop_get_u32(cpu, "reg");

		/* If PIR property is absent, assume it's the same as the
		 * server number
		 */
		pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);

		/* We should always have an ibm,chip-id property */
		chip_id = dt_get_chip_id(cpu);

		/* Only use operational CPUs */
		if (!strcmp(dt_prop_get(cpu, "status"), "okay")) {
			state = cpu_state_present;
			get_chip(chip_id)->ex_present = true;
		} else {
			state = cpu_state_unavailable;
		}

		prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x"
		      " State=%d\n", pir, server_no, state);

		/* Setup thread 0 */
		assert(pir <= cpu_max_pir);
		t = pt0 = &cpu_stacks[pir].cpu;

		if (t != boot_cpu) {
			init_cpu_thread(t, state, pir);
			/* Each cpu gets its own later in init_trace_buffers */
			t->trace = boot_cpu->trace;
		}
		if (t->is_fused_core)
			pt1 = &cpu_stacks[pir + 1].cpu;
		else
			pt1 = pt0;
		t->server_no = server_no;
		t->primary = t->ec_primary = t;
		t->node = cpu;
		t->chip_id = chip_id;
		t->icp_regs = NULL; /* Will be set later */
#ifdef DEBUG_LOCKS
		t->requested_lock = NULL;
#endif
		t->core_hmi_state = 0;
		t->core_hmi_state_ptr = &t->core_hmi_state;

		/* Add associativity properties */
		add_core_associativity(t);

		/* Add the decrementer width property */
		dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits);

		if (t->is_fused_core)
			dt_add_property(t->node, "ibm,fused-core", NULL, 0);

		/* Iterate threads */
		p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
		if (!p)
			continue;
		threads = p->len / 4;
		for (thread = 1; thread < threads; thread++) {
			prlog(PR_TRACE, "CPU: secondary thread %d found\n",
			      thread);
			t = &cpu_stacks[pir + thread].cpu;
			init_cpu_thread(t, state, pir + thread);
			t->trace = boot_cpu->trace;
			t->server_no = dt_property_get_cell(p, thread);
			t->is_secondary = true;
			t->is_fused_core = pt0->is_fused_core;
			t->primary = pt0;
			t->ec_primary = (thread & 1) ? pt1 : pt0;
			t->node = cpu;
			t->chip_id = chip_id;
			t->core_hmi_state_ptr = &pt0->core_hmi_state;
		}
		prlog(PR_INFO, "CPU: %d secondary threads\n", thread);
	}
}
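
/*
 * Release the secondaries spinning in the asm entry path by setting
 * cpu_secondary_start, then wait for every present CPU to mark itself
 * active via cpu_callin().
 */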
void cpu_bringup(void)
{
	struct cpu_thread *t;
	uint32_t count = 0;

	prlog(PR_INFO, "CPU: Setting up secondary CPU state\n");

	op_display(OP_LOG, OP_MOD_CPU, 0x0000);

	/* Tell everybody to chime in ! */
	prlog(PR_INFO, "CPU: Calling in all processors...\n");
	cpu_secondary_start = 1;
	sync();

	op_display(OP_LOG, OP_MOD_CPU, 0x0002);

	for_each_cpu(t) {
		if (t->state != cpu_state_present &&
		    t->state != cpu_state_active)
			continue;

		/* Add a callin timeout ? If so, call cpu_remove_node(t). */
		while (t->state != cpu_state_active) {
			smt_lowest();
			sync();
		}
		smt_medium();
		count++;
	}

	prlog(PR_NOTICE, "CPU: All %d processors called in...\n", count);

	op_display(OP_LOG, OP_MOD_CPU, 0x0003);
}

void cpu_callin(struct cpu_thread *cpu)
{
	sync();
	cpu->state = cpu_state_active;
	sync();

	cpu->job_has_no_return = false;
	if (cpu_is_thread0(cpu))
		init_hid();
}

static void opal_start_thread_job(void *data)
{
	cpu_give_self_os();

	/* We do not return, so let's mark the job as
	 * complete
	 */
	start_kernel_secondary((uint64_t)data);
}

static int64_t opal_start_cpu_thread(uint64_t server_no, uint64_t start_address)
{
	struct cpu_thread *cpu;
	struct cpu_job *job;

	if (!opal_addr_valid((void *)start_address))
		return OPAL_PARAMETER;

	cpu = find_cpu_by_server(server_no);
	if (!cpu) {
		prerror("OPAL: Start invalid CPU 0x%04llx !\n", server_no);
		return OPAL_PARAMETER;
	}
	prlog(PR_DEBUG, "OPAL: Start CPU 0x%04llx (PIR 0x%04x) -> 0x%016llx\n",
	      server_no, cpu->pir, start_address);

	lock(&reinit_lock);
	if (!cpu_is_available(cpu)) {
		unlock(&reinit_lock);
		prerror("OPAL: CPU not active in OPAL !\n");
		return OPAL_WRONG_STATE;
	}
	if (cpu->in_reinit) {
		unlock(&reinit_lock);
		prerror("OPAL: CPU being reinitialized !\n");
		return OPAL_WRONG_STATE;
	}
	job = __cpu_queue_job(cpu, "start_thread",
			      opal_start_thread_job, (void *)start_address,
			      true);
	unlock(&reinit_lock);
	if (!job) {
		prerror("OPAL: Failed to create CPU start job !\n");
		return OPAL_INTERNAL_ERROR;
	}
	return OPAL_SUCCESS;
}
opal_call(OPAL_START_CPU, opal_start_cpu_thread, 2);

static int64_t opal_query_cpu_status(uint64_t server_no, uint8_t *thread_status)
{
	struct cpu_thread *cpu;

	if (!opal_addr_valid(thread_status))
		return OPAL_PARAMETER;

	cpu = find_cpu_by_server(server_no);
	if (!cpu) {
		prerror("OPAL: Query invalid CPU 0x%04llx !\n", server_no);
		return OPAL_PARAMETER;
	}
	if (!cpu_is_available(cpu) && cpu->state != cpu_state_os) {
		prerror("OPAL: CPU not active in OPAL nor OS !\n");
		return OPAL_PARAMETER;
	}
	switch(cpu->state) {
	case cpu_state_os:
		*thread_status = OPAL_THREAD_STARTED;
		break;
	case cpu_state_active:
		/* Active in skiboot -> inactive in OS */
		*thread_status = OPAL_THREAD_INACTIVE;
		break;
	default:
		*thread_status = OPAL_THREAD_UNAVAILABLE;
	}

	return OPAL_SUCCESS;
}
opal_call(OPAL_QUERY_CPU_STATUS, opal_query_cpu_status, 2);

static int64_t opal_return_cpu(void)
{
	prlog(PR_DEBUG, "OPAL: Returning CPU 0x%04x\n", this_cpu()->pir);

	this_cpu()->in_opal_call--;
	if (this_cpu()->in_opal_call != 0) {
		printf("OPAL in_opal_call=%u\n", this_cpu()->in_opal_call);
	}

	__secondary_cpu_entry();

	return OPAL_HARDWARE; /* Should not happen */
}
opal_call(OPAL_RETURN_CPU, opal_return_cpu, 0);

struct hid0_change_req {
	uint64_t clr_bits;
	uint64_t set_bits;
};

static void cpu_change_hid0(void *__req)
{
	struct hid0_change_req *req = __req;
	unsigned long hid0, new_hid0;

	hid0 = new_hid0 = mfspr(SPR_HID0);
	new_hid0 &= ~req->clr_bits;
	new_hid0 |= req->set_bits;
	prlog(PR_DEBUG, "CPU: [%08x] HID0 change 0x%016lx -> 0x%016lx\n",
	      this_cpu()->pir, hid0, new_hid0);
	set_hid0(new_hid0);
}

static int64_t cpu_change_all_hid0(struct hid0_change_req *req)
{
	struct cpu_thread *cpu;
	struct cpu_job **jobs;

	jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
	assert(jobs);

	for_each_available_cpu(cpu) {
		if (!cpu_is_thread0(cpu) && !cpu_is_core_chiplet_primary(cpu))
			continue;
		if (cpu == this_cpu())
			continue;
		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_change_hid0",
					       cpu_change_hid0, req);
	}

	/* this cpu */
	cpu_change_hid0(req);

	for_each_available_cpu(cpu) {
		if (jobs[cpu->pir])
			cpu_wait_job(jobs[cpu->pir], true);
	}

	free(jobs);

	return OPAL_SUCCESS;
}

void cpu_set_hile_mode(bool hile)
{
	struct hid0_change_req req;

	if (hile == current_hile_mode)
		return;

	if (hile) {
		req.clr_bits = 0;
		req.set_bits = hid0_hile;
	} else {
		req.clr_bits = hid0_hile;
		req.set_bits = 0;
	}
	cpu_change_all_hid0(&req);
	current_hile_mode = hile;
}

static void cpu_cleanup_one(void *param __unused)
{
	mtspr(SPR_AMR, 0);
	mtspr(SPR_IAMR, 0);
	mtspr(SPR_PCR, 0);
}

static int64_t cpu_cleanup_all(void)
{
	struct cpu_thread *cpu;
	struct cpu_job **jobs;

	jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
	assert(jobs);

	for_each_available_cpu(cpu) {
		if (cpu == this_cpu())
			continue;
		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_cleanup",
					       cpu_cleanup_one, NULL);
	}

	/* this cpu */
	cpu_cleanup_one(NULL);

	for_each_available_cpu(cpu) {
		if (jobs[cpu->pir])
			cpu_wait_job(jobs[cpu->pir], true);
	}

	free(jobs);

	return OPAL_SUCCESS;
}

void cpu_fast_reboot_complete(void)
{
	/* Fast reboot will have set HID0:HILE to skiboot endian */
	current_hile_mode = HAVE_LITTLE_ENDIAN;

	/* and set HID0:RADIX */
	if (proc_gen == proc_gen_p9)
		current_radix_mode = true;

	/* P8 clears TLBs in cleanup_cpu_state() */
	if (proc_gen >= proc_gen_p9)
		cleanup_global_tlb();
}
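
/*
 * OPAL_REINIT_CPUS: called by the OS with all other CPUs returned to
 * OPAL. Handles the HILE (interrupt endian) switch and the hash/radix
 * HID mode on P9, applying any HID0 changes on all cores, and reports
 * whether TM suspend is already disabled in this firmware build.
 */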
static int64_t opal_reinit_cpus(uint64_t flags)
{
	struct hid0_change_req req = { 0, 0 };
	struct cpu_thread *cpu;
	int64_t rc = OPAL_SUCCESS;
	int i;

	prlog(PR_DEBUG, "OPAL: CPU re-init with flags: 0x%llx\n", flags);

	if (flags & OPAL_REINIT_CPUS_HILE_LE)
		prlog(PR_INFO, "OPAL: Switch to little-endian OS\n");
	else if (flags & OPAL_REINIT_CPUS_HILE_BE)
		prlog(PR_INFO, "OPAL: Switch to big-endian OS\n");

again:
	lock(&reinit_lock);

	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) {
		if (cpu == this_cpu() || cpu->in_reinit)
			continue;
		if (cpu->state == cpu_state_os) {
			unlock(&reinit_lock);
			/*
			 * This might be a race with a CPU still returning
			 * to OPAL during kexec; wait a bit and try again.
			 */
			for (i = 0; (i < 1000) &&
			     (cpu->state == cpu_state_os); i++) {
				time_wait_ms(1);
			}
			if (cpu->state == cpu_state_os) {
				prerror("OPAL: CPU 0x%x not in OPAL !\n", cpu->pir);
				return OPAL_WRONG_STATE;
			}
			goto again;
		}
		cpu->in_reinit = true;
	}
	/*
	 * Now we need to mark ourselves "active" or we'll be skipped
	 * by the various "for_each_active_..."
	 */
	this_cpu()->state = cpu_state_active;
	this_cpu()->in_reinit = true;
	unlock(&reinit_lock);

	/*
	 * This cleans up a few things left over by Linux
	 * that can cause problems in cases such as radix->hash
	 * transitions. Ideally Linux should do it but doing it
	 * here works around existing broken kernels.
	 */
	cpu_cleanup_all();

	if (flags & (OPAL_REINIT_CPUS_HILE_BE |
		     OPAL_REINIT_CPUS_HILE_LE)) {
		bool hile = !!(flags & OPAL_REINIT_CPUS_HILE_LE);

		flags &= ~(OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE);
		if (hile != current_hile_mode) {
			if (hile)
				req.set_bits |= hid0_hile;
			else
				req.clr_bits |= hid0_hile;
			current_hile_mode = hile;
		}
	}

	/* If MMU mode change is supported */
	if (radix_supported &&
	    (flags & (OPAL_REINIT_CPUS_MMU_HASH |
		      OPAL_REINIT_CPUS_MMU_RADIX))) {
		bool radix = !!(flags & OPAL_REINIT_CPUS_MMU_RADIX);

		flags &= ~(OPAL_REINIT_CPUS_MMU_HASH |
			   OPAL_REINIT_CPUS_MMU_RADIX);

		if (proc_gen == proc_gen_p9 && radix != current_radix_mode) {
			if (radix)
				req.set_bits |= SPR_HID0_POWER9_RADIX;
			else
				req.clr_bits |= SPR_HID0_POWER9_RADIX;

			current_radix_mode = radix;
		}
	}

	/* Cleanup the TLB. We do that unconditionally, this works
	 * around issues where OSes fail to invalidate the PWC in Radix
	 * mode for example. This only works on P9 and later, but we
	 * also know we don't have a problem with Linux cleanups on
	 * P8 so this isn't a problem. If we wanted to cleanup the
	 * TLB on P8 as well, we'd have to use jobs to do it locally
	 * on each CPU.
	 */
	cleanup_global_tlb();

	/* Apply HID bits changes if any */
	if (req.set_bits || req.clr_bits)
		cpu_change_all_hid0(&req);

	if (flags & OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) {
		flags &= ~OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED;

		if (tm_suspend_enabled)
			rc = OPAL_UNSUPPORTED;
		else
			rc = OPAL_SUCCESS;
	}

	if (flags != 0)
		rc = OPAL_UNSUPPORTED;

	/* And undo the above */
	lock(&reinit_lock);
	this_cpu()->state = cpu_state_os;
	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))
		cpu->in_reinit = false;
	unlock(&reinit_lock);

	return rc;
}
opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1);

#define NMMU_XLAT_CTL_PTCR 0xb
static int64_t nmmu_set_ptcr(uint64_t chip_id, struct dt_node *node, uint64_t ptcr)
{
	uint32_t nmmu_base_addr;

	nmmu_base_addr = dt_get_address(node, 0, NULL);
	return xscom_write(chip_id, nmmu_base_addr + NMMU_XLAT_CTL_PTCR, ptcr);
}

/*
 * Set up the Nest MMU PTCR register for all chips in the system or
 * the specified chip id.
 *
 * The PTCR value may be overwritten so long as all users have been
 * quiesced. If it is set to an invalid memory address the system will
 * checkstop if anything attempts to use it.
 *
 * Returns OPAL_UNSUPPORTED if no nest mmu was found.
 */
static int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr)
{
	struct dt_node *node;
	int64_t rc = OPAL_UNSUPPORTED;

	if (chip_id == -1ULL)
		dt_for_each_compatible(dt_root, node, "ibm,power9-nest-mmu") {
			chip_id = dt_get_chip_id(node);
			if ((rc = nmmu_set_ptcr(chip_id, node, ptcr)))
				return rc;
		}
	else
		dt_for_each_compatible_on_chip(dt_root, node, "ibm,power9-nest-mmu", chip_id)
			if ((rc = nmmu_set_ptcr(chip_id, node, ptcr)))
				return rc;

	return rc;
}
opal_call(OPAL_NMMU_SET_PTCR, opal_nmmu_set_ptcr, 2);

static void _exit_uv_mode(void *data __unused)
{
	prlog(PR_DEBUG, "Exit uv mode on cpu pir 0x%04x\n", this_cpu()->pir);
	/* HW has smfctrl shared between threads but on Mambo it is per-thread */
	if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
		exit_uv_mode(1);
	else
		exit_uv_mode(cpu_is_thread0(this_cpu()));
}
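
/*
 * Take all threads out of ultravisor (secure) mode: secondaries first,
 * then the thread-0 threads that own SMFCTRL, and finally the calling
 * CPU itself.
 */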
void cpu_disable_pef(void)
{
	struct cpu_thread *cpu;
	struct cpu_job **jobs;

	if (!(mfmsr() & MSR_S)) {
		prlog(PR_DEBUG, "UV mode off on cpu pir 0x%04x\n", this_cpu()->pir);
		return;
	}

	jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
	assert(jobs);

	/* Exit uv mode on all secondary threads before touching
	 * smfctrl on thread 0 */
	for_each_available_cpu(cpu) {
		if (cpu == this_cpu())
			continue;

		if (!cpu_is_thread0(cpu))
			jobs[cpu->pir] = cpu_queue_job(cpu, "exit_uv_mode",
						       _exit_uv_mode, NULL);
	}

	for_each_available_cpu(cpu)
		if (jobs[cpu->pir]) {
			cpu_wait_job(jobs[cpu->pir], true);
			jobs[cpu->pir] = NULL;
		}

	/* Exit uv mode and disable smfctrl on primary threads */
	for_each_available_cpu(cpu) {
		if (cpu == this_cpu())
			continue;

		if (cpu_is_thread0(cpu))
			jobs[cpu->pir] = cpu_queue_job(cpu, "exit_uv_mode",
						       _exit_uv_mode, NULL);
	}

	for_each_available_cpu(cpu)
		if (jobs[cpu->pir])
			cpu_wait_job(jobs[cpu->pir], true);

	free(jobs);

	_exit_uv_mode(NULL);
}