diff options
Diffstat (limited to 'kernel/panic.c')
-rw-r--r-- | kernel/panic.c | 804 |
1 files changed, 804 insertions, 0 deletions
diff --git a/kernel/panic.c b/kernel/panic.c new file mode 100644 index 000000000..bc39e2b27 --- /dev/null +++ b/kernel/panic.c @@ -0,0 +1,804 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/kernel/panic.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * This function is used through-out the kernel (including mm and fs) + * to indicate a major problem. + */ +#include <linux/debug_locks.h> +#include <linux/sched/debug.h> +#include <linux/interrupt.h> +#include <linux/kgdb.h> +#include <linux/kmsg_dump.h> +#include <linux/kallsyms.h> +#include <linux/notifier.h> +#include <linux/vt_kern.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/ftrace.h> +#include <linux/reboot.h> +#include <linux/delay.h> +#include <linux/kexec.h> +#include <linux/sched.h> +#include <linux/sysrq.h> +#include <linux/init.h> +#include <linux/nmi.h> +#include <linux/console.h> +#include <linux/bug.h> +#include <linux/ratelimit.h> +#include <linux/debugfs.h> +#include <linux/sysfs.h> +#include <asm/sections.h> + +#define PANIC_TIMER_STEP 100 +#define PANIC_BLINK_SPD 18 + +#ifdef CONFIG_SMP +/* + * Should we dump all CPUs backtraces in an oops event? + * Defaults to 0, can be changed via sysctl. + */ +static unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; +#else +#define sysctl_oops_all_cpu_backtrace 0 +#endif /* CONFIG_SMP */ + +int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; +static unsigned long tainted_mask = + IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0; +static int pause_on_oops; +static int pause_on_oops_flag; +static DEFINE_SPINLOCK(pause_on_oops_lock); +bool crash_kexec_post_notifiers; +int panic_on_warn __read_mostly; +unsigned long panic_on_taint; +bool panic_on_taint_nousertaint = false; +static unsigned int warn_limit __read_mostly; + +int panic_timeout = CONFIG_PANIC_TIMEOUT; +EXPORT_SYMBOL_GPL(panic_timeout); + +#define PANIC_PRINT_TASK_INFO 0x00000001 +#define PANIC_PRINT_MEM_INFO 0x00000002 +#define PANIC_PRINT_TIMER_INFO 0x00000004 +#define PANIC_PRINT_LOCK_INFO 0x00000008 +#define PANIC_PRINT_FTRACE_INFO 0x00000010 +#define PANIC_PRINT_ALL_PRINTK_MSG 0x00000020 +unsigned long panic_print; + +ATOMIC_NOTIFIER_HEAD(panic_notifier_list); + +EXPORT_SYMBOL(panic_notifier_list); + +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_panic_table[] = { +#ifdef CONFIG_SMP + { + .procname = "oops_all_cpu_backtrace", + .data = &sysctl_oops_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif + { + .procname = "warn_limit", + .data = &warn_limit, + .maxlen = sizeof(warn_limit), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { } +}; + +static __init int kernel_panic_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_panic_table); + return 0; +} +late_initcall(kernel_panic_sysctls_init); +#endif + +static atomic_t warn_count = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +static ssize_t warn_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%d\n", atomic_read(&warn_count)); +} + +static struct kobj_attribute warn_count_attr = __ATTR_RO(warn_count); + +static __init int kernel_panic_sysfs_init(void) +{ + sysfs_add_file_to_group(kernel_kobj, &warn_count_attr.attr, NULL); + return 0; +} +late_initcall(kernel_panic_sysfs_init); +#endif + +static long no_blink(int state) +{ + return 0; +} + +/* Returns how long it waited in ms */ +long (*panic_blink)(int state); +EXPORT_SYMBOL(panic_blink); + +/* + * Stop ourself in panic -- architecture code may override this + */ +void __weak panic_smp_self_stop(void) +{ + while (1) + cpu_relax(); +} + +/* + * Stop ourselves in NMI context if another CPU has already panicked. Arch code + * may override this to prepare for crash dumping, e.g. save regs info. + */ +void __weak nmi_panic_self_stop(struct pt_regs *regs) +{ + panic_smp_self_stop(); +} + +/* + * Stop other CPUs in panic. Architecture dependent code may override this + * with more suitable version. For example, if the architecture supports + * crash dump, it should save registers of each stopped CPU and disable + * per-CPU features such as virtualization extensions. + */ +void __weak crash_smp_send_stop(void) +{ + static int cpus_stopped; + + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + /* + * Note smp_send_stop is the usual smp shutdown function, which + * unfortunately means it may not be hardened to work in a panic + * situation. + */ + smp_send_stop(); + cpus_stopped = 1; +} + +atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); + +/* + * A variant of panic() called from NMI context. We return if we've already + * panicked on this CPU. If another CPU already panicked, loop in + * nmi_panic_self_stop() which can provide architecture dependent code such + * as saving register state for crash dump. + */ +void nmi_panic(struct pt_regs *regs, const char *msg) +{ + int old_cpu, cpu; + + cpu = raw_smp_processor_id(); + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); + + if (old_cpu == PANIC_CPU_INVALID) + panic("%s", msg); + else if (old_cpu != cpu) + nmi_panic_self_stop(regs); +} +EXPORT_SYMBOL(nmi_panic); + +static void panic_print_sys_info(void) +{ + if (panic_print & PANIC_PRINT_ALL_PRINTK_MSG) + console_flush_on_panic(CONSOLE_REPLAY_ALL); + + if (panic_print & PANIC_PRINT_TASK_INFO) + show_state(); + + if (panic_print & PANIC_PRINT_MEM_INFO) + show_mem(0, NULL); + + if (panic_print & PANIC_PRINT_TIMER_INFO) + sysrq_timer_list_show(); + + if (panic_print & PANIC_PRINT_LOCK_INFO) + debug_show_all_locks(); + + if (panic_print & PANIC_PRINT_FTRACE_INFO) + ftrace_dump(DUMP_ALL); +} + +void check_panic_on_warn(const char *origin) +{ + unsigned int limit; + + if (panic_on_warn) + panic("%s: panic_on_warn set ...\n", origin); + + limit = READ_ONCE(warn_limit); + if (atomic_inc_return(&warn_count) >= limit && limit) + panic("%s: system warned too often (kernel.warn_limit is %d)", + origin, limit); +} + +/** + * panic - halt the system + * @fmt: The text string to print + * + * Display a message, then perform cleanups. + * + * This function never returns. + */ +void panic(const char *fmt, ...) +{ + static char buf[1024]; + va_list args; + long i, i_next = 0, len; + int state = 0; + int old_cpu, this_cpu; + bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; + + if (panic_on_warn) { + /* + * This thread may hit another WARN() in the panic path. + * Resetting this prevents additional WARN() from panicking the + * system on this thread. Other threads are blocked by the + * panic_mutex in panic(). + */ + panic_on_warn = 0; + } + + /* + * Disable local interrupts. This will prevent panic_smp_self_stop + * from deadlocking the first cpu that invokes the panic, since + * there is nothing to prevent an interrupt handler (that runs + * after setting panic_cpu) from invoking panic() again. + */ + local_irq_disable(); + preempt_disable_notrace(); + + /* + * It's possible to come here directly from a panic-assertion and + * not have preempt disabled. Some functions called from here want + * preempt to be disabled. No point enabling it later though... + * + * Only one CPU is allowed to execute the panic code from here. For + * multiple parallel invocations of panic, all other CPUs either + * stop themself or will wait until they are stopped by the 1st CPU + * with smp_send_stop(). + * + * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which + * comes here, so go ahead. + * `old_cpu == this_cpu' means we came from nmi_panic() which sets + * panic_cpu to this CPU. In this case, this is also the 1st CPU. + */ + this_cpu = raw_smp_processor_id(); + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); + + if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) + panic_smp_self_stop(); + + console_verbose(); + bust_spinlocks(1); + va_start(args, fmt); + len = vscnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + if (len && buf[len - 1] == '\n') + buf[len - 1] = '\0'; + + pr_emerg("Kernel panic - not syncing: %s\n", buf); +#ifdef CONFIG_DEBUG_BUGVERBOSE + /* + * Avoid nested stack-dumping if a panic occurs during oops processing + */ + if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) + dump_stack(); +#endif + + /* + * If kgdb is enabled, give it a chance to run before we stop all + * the other CPUs or else we won't be able to debug processes left + * running on them. + */ + kgdb_panic(buf); + + /* + * If we have crashed and we have a crash kernel loaded let it handle + * everything else. + * If we want to run this after calling panic_notifiers, pass + * the "crash_kexec_post_notifiers" option to the kernel. + * + * Bypass the panic_cpu check and call __crash_kexec directly. + */ + if (!_crash_kexec_post_notifiers) { + printk_safe_flush_on_panic(); + __crash_kexec(NULL); + + /* + * Note smp_send_stop is the usual smp shutdown function, which + * unfortunately means it may not be hardened to work in a + * panic situation. + */ + smp_send_stop(); + } else { + /* + * If we want to do crash dump after notifier calls and + * kmsg_dump, we will need architecture dependent extra + * works in addition to stopping other CPUs. + */ + crash_smp_send_stop(); + } + + /* + * Run any panic handlers, including those that might need to + * add information to the kmsg dump output. + */ + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); + + /* Call flush even twice. It tries harder with a single online CPU */ + printk_safe_flush_on_panic(); + kmsg_dump(KMSG_DUMP_PANIC); + + /* + * If you doubt kdump always works fine in any situation, + * "crash_kexec_post_notifiers" offers you a chance to run + * panic_notifiers and dumping kmsg before kdump. + * Note: since some panic_notifiers can make crashed kernel + * more unstable, it can increase risks of the kdump failure too. + * + * Bypass the panic_cpu check and call __crash_kexec directly. + */ + if (_crash_kexec_post_notifiers) + __crash_kexec(NULL); + +#ifdef CONFIG_VT + unblank_screen(); +#endif + console_unblank(); + + /* + * We may have ended up stopping the CPU holding the lock (in + * smp_send_stop()) while still having some valuable data in the console + * buffer. Try to acquire the lock then release it regardless of the + * result. The release will also print the buffers out. Locks debug + * should be disabled to avoid reporting bad unlock balance when + * panic() is not being callled from OOPS. + */ + debug_locks_off(); + console_flush_on_panic(CONSOLE_FLUSH_PENDING); + + panic_print_sys_info(); + + if (!panic_blink) + panic_blink = no_blink; + + if (panic_timeout > 0) { + /* + * Delay timeout seconds before rebooting the machine. + * We can't use the "normal" timers since we just panicked. + */ + pr_emerg("Rebooting in %d seconds..\n", panic_timeout); + + for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { + touch_nmi_watchdog(); + if (i >= i_next) { + i += panic_blink(state ^= 1); + i_next = i + 3600 / PANIC_BLINK_SPD; + } + mdelay(PANIC_TIMER_STEP); + } + } + if (panic_timeout != 0) { + /* + * This will not be a clean reboot, with everything + * shutting down. But if there is a chance of + * rebooting the system it will be rebooted. + */ + if (panic_reboot_mode != REBOOT_UNDEFINED) + reboot_mode = panic_reboot_mode; + emergency_restart(); + } +#ifdef __sparc__ + { + extern int stop_a_enabled; + /* Make sure the user can actually press Stop-A (L1-A) */ + stop_a_enabled = 1; + pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n" + "twice on console to return to the boot prom\n"); + } +#endif +#if defined(CONFIG_S390) + disabled_wait(); +#endif + pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf); + + /* Do not scroll important messages printed above */ + suppress_printk = 1; + local_irq_enable(); + for (i = 0; ; i += PANIC_TIMER_STEP) { + touch_softlockup_watchdog(); + if (i >= i_next) { + i += panic_blink(state ^= 1); + i_next = i + 3600 / PANIC_BLINK_SPD; + } + mdelay(PANIC_TIMER_STEP); + } +} + +EXPORT_SYMBOL(panic); + +/* + * TAINT_FORCED_RMMOD could be a per-module flag but the module + * is being removed anyway. + */ +const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { + [ TAINT_PROPRIETARY_MODULE ] = { 'P', 'G', true }, + [ TAINT_FORCED_MODULE ] = { 'F', ' ', true }, + [ TAINT_CPU_OUT_OF_SPEC ] = { 'S', ' ', false }, + [ TAINT_FORCED_RMMOD ] = { 'R', ' ', false }, + [ TAINT_MACHINE_CHECK ] = { 'M', ' ', false }, + [ TAINT_BAD_PAGE ] = { 'B', ' ', false }, + [ TAINT_USER ] = { 'U', ' ', false }, + [ TAINT_DIE ] = { 'D', ' ', false }, + [ TAINT_OVERRIDDEN_ACPI_TABLE ] = { 'A', ' ', false }, + [ TAINT_WARN ] = { 'W', ' ', false }, + [ TAINT_CRAP ] = { 'C', ' ', true }, + [ TAINT_FIRMWARE_WORKAROUND ] = { 'I', ' ', false }, + [ TAINT_OOT_MODULE ] = { 'O', ' ', true }, + [ TAINT_UNSIGNED_MODULE ] = { 'E', ' ', true }, + [ TAINT_SOFTLOCKUP ] = { 'L', ' ', false }, + [ TAINT_LIVEPATCH ] = { 'K', ' ', true }, + [ TAINT_AUX ] = { 'X', ' ', true }, + [ TAINT_RANDSTRUCT ] = { 'T', ' ', true }, +}; + +/** + * print_tainted - return a string to represent the kernel taint state. + * + * For individual taint flag meanings, see Documentation/admin-guide/sysctl/kernel.rst + * + * The string is overwritten by the next call to print_tainted(), + * but is always NULL terminated. + */ +const char *print_tainted(void) +{ + static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")]; + + BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT); + + if (tainted_mask) { + char *s; + int i; + + s = buf + sprintf(buf, "Tainted: "); + for (i = 0; i < TAINT_FLAGS_COUNT; i++) { + const struct taint_flag *t = &taint_flags[i]; + *s++ = test_bit(i, &tainted_mask) ? + t->c_true : t->c_false; + } + *s = 0; + } else + snprintf(buf, sizeof(buf), "Not tainted"); + + return buf; +} + +int test_taint(unsigned flag) +{ + return test_bit(flag, &tainted_mask); +} +EXPORT_SYMBOL(test_taint); + +unsigned long get_taint(void) +{ + return tainted_mask; +} + +/** + * add_taint: add a taint flag if not already set. + * @flag: one of the TAINT_* constants. + * @lockdep_ok: whether lock debugging is still OK. + * + * If something bad has gone wrong, you'll want @lockdebug_ok = false, but for + * some notewortht-but-not-corrupting cases, it can be set to true. + */ +void add_taint(unsigned flag, enum lockdep_ok lockdep_ok) +{ + if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off()) + pr_warn("Disabling lock debugging due to kernel taint\n"); + + set_bit(flag, &tainted_mask); + + if (tainted_mask & panic_on_taint) { + panic_on_taint = 0; + panic("panic_on_taint set ..."); + } +} +EXPORT_SYMBOL(add_taint); + +static void spin_msec(int msecs) +{ + int i; + + for (i = 0; i < msecs; i++) { + touch_nmi_watchdog(); + mdelay(1); + } +} + +/* + * It just happens that oops_enter() and oops_exit() are identically + * implemented... + */ +static void do_oops_enter_exit(void) +{ + unsigned long flags; + static int spin_counter; + + if (!pause_on_oops) + return; + + spin_lock_irqsave(&pause_on_oops_lock, flags); + if (pause_on_oops_flag == 0) { + /* This CPU may now print the oops message */ + pause_on_oops_flag = 1; + } else { + /* We need to stall this CPU */ + if (!spin_counter) { + /* This CPU gets to do the counting */ + spin_counter = pause_on_oops; + do { + spin_unlock(&pause_on_oops_lock); + spin_msec(MSEC_PER_SEC); + spin_lock(&pause_on_oops_lock); + } while (--spin_counter); + pause_on_oops_flag = 0; + } else { + /* This CPU waits for a different one */ + while (spin_counter) { + spin_unlock(&pause_on_oops_lock); + spin_msec(1); + spin_lock(&pause_on_oops_lock); + } + } + } + spin_unlock_irqrestore(&pause_on_oops_lock, flags); +} + +/* + * Return true if the calling CPU is allowed to print oops-related info. + * This is a bit racy.. + */ +bool oops_may_print(void) +{ + return pause_on_oops_flag == 0; +} + +/* + * Called when the architecture enters its oops handler, before it prints + * anything. If this is the first CPU to oops, and it's oopsing the first + * time then let it proceed. + * + * This is all enabled by the pause_on_oops kernel boot option. We do all + * this to ensure that oopses don't scroll off the screen. It has the + * side-effect of preventing later-oopsing CPUs from mucking up the display, + * too. + * + * It turns out that the CPU which is allowed to print ends up pausing for + * the right duration, whereas all the other CPUs pause for twice as long: + * once in oops_enter(), once in oops_exit(). + */ +void oops_enter(void) +{ + tracing_off(); + /* can't trust the integrity of the kernel anymore: */ + debug_locks_off(); + do_oops_enter_exit(); + + if (sysctl_oops_all_cpu_backtrace) + trigger_all_cpu_backtrace(); +} + +/* + * 64-bit random ID for oopses: + */ +static u64 oops_id; + +static int init_oops_id(void) +{ + if (!oops_id) + get_random_bytes(&oops_id, sizeof(oops_id)); + else + oops_id++; + + return 0; +} +late_initcall(init_oops_id); + +static void print_oops_end_marker(void) +{ + init_oops_id(); + pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id); +} + +/* + * Called when the architecture exits its oops handler, after printing + * everything. + */ +void oops_exit(void) +{ + do_oops_enter_exit(); + print_oops_end_marker(); + kmsg_dump(KMSG_DUMP_OOPS); +} + +struct warn_args { + const char *fmt; + va_list args; +}; + +void __warn(const char *file, int line, void *caller, unsigned taint, + struct pt_regs *regs, struct warn_args *args) +{ + disable_trace_on_warning(); + + if (file) + pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", + raw_smp_processor_id(), current->pid, file, line, + caller); + else + pr_warn("WARNING: CPU: %d PID: %d at %pS\n", + raw_smp_processor_id(), current->pid, caller); + + if (args) + vprintk(args->fmt, args->args); + + print_modules(); + + if (regs) + show_regs(regs); + + check_panic_on_warn("kernel"); + + if (!regs) + dump_stack(); + + print_irqtrace_events(current); + + print_oops_end_marker(); + + /* Just a warning, don't kill lockdep. */ + add_taint(taint, LOCKDEP_STILL_OK); +} + +#ifndef __WARN_FLAGS +void warn_slowpath_fmt(const char *file, int line, unsigned taint, + const char *fmt, ...) +{ + struct warn_args args; + + pr_warn(CUT_HERE); + + if (!fmt) { + __warn(file, line, __builtin_return_address(0), taint, + NULL, NULL); + return; + } + + args.fmt = fmt; + va_start(args.args, fmt); + __warn(file, line, __builtin_return_address(0), taint, NULL, &args); + va_end(args.args); +} +EXPORT_SYMBOL(warn_slowpath_fmt); +#else +void __warn_printk(const char *fmt, ...) +{ + va_list args; + + pr_warn(CUT_HERE); + + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); +} +EXPORT_SYMBOL(__warn_printk); +#endif + +#ifdef CONFIG_BUG + +/* Support resetting WARN*_ONCE state */ + +static int clear_warn_once_set(void *data, u64 val) +{ + generic_bug_clear_once(); + memset(__start_once, 0, __end_once - __start_once); + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(clear_warn_once_fops, NULL, clear_warn_once_set, + "%lld\n"); + +static __init int register_warn_debugfs(void) +{ + /* Don't care about failure */ + debugfs_create_file_unsafe("clear_warn_once", 0200, NULL, NULL, + &clear_warn_once_fops); + return 0; +} + +device_initcall(register_warn_debugfs); +#endif + +#ifdef CONFIG_STACKPROTECTOR + +/* + * Called when gcc's -fstack-protector feature is used, and + * gcc detects corruption of the on-stack canary value + */ +__visible noinstr void __stack_chk_fail(void) +{ + instrumentation_begin(); + panic("stack-protector: Kernel stack is corrupted in: %pB", + __builtin_return_address(0)); + instrumentation_end(); +} +EXPORT_SYMBOL(__stack_chk_fail); + +#endif + +core_param(panic, panic_timeout, int, 0644); +core_param(panic_print, panic_print, ulong, 0644); +core_param(pause_on_oops, pause_on_oops, int, 0644); +core_param(panic_on_warn, panic_on_warn, int, 0644); +core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644); + +static int __init oops_setup(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "panic")) + panic_on_oops = 1; + return 0; +} +early_param("oops", oops_setup); + +static int __init panic_on_taint_setup(char *s) +{ + char *taint_str; + + if (!s) + return -EINVAL; + + taint_str = strsep(&s, ","); + if (kstrtoul(taint_str, 16, &panic_on_taint)) + return -EINVAL; + + /* make sure panic_on_taint doesn't hold out-of-range TAINT flags */ + panic_on_taint &= TAINT_FLAGS_MAX; + + if (!panic_on_taint) + return -EINVAL; + + if (s && !strcmp(s, "nousertaint")) + panic_on_taint_nousertaint = true; + + pr_info("panic_on_taint: bitmask=0x%lx nousertaint_mode=%sabled\n", + panic_on_taint, panic_on_taint_nousertaint ? "en" : "dis"); + + return 0; +} +early_param("panic_on_taint", panic_on_taint_setup); |