From ace9429bb58fd418f0c81d4c2835699bddf6bde6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 11 Apr 2024 10:27:49 +0200 Subject: Adding upstream version 6.6.15. Signed-off-by: Daniel Baumann --- fs/pstore/Kconfig | 191 ++++++ fs/pstore/Makefile | 20 + fs/pstore/blk.c | 361 ++++++++++++ fs/pstore/ftrace.c | 208 +++++++ fs/pstore/inode.c | 511 ++++++++++++++++ fs/pstore/internal.h | 52 ++ fs/pstore/platform.c | 764 ++++++++++++++++++++++++ fs/pstore/pmsg.c | 94 +++ fs/pstore/ram.c | 982 +++++++++++++++++++++++++++++++ fs/pstore/ram_core.c | 622 ++++++++++++++++++++ fs/pstore/ram_internal.h | 98 ++++ fs/pstore/zone.c | 1468 ++++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 5371 insertions(+) create mode 100644 fs/pstore/Kconfig create mode 100644 fs/pstore/Makefile create mode 100644 fs/pstore/blk.c create mode 100644 fs/pstore/ftrace.c create mode 100644 fs/pstore/inode.c create mode 100644 fs/pstore/internal.h create mode 100644 fs/pstore/platform.c create mode 100644 fs/pstore/pmsg.c create mode 100644 fs/pstore/ram.c create mode 100644 fs/pstore/ram_core.c create mode 100644 fs/pstore/ram_internal.h create mode 100644 fs/pstore/zone.c (limited to 'fs/pstore') diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig new file mode 100644 index 0000000000..3acc38600c --- /dev/null +++ b/fs/pstore/Kconfig @@ -0,0 +1,191 @@ +# SPDX-License-Identifier: GPL-2.0-only +config PSTORE + tristate "Persistent store support" + default n + help + This option enables generic access to platform level + persistent storage via "pstore" filesystem that can + be mounted as /dev/pstore. Only useful if you have + a platform level driver that registers with pstore to + provide the data, so you probably should just go say "Y" + (or "M") to a platform specific persistent store driver + (e.g. ACPI_APEI on X86) which will select this for you. + If you don't have a platform persistent store driver, + say N. + +config PSTORE_DEFAULT_KMSG_BYTES + int "Default kernel log storage space" if EXPERT + depends on PSTORE + default "10240" + help + Defines default size of pstore kernel log storage. + Can be enlarged if needed, not recommended to shrink it. + +config PSTORE_COMPRESS + bool "Pstore compression (deflate)" + depends on PSTORE + select ZLIB_INFLATE + select ZLIB_DEFLATE + default y + help + Whether pstore records should be compressed before being written to + the backing store. This is implemented using the zlib 'deflate' + algorithm, using the library implementation instead of using the full + blown crypto API. This reduces the risk of secondary oopses or other + problems while pstore is recording panic metadata. + +config PSTORE_CONSOLE + bool "Log kernel console messages" + depends on PSTORE + help + When the option is enabled, pstore will log all kernel + messages, even if no oops or panic happened. + +config PSTORE_PMSG + bool "Log user space messages" + depends on PSTORE + select RT_MUTEXES + help + When the option is enabled, pstore will export a character + interface /dev/pmsg0 to log user space messages. On reboot + data can be retrieved from /sys/fs/pstore/pmsg-ramoops-[ID]. + + If unsure, say N. + +config PSTORE_FTRACE + bool "Persistent function tracer" + depends on PSTORE + depends on FUNCTION_TRACER + depends on DEBUG_FS + help + With this option kernel traces function calls into a persistent + ram buffer that can be decoded and dumped after reboot through + pstore filesystem. It can be used to determine what function + was last called before a reset or panic. + + If unsure, say N. + +config PSTORE_RAM + tristate "Log panic/oops to a RAM buffer" + depends on PSTORE + depends on HAS_IOMEM + select REED_SOLOMON + select REED_SOLOMON_ENC8 + select REED_SOLOMON_DEC8 + help + This enables panic and oops messages to be logged to a circular + buffer in RAM where it can be read back at some later point. + + Note that for historical reasons, the module will be named + "ramoops.ko". + + For more information, see Documentation/admin-guide/ramoops.rst. + +config PSTORE_ZONE + tristate + depends on PSTORE + help + The common layer for pstore/blk (and pstore/ram in the future) + to manage storage in zones. + +config PSTORE_BLK + tristate "Log panic/oops to a block device" + depends on PSTORE + depends on BLOCK + select PSTORE_ZONE + default n + help + This enables panic and oops message to be logged to a block dev + where it can be read back at some later point. + + For more information, see Documentation/admin-guide/pstore-blk.rst + + If unsure, say N. + +config PSTORE_BLK_BLKDEV + string "block device identifier" + depends on PSTORE_BLK + default "" + help + Which block device should be used for pstore/blk. + + It accepts the following variants: + 1) device number in hexadecimal representation, + with no leading 0x, for example b302. + 2) /dev/ represents the device name of disk + 3) /dev/ represents the device name and number + of partition - device number of disk plus the partition number + 4) /dev/p - same as the above, this form is + used when disk name of partitioned disk ends with a digit. + 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the + unique id of a partition if the partition table provides it. + The UUID may be either an EFI/GPT UUID, or refer to an MSDOS + partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero- + filled hex representation of the 32-bit "NT disk signature", and PP + is a zero-filled hex representation of the 1-based partition number. + 6) PARTUUID=/PARTNROFF= to select a partition in relation + to a partition with a known unique id. + 7) : major and minor number of the device separated by + a colon. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_KMSG_SIZE + int "Size in Kbytes of kmsg dump log to store" + depends on PSTORE_BLK + default 64 + help + This just sets size of kmsg dump (oops, panic, etc) log for + pstore/blk. The size is in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_MAX_REASON + int "Maximum kmsg dump reason to store" + depends on PSTORE_BLK + default 2 + help + The maximum reason for kmsg dumps to store. The default is + 2 (KMSG_DUMP_OOPS), see include/linux/kmsg_dump.h's + enum kmsg_dump_reason for more details. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_PMSG_SIZE + int "Size in Kbytes of pmsg to store" + depends on PSTORE_BLK + depends on PSTORE_PMSG + default 64 + help + This just sets size of pmsg (pmsg_size) for pstore/blk. The size is + in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_CONSOLE_SIZE + int "Size in Kbytes of console log to store" + depends on PSTORE_BLK + depends on PSTORE_CONSOLE + default 64 + help + This just sets size of console log (console_size) to store via + pstore/blk. The size is in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_FTRACE_SIZE + int "Size in Kbytes of ftrace log to store" + depends on PSTORE_BLK + depends on PSTORE_FTRACE + default 64 + help + This just sets size of ftrace log (ftrace_size) for pstore/blk. The + size is in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile new file mode 100644 index 0000000000..c270467aee --- /dev/null +++ b/fs/pstore/Makefile @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the linux pstorefs routines. +# + +obj-$(CONFIG_PSTORE) += pstore.o + +pstore-objs += inode.o platform.o +pstore-$(CONFIG_PSTORE_FTRACE) += ftrace.o + +pstore-$(CONFIG_PSTORE_PMSG) += pmsg.o + +ramoops-objs += ram.o ram_core.o +obj-$(CONFIG_PSTORE_RAM) += ramoops.o + +pstore_zone-objs += zone.o +obj-$(CONFIG_PSTORE_ZONE) += pstore_zone.o + +pstore_blk-objs += blk.o +obj-$(CONFIG_PSTORE_BLK) += pstore_blk.o diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c new file mode 100644 index 0000000000..de8cf5d75f --- /dev/null +++ b/fs/pstore/blk.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implements pstore backend driver that write to block (or non-block) storage + * devices, using the pstore/zone API. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static long kmsg_size = CONFIG_PSTORE_BLK_KMSG_SIZE; +module_param(kmsg_size, long, 0400); +MODULE_PARM_DESC(kmsg_size, "kmsg dump record size in kbytes"); + +static int max_reason = CONFIG_PSTORE_BLK_MAX_REASON; +module_param(max_reason, int, 0400); +MODULE_PARM_DESC(max_reason, + "maximum reason for kmsg dump (default 2: Oops and Panic)"); + +#if IS_ENABLED(CONFIG_PSTORE_PMSG) +static long pmsg_size = CONFIG_PSTORE_BLK_PMSG_SIZE; +#else +static long pmsg_size = -1; +#endif +module_param(pmsg_size, long, 0400); +MODULE_PARM_DESC(pmsg_size, "pmsg size in kbytes"); + +#if IS_ENABLED(CONFIG_PSTORE_CONSOLE) +static long console_size = CONFIG_PSTORE_BLK_CONSOLE_SIZE; +#else +static long console_size = -1; +#endif +module_param(console_size, long, 0400); +MODULE_PARM_DESC(console_size, "console size in kbytes"); + +#if IS_ENABLED(CONFIG_PSTORE_FTRACE) +static long ftrace_size = CONFIG_PSTORE_BLK_FTRACE_SIZE; +#else +static long ftrace_size = -1; +#endif +module_param(ftrace_size, long, 0400); +MODULE_PARM_DESC(ftrace_size, "ftrace size in kbytes"); + +static bool best_effort; +module_param(best_effort, bool, 0400); +MODULE_PARM_DESC(best_effort, "use best effort to write (i.e. do not require storage driver pstore support, default: off)"); + +/* + * blkdev - the block device to use for pstore storage + * See Documentation/admin-guide/pstore-blk.rst for details. + */ +static char blkdev[80] = CONFIG_PSTORE_BLK_BLKDEV; +module_param_string(blkdev, blkdev, 80, 0400); +MODULE_PARM_DESC(blkdev, "block device for pstore storage"); + +/* + * All globals must only be accessed under the pstore_blk_lock + * during the register/unregister functions. + */ +static DEFINE_MUTEX(pstore_blk_lock); +static struct file *psblk_file; +static struct pstore_device_info *pstore_device_info; + +#define check_size(name, alignsize) ({ \ + long _##name_ = (name); \ + _##name_ = _##name_ <= 0 ? 0 : (_##name_ * 1024); \ + if (_##name_ & ((alignsize) - 1)) { \ + pr_info(#name " must align to %d\n", \ + (alignsize)); \ + _##name_ = ALIGN(name, (alignsize)); \ + } \ + _##name_; \ +}) + +#define verify_size(name, alignsize, enabled) { \ + long _##name_; \ + if (enabled) \ + _##name_ = check_size(name, alignsize); \ + else \ + _##name_ = 0; \ + /* Synchronize module parameters with resuls. */ \ + name = _##name_ / 1024; \ + dev->zone.name = _##name_; \ +} + +static int __register_pstore_device(struct pstore_device_info *dev) +{ + int ret; + + lockdep_assert_held(&pstore_blk_lock); + + if (!dev) { + pr_err("NULL device info\n"); + return -EINVAL; + } + if (!dev->zone.total_size) { + pr_err("zero sized device\n"); + return -EINVAL; + } + if (!dev->zone.read) { + pr_err("no read handler for device\n"); + return -EINVAL; + } + if (!dev->zone.write) { + pr_err("no write handler for device\n"); + return -EINVAL; + } + + /* someone already registered before */ + if (pstore_device_info) + return -EBUSY; + + /* zero means not limit on which backends to attempt to store. */ + if (!dev->flags) + dev->flags = UINT_MAX; + + /* Copy in module parameters. */ + verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG); + verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG); + verify_size(console_size, 4096, dev->flags & PSTORE_FLAGS_CONSOLE); + verify_size(ftrace_size, 4096, dev->flags & PSTORE_FLAGS_FTRACE); + dev->zone.max_reason = max_reason; + + /* Initialize required zone ownership details. */ + dev->zone.name = KBUILD_MODNAME; + dev->zone.owner = THIS_MODULE; + + ret = register_pstore_zone(&dev->zone); + if (ret == 0) + pstore_device_info = dev; + + return ret; +} +/** + * register_pstore_device() - register non-block device to pstore/blk + * + * @dev: non-block device information + * + * Return: + * * 0 - OK + * * Others - something error. + */ +int register_pstore_device(struct pstore_device_info *dev) +{ + int ret; + + mutex_lock(&pstore_blk_lock); + ret = __register_pstore_device(dev); + mutex_unlock(&pstore_blk_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(register_pstore_device); + +static void __unregister_pstore_device(struct pstore_device_info *dev) +{ + lockdep_assert_held(&pstore_blk_lock); + if (pstore_device_info && pstore_device_info == dev) { + unregister_pstore_zone(&dev->zone); + pstore_device_info = NULL; + } +} + +/** + * unregister_pstore_device() - unregister non-block device from pstore/blk + * + * @dev: non-block device information + */ +void unregister_pstore_device(struct pstore_device_info *dev) +{ + mutex_lock(&pstore_blk_lock); + __unregister_pstore_device(dev); + mutex_unlock(&pstore_blk_lock); +} +EXPORT_SYMBOL_GPL(unregister_pstore_device); + +static ssize_t psblk_generic_blk_read(char *buf, size_t bytes, loff_t pos) +{ + return kernel_read(psblk_file, buf, bytes, &pos); +} + +static ssize_t psblk_generic_blk_write(const char *buf, size_t bytes, + loff_t pos) +{ + /* Console/Ftrace backend may handle buffer until flush dirty zones */ + if (in_interrupt() || irqs_disabled()) + return -EBUSY; + return kernel_write(psblk_file, buf, bytes, &pos); +} + +/* + * This takes its configuration only from the module parameters now. + */ +static int __register_pstore_blk(struct pstore_device_info *dev, + const char *devpath) +{ + int ret = -ENODEV; + + lockdep_assert_held(&pstore_blk_lock); + + psblk_file = filp_open(devpath, O_RDWR | O_DSYNC | O_NOATIME | O_EXCL, 0); + if (IS_ERR(psblk_file)) { + ret = PTR_ERR(psblk_file); + pr_err("failed to open '%s': %d!\n", devpath, ret); + goto err; + } + + if (!S_ISBLK(file_inode(psblk_file)->i_mode)) { + pr_err("'%s' is not block device!\n", devpath); + goto err_fput; + } + + dev->zone.total_size = + bdev_nr_bytes(I_BDEV(psblk_file->f_mapping->host)); + + ret = __register_pstore_device(dev); + if (ret) + goto err_fput; + + return 0; + +err_fput: + fput(psblk_file); +err: + psblk_file = NULL; + + return ret; +} + +/* get information of pstore/blk */ +int pstore_blk_get_config(struct pstore_blk_config *info) +{ + strncpy(info->device, blkdev, 80); + info->max_reason = max_reason; + info->kmsg_size = check_size(kmsg_size, 4096); + info->pmsg_size = check_size(pmsg_size, 4096); + info->ftrace_size = check_size(ftrace_size, 4096); + info->console_size = check_size(console_size, 4096); + + return 0; +} +EXPORT_SYMBOL_GPL(pstore_blk_get_config); + + +#ifndef MODULE +static const char devname[] = "/dev/pstore-blk"; +static __init const char *early_boot_devpath(const char *initial_devname) +{ + /* + * During early boot the real root file system hasn't been + * mounted yet, and no device nodes are present yet. Use the + * same scheme to find the device that we use for mounting + * the root file system. + */ + dev_t dev; + + if (early_lookup_bdev(initial_devname, &dev)) { + pr_err("failed to resolve '%s'!\n", initial_devname); + return initial_devname; + } + + init_unlink(devname); + init_mknod(devname, S_IFBLK | 0600, new_encode_dev(dev)); + + return devname; +} +#else +static inline const char *early_boot_devpath(const char *initial_devname) +{ + return initial_devname; +} +#endif + +static int __init __best_effort_init(void) +{ + struct pstore_device_info *best_effort_dev; + int ret; + + /* No best-effort mode requested. */ + if (!best_effort) + return 0; + + /* Reject an empty blkdev. */ + if (!blkdev[0]) { + pr_err("blkdev empty with best_effort=Y\n"); + return -EINVAL; + } + + best_effort_dev = kzalloc(sizeof(*best_effort_dev), GFP_KERNEL); + if (!best_effort_dev) + return -ENOMEM; + + best_effort_dev->zone.read = psblk_generic_blk_read; + best_effort_dev->zone.write = psblk_generic_blk_write; + + ret = __register_pstore_blk(best_effort_dev, + early_boot_devpath(blkdev)); + if (ret) + kfree(best_effort_dev); + else + pr_info("attached %s (%lu) (no dedicated panic_write!)\n", + blkdev, best_effort_dev->zone.total_size); + + return ret; +} + +static void __exit __best_effort_exit(void) +{ + /* + * Currently, the only user of psblk_file is best_effort, so + * we can assume that pstore_device_info is associated with it. + * Once there are "real" blk devices, there will need to be a + * dedicated pstore_blk_info, etc. + */ + if (psblk_file) { + struct pstore_device_info *dev = pstore_device_info; + + __unregister_pstore_device(dev); + kfree(dev); + fput(psblk_file); + psblk_file = NULL; + } +} + +static int __init pstore_blk_init(void) +{ + int ret; + + mutex_lock(&pstore_blk_lock); + ret = __best_effort_init(); + mutex_unlock(&pstore_blk_lock); + + return ret; +} +late_initcall(pstore_blk_init); + +static void __exit pstore_blk_exit(void) +{ + mutex_lock(&pstore_blk_lock); + __best_effort_exit(); + /* If we've been asked to unload, unregister any remaining device. */ + __unregister_pstore_device(pstore_device_info); + mutex_unlock(&pstore_blk_lock); +} +module_exit(pstore_blk_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("WeiXiong Liao "); +MODULE_AUTHOR("Kees Cook "); +MODULE_DESCRIPTION("pstore backend for block devices"); diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c new file mode 100644 index 0000000000..776cae20af --- /dev/null +++ b/fs/pstore/ftrace.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2012 Google, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +/* This doesn't need to be atomic: speed is chosen over correctness here. */ +static u64 pstore_ftrace_stamp; + +static void notrace pstore_ftrace_call(unsigned long ip, + unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs) +{ + int bit; + unsigned long flags; + struct pstore_ftrace_record rec = {}; + struct pstore_record record = { + .type = PSTORE_TYPE_FTRACE, + .buf = (char *)&rec, + .size = sizeof(rec), + .psi = psinfo, + }; + + if (unlikely(oops_in_progress)) + return; + + bit = ftrace_test_recursion_trylock(ip, parent_ip); + if (bit < 0) + return; + + local_irq_save(flags); + + rec.ip = ip; + rec.parent_ip = parent_ip; + pstore_ftrace_write_timestamp(&rec, pstore_ftrace_stamp++); + pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); + psinfo->write(&record); + + local_irq_restore(flags); + ftrace_test_recursion_unlock(bit); +} + +static struct ftrace_ops pstore_ftrace_ops __read_mostly = { + .func = pstore_ftrace_call, +}; + +static DEFINE_MUTEX(pstore_ftrace_lock); +static bool pstore_ftrace_enabled; + +static int pstore_set_ftrace_enabled(bool on) +{ + ssize_t ret; + + if (on == pstore_ftrace_enabled) + return 0; + + if (on) { + ftrace_ops_set_global_filter(&pstore_ftrace_ops); + ret = register_ftrace_function(&pstore_ftrace_ops); + } else { + ret = unregister_ftrace_function(&pstore_ftrace_ops); + } + + if (ret) { + pr_err("%s: unable to %sregister ftrace ops: %zd\n", + __func__, on ? "" : "un", ret); + } else { + pstore_ftrace_enabled = on; + } + + return ret; +} + +static ssize_t pstore_ftrace_knob_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + u8 on; + ssize_t ret; + + ret = kstrtou8_from_user(buf, count, 2, &on); + if (ret) + return ret; + + mutex_lock(&pstore_ftrace_lock); + ret = pstore_set_ftrace_enabled(on); + mutex_unlock(&pstore_ftrace_lock); + + if (ret == 0) + ret = count; + + return ret; +} + +static ssize_t pstore_ftrace_knob_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + char val[] = { '0' + pstore_ftrace_enabled, '\n' }; + + return simple_read_from_buffer(buf, count, ppos, val, sizeof(val)); +} + +static const struct file_operations pstore_knob_fops = { + .open = simple_open, + .read = pstore_ftrace_knob_read, + .write = pstore_ftrace_knob_write, +}; + +static struct dentry *pstore_ftrace_dir; + +static bool record_ftrace; +module_param(record_ftrace, bool, 0400); +MODULE_PARM_DESC(record_ftrace, + "enable ftrace recording immediately (default: off)"); + +void pstore_register_ftrace(void) +{ + if (!psinfo->write) + return; + + pstore_ftrace_dir = debugfs_create_dir("pstore", NULL); + + pstore_set_ftrace_enabled(record_ftrace); + + debugfs_create_file("record_ftrace", 0600, pstore_ftrace_dir, NULL, + &pstore_knob_fops); +} + +void pstore_unregister_ftrace(void) +{ + mutex_lock(&pstore_ftrace_lock); + if (pstore_ftrace_enabled) { + unregister_ftrace_function(&pstore_ftrace_ops); + pstore_ftrace_enabled = false; + } + mutex_unlock(&pstore_ftrace_lock); + + debugfs_remove_recursive(pstore_ftrace_dir); +} + +ssize_t pstore_ftrace_combine_log(char **dest_log, size_t *dest_log_size, + const char *src_log, size_t src_log_size) +{ + size_t dest_size, src_size, total, dest_off, src_off; + size_t dest_idx = 0, src_idx = 0, merged_idx = 0; + void *merged_buf; + struct pstore_ftrace_record *drec, *srec, *mrec; + size_t record_size = sizeof(struct pstore_ftrace_record); + + dest_off = *dest_log_size % record_size; + dest_size = *dest_log_size - dest_off; + + src_off = src_log_size % record_size; + src_size = src_log_size - src_off; + + total = dest_size + src_size; + merged_buf = kmalloc(total, GFP_KERNEL); + if (!merged_buf) + return -ENOMEM; + + drec = (struct pstore_ftrace_record *)(*dest_log + dest_off); + srec = (struct pstore_ftrace_record *)(src_log + src_off); + mrec = (struct pstore_ftrace_record *)(merged_buf); + + while (dest_size > 0 && src_size > 0) { + if (pstore_ftrace_read_timestamp(&drec[dest_idx]) < + pstore_ftrace_read_timestamp(&srec[src_idx])) { + mrec[merged_idx++] = drec[dest_idx++]; + dest_size -= record_size; + } else { + mrec[merged_idx++] = srec[src_idx++]; + src_size -= record_size; + } + } + + while (dest_size > 0) { + mrec[merged_idx++] = drec[dest_idx++]; + dest_size -= record_size; + } + + while (src_size > 0) { + mrec[merged_idx++] = srec[src_idx++]; + src_size -= record_size; + } + + kfree(*dest_log); + *dest_log = merged_buf; + *dest_log_size = total; + + return 0; +} +EXPORT_SYMBOL_GPL(pstore_ftrace_combine_log); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c new file mode 100644 index 0000000000..585360706b --- /dev/null +++ b/fs/pstore/inode.c @@ -0,0 +1,511 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Persistent Storage - ramfs parts. + * + * Copyright (C) 2010 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +#define PSTORE_NAMELEN 64 + +static DEFINE_MUTEX(records_list_lock); +static LIST_HEAD(records_list); + +static DEFINE_MUTEX(pstore_sb_lock); +static struct super_block *pstore_sb; + +struct pstore_private { + struct list_head list; + struct dentry *dentry; + struct pstore_record *record; + size_t total_size; +}; + +struct pstore_ftrace_seq_data { + const void *ptr; + size_t off; + size_t size; +}; + +#define REC_SIZE sizeof(struct pstore_ftrace_record) + +static void free_pstore_private(struct pstore_private *private) +{ + if (!private) + return; + if (private->record) { + kvfree(private->record->buf); + kfree(private->record->priv); + kfree(private->record); + } + kfree(private); +} + +static void *pstore_ftrace_seq_start(struct seq_file *s, loff_t *pos) +{ + struct pstore_private *ps = s->private; + struct pstore_ftrace_seq_data *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + + data->off = ps->total_size % REC_SIZE; + data->off += *pos * REC_SIZE; + if (data->off + REC_SIZE > ps->total_size) { + kfree(data); + return NULL; + } + + return data; + +} + +static void pstore_ftrace_seq_stop(struct seq_file *s, void *v) +{ + kfree(v); +} + +static void *pstore_ftrace_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct pstore_private *ps = s->private; + struct pstore_ftrace_seq_data *data = v; + + (*pos)++; + data->off += REC_SIZE; + if (data->off + REC_SIZE > ps->total_size) + return NULL; + + return data; +} + +static int pstore_ftrace_seq_show(struct seq_file *s, void *v) +{ + struct pstore_private *ps = s->private; + struct pstore_ftrace_seq_data *data = v; + struct pstore_ftrace_record *rec; + + if (!data) + return 0; + + rec = (struct pstore_ftrace_record *)(ps->record->buf + data->off); + + seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %ps <- %pS\n", + pstore_ftrace_decode_cpu(rec), + pstore_ftrace_read_timestamp(rec), + rec->ip, rec->parent_ip, (void *)rec->ip, + (void *)rec->parent_ip); + + return 0; +} + +static const struct seq_operations pstore_ftrace_seq_ops = { + .start = pstore_ftrace_seq_start, + .next = pstore_ftrace_seq_next, + .stop = pstore_ftrace_seq_stop, + .show = pstore_ftrace_seq_show, +}; + +static ssize_t pstore_file_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *sf = file->private_data; + struct pstore_private *ps = sf->private; + + if (ps->record->type == PSTORE_TYPE_FTRACE) + return seq_read(file, userbuf, count, ppos); + return simple_read_from_buffer(userbuf, count, ppos, + ps->record->buf, ps->total_size); +} + +static int pstore_file_open(struct inode *inode, struct file *file) +{ + struct pstore_private *ps = inode->i_private; + struct seq_file *sf; + int err; + const struct seq_operations *sops = NULL; + + if (ps->record->type == PSTORE_TYPE_FTRACE) + sops = &pstore_ftrace_seq_ops; + + err = seq_open(file, sops); + if (err < 0) + return err; + + sf = file->private_data; + sf->private = ps; + + return 0; +} + +static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence) +{ + struct seq_file *sf = file->private_data; + + if (sf->op) + return seq_lseek(file, off, whence); + return default_llseek(file, off, whence); +} + +static const struct file_operations pstore_file_operations = { + .open = pstore_file_open, + .read = pstore_file_read, + .llseek = pstore_file_llseek, + .release = seq_release, +}; + +/* + * When a file is unlinked from our file system we call the + * platform driver to erase the record from persistent store. + */ +static int pstore_unlink(struct inode *dir, struct dentry *dentry) +{ + struct pstore_private *p = d_inode(dentry)->i_private; + struct pstore_record *record = p->record; + int rc = 0; + + if (!record->psi->erase) + return -EPERM; + + /* Make sure we can't race while removing this file. */ + mutex_lock(&records_list_lock); + if (!list_empty(&p->list)) + list_del_init(&p->list); + else + rc = -ENOENT; + p->dentry = NULL; + mutex_unlock(&records_list_lock); + if (rc) + return rc; + + mutex_lock(&record->psi->read_mutex); + record->psi->erase(record); + mutex_unlock(&record->psi->read_mutex); + + return simple_unlink(dir, dentry); +} + +static void pstore_evict_inode(struct inode *inode) +{ + struct pstore_private *p = inode->i_private; + + clear_inode(inode); + free_pstore_private(p); +} + +static const struct inode_operations pstore_dir_inode_operations = { + .lookup = simple_lookup, + .unlink = pstore_unlink, +}; + +static struct inode *pstore_get_inode(struct super_block *sb) +{ + struct inode *inode = new_inode(sb); + if (inode) { + inode->i_ino = get_next_ino(); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + } + return inode; +} + +enum { + Opt_kmsg_bytes, Opt_err +}; + +static const match_table_t tokens = { + {Opt_kmsg_bytes, "kmsg_bytes=%u"}, + {Opt_err, NULL} +}; + +static void parse_options(char *options) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + + if (!options) + return; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_kmsg_bytes: + if (!match_int(&args[0], &option)) + pstore_set_kmsg_bytes(option); + break; + } + } +} + +/* + * Display the mount options in /proc/mounts. + */ +static int pstore_show_options(struct seq_file *m, struct dentry *root) +{ + if (kmsg_bytes != CONFIG_PSTORE_DEFAULT_KMSG_BYTES) + seq_printf(m, ",kmsg_bytes=%lu", kmsg_bytes); + return 0; +} + +static int pstore_remount(struct super_block *sb, int *flags, char *data) +{ + sync_filesystem(sb); + parse_options(data); + + return 0; +} + +static const struct super_operations pstore_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .evict_inode = pstore_evict_inode, + .remount_fs = pstore_remount, + .show_options = pstore_show_options, +}; + +static struct dentry *psinfo_lock_root(void) +{ + struct dentry *root; + + mutex_lock(&pstore_sb_lock); + /* + * Having no backend is fine -- no records appear. + * Not being mounted is fine -- nothing to do. + */ + if (!psinfo || !pstore_sb) { + mutex_unlock(&pstore_sb_lock); + return NULL; + } + + root = pstore_sb->s_root; + inode_lock(d_inode(root)); + mutex_unlock(&pstore_sb_lock); + + return root; +} + +int pstore_put_backend_records(struct pstore_info *psi) +{ + struct pstore_private *pos, *tmp; + struct dentry *root; + int rc = 0; + + root = psinfo_lock_root(); + if (!root) + return 0; + + mutex_lock(&records_list_lock); + list_for_each_entry_safe(pos, tmp, &records_list, list) { + if (pos->record->psi == psi) { + list_del_init(&pos->list); + rc = simple_unlink(d_inode(root), pos->dentry); + if (WARN_ON(rc)) + break; + d_drop(pos->dentry); + dput(pos->dentry); + pos->dentry = NULL; + } + } + mutex_unlock(&records_list_lock); + + inode_unlock(d_inode(root)); + + return rc; +} + +/* + * Make a regular file in the root directory of our file system. + * Load it up with "size" bytes of data from "buf". + * Set the mtime & ctime to the date that this record was originally stored. + */ +int pstore_mkfile(struct dentry *root, struct pstore_record *record) +{ + struct dentry *dentry; + struct inode *inode; + int rc = 0; + char name[PSTORE_NAMELEN]; + struct pstore_private *private, *pos; + size_t size = record->size + record->ecc_notice_size; + + if (WARN_ON(!inode_is_locked(d_inode(root)))) + return -EINVAL; + + rc = -EEXIST; + /* Skip records that are already present in the filesystem. */ + mutex_lock(&records_list_lock); + list_for_each_entry(pos, &records_list, list) { + if (pos->record->type == record->type && + pos->record->id == record->id && + pos->record->psi == record->psi) + goto fail; + } + + rc = -ENOMEM; + inode = pstore_get_inode(root->d_sb); + if (!inode) + goto fail; + inode->i_mode = S_IFREG | 0444; + inode->i_fop = &pstore_file_operations; + scnprintf(name, sizeof(name), "%s-%s-%llu%s", + pstore_type_to_name(record->type), + record->psi->name, record->id, + record->compressed ? ".enc.z" : ""); + + private = kzalloc(sizeof(*private), GFP_KERNEL); + if (!private) + goto fail_inode; + + dentry = d_alloc_name(root, name); + if (!dentry) + goto fail_private; + + private->dentry = dentry; + private->record = record; + inode->i_size = private->total_size = size; + inode->i_private = private; + + if (record->time.tv_sec) + inode->i_mtime = inode_set_ctime_to_ts(inode, record->time); + + d_add(dentry, inode); + + list_add(&private->list, &records_list); + mutex_unlock(&records_list_lock); + + return 0; + +fail_private: + free_pstore_private(private); +fail_inode: + iput(inode); +fail: + mutex_unlock(&records_list_lock); + return rc; +} + +/* + * Read all the records from the persistent store. Create + * files in our filesystem. Don't warn about -EEXIST errors + * when we are re-scanning the backing store looking to add new + * error records. + */ +void pstore_get_records(int quiet) +{ + struct dentry *root; + + root = psinfo_lock_root(); + if (!root) + return; + + pstore_get_backend_records(psinfo, root, quiet); + inode_unlock(d_inode(root)); +} + +static int pstore_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *inode; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_magic = PSTOREFS_MAGIC; + sb->s_op = &pstore_ops; + sb->s_time_gran = 1; + + parse_options(data); + + inode = pstore_get_inode(sb); + if (inode) { + inode->i_mode = S_IFDIR | 0750; + inode->i_op = &pstore_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + inc_nlink(inode); + } + sb->s_root = d_make_root(inode); + if (!sb->s_root) + return -ENOMEM; + + mutex_lock(&pstore_sb_lock); + pstore_sb = sb; + mutex_unlock(&pstore_sb_lock); + + pstore_get_records(0); + + return 0; +} + +static struct dentry *pstore_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_single(fs_type, flags, data, pstore_fill_super); +} + +static void pstore_kill_sb(struct super_block *sb) +{ + mutex_lock(&pstore_sb_lock); + WARN_ON(pstore_sb && pstore_sb != sb); + + kill_litter_super(sb); + pstore_sb = NULL; + + mutex_lock(&records_list_lock); + INIT_LIST_HEAD(&records_list); + mutex_unlock(&records_list_lock); + + mutex_unlock(&pstore_sb_lock); +} + +static struct file_system_type pstore_fs_type = { + .owner = THIS_MODULE, + .name = "pstore", + .mount = pstore_mount, + .kill_sb = pstore_kill_sb, +}; + +int __init pstore_init_fs(void) +{ + int err; + + /* Create a convenient mount point for people to access pstore */ + err = sysfs_create_mount_point(fs_kobj, "pstore"); + if (err) + goto out; + + err = register_filesystem(&pstore_fs_type); + if (err < 0) + sysfs_remove_mount_point(fs_kobj, "pstore"); + +out: + return err; +} + +void __exit pstore_exit_fs(void) +{ + unregister_filesystem(&pstore_fs_type); + sysfs_remove_mount_point(fs_kobj, "pstore"); +} diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h new file mode 100644 index 0000000000..801d6c0b17 --- /dev/null +++ b/fs/pstore/internal.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PSTORE_INTERNAL_H__ +#define __PSTORE_INTERNAL_H__ + +#include +#include +#include + +extern unsigned long kmsg_bytes; + +#ifdef CONFIG_PSTORE_FTRACE +extern void pstore_register_ftrace(void); +extern void pstore_unregister_ftrace(void); +ssize_t pstore_ftrace_combine_log(char **dest_log, size_t *dest_log_size, + const char *src_log, size_t src_log_size); +#else +static inline void pstore_register_ftrace(void) {} +static inline void pstore_unregister_ftrace(void) {} +static inline ssize_t +pstore_ftrace_combine_log(char **dest_log, size_t *dest_log_size, + const char *src_log, size_t src_log_size) +{ + *dest_log_size = 0; + return 0; +} +#endif + +#ifdef CONFIG_PSTORE_PMSG +extern void pstore_register_pmsg(void); +extern void pstore_unregister_pmsg(void); +#else +static inline void pstore_register_pmsg(void) {} +static inline void pstore_unregister_pmsg(void) {} +#endif + +extern struct pstore_info *psinfo; + +extern void pstore_set_kmsg_bytes(int); +extern void pstore_get_records(int); +extern void pstore_get_backend_records(struct pstore_info *psi, + struct dentry *root, int quiet); +extern int pstore_put_backend_records(struct pstore_info *psi); +extern int pstore_mkfile(struct dentry *root, + struct pstore_record *record); +extern void pstore_record_init(struct pstore_record *record, + struct pstore_info *psi); + +/* Called during pstore init/exit. */ +int __init pstore_init_fs(void); +void __exit pstore_exit_fs(void); + +#endif diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c new file mode 100644 index 0000000000..03425928d2 --- /dev/null +++ b/fs/pstore/platform.c @@ -0,0 +1,764 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Persistent Storage - platform driver interface parts. + * + * Copyright (C) 2007-2008 Google, Inc. + * Copyright (C) 2010 Intel Corporation + */ + +#define pr_fmt(fmt) "pstore: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/* + * We defer making "oops" entries appear in pstore - see + * whether the system is actually still running well enough + * to let someone see the entry + */ +static int pstore_update_ms = -1; +module_param_named(update_ms, pstore_update_ms, int, 0600); +MODULE_PARM_DESC(update_ms, "milliseconds before pstore updates its content " + "(default is -1, which means runtime updates are disabled; " + "enabling this option may not be safe; it may lead to further " + "corruption on Oopses)"); + +/* Names should be in the same order as the enum pstore_type_id */ +static const char * const pstore_type_names[] = { + "dmesg", + "mce", + "console", + "ftrace", + "rtas", + "powerpc-ofw", + "powerpc-common", + "pmsg", + "powerpc-opal", +}; + +static int pstore_new_entry; + +static void pstore_timefunc(struct timer_list *); +static DEFINE_TIMER(pstore_timer, pstore_timefunc); + +static void pstore_dowork(struct work_struct *); +static DECLARE_WORK(pstore_work, pstore_dowork); + +/* + * psinfo_lock protects "psinfo" during calls to + * pstore_register(), pstore_unregister(), and + * the filesystem mount/unmount routines. + */ +static DEFINE_MUTEX(psinfo_lock); +struct pstore_info *psinfo; + +static char *backend; +module_param(backend, charp, 0444); +MODULE_PARM_DESC(backend, "specific backend to use"); + +/* + * pstore no longer implements compression via the crypto API, and only + * supports zlib deflate compression implemented using the zlib library + * interface. This removes additional complexity which is hard to justify for a + * diagnostic facility that has to operate in conditions where the system may + * have become unstable. Zlib deflate is comparatively small in terms of code + * size, and compresses ASCII text comparatively well. In terms of compression + * speed, deflate is not the best performer but for recording the log output on + * a kernel panic, this is not considered critical. + * + * The only remaining arguments supported by the compress= module parameter are + * 'deflate' and 'none'. To retain compatibility with existing installations, + * all other values are logged and replaced with 'deflate'. + */ +static char *compress = "deflate"; +module_param(compress, charp, 0444); +MODULE_PARM_DESC(compress, "compression to use"); + +/* How much of the kernel log to snapshot */ +unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES; +module_param(kmsg_bytes, ulong, 0444); +MODULE_PARM_DESC(kmsg_bytes, "amount of kernel log to snapshot (in bytes)"); + +static void *compress_workspace; + +/* + * Compression is only used for dmesg output, which consists of low-entropy + * ASCII text, and so we can assume worst-case 60%. + */ +#define DMESG_COMP_PERCENT 60 + +static char *big_oops_buf; +static size_t max_compressed_size; + +void pstore_set_kmsg_bytes(int bytes) +{ + kmsg_bytes = bytes; +} + +/* Tag each group of saved records with a sequence number */ +static int oopscount; + +const char *pstore_type_to_name(enum pstore_type_id type) +{ + BUILD_BUG_ON(ARRAY_SIZE(pstore_type_names) != PSTORE_TYPE_MAX); + + if (WARN_ON_ONCE(type >= PSTORE_TYPE_MAX)) + return "unknown"; + + return pstore_type_names[type]; +} +EXPORT_SYMBOL_GPL(pstore_type_to_name); + +enum pstore_type_id pstore_name_to_type(const char *name) +{ + int i; + + for (i = 0; i < PSTORE_TYPE_MAX; i++) { + if (!strcmp(pstore_type_names[i], name)) + return i; + } + + return PSTORE_TYPE_MAX; +} +EXPORT_SYMBOL_GPL(pstore_name_to_type); + +static void pstore_timer_kick(void) +{ + if (pstore_update_ms < 0) + return; + + mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms)); +} + +static bool pstore_cannot_block_path(enum kmsg_dump_reason reason) +{ + /* + * In case of NMI path, pstore shouldn't be blocked + * regardless of reason. + */ + if (in_nmi()) + return true; + + switch (reason) { + /* In panic case, other cpus are stopped by smp_send_stop(). */ + case KMSG_DUMP_PANIC: + /* + * Emergency restart shouldn't be blocked by spinning on + * pstore_info::buf_lock. + */ + case KMSG_DUMP_EMERG: + return true; + default: + return false; + } +} + +static int pstore_compress(const void *in, void *out, + unsigned int inlen, unsigned int outlen) +{ + struct z_stream_s zstream = { + .next_in = in, + .avail_in = inlen, + .next_out = out, + .avail_out = outlen, + .workspace = compress_workspace, + }; + int ret; + + if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS)) + return -EINVAL; + + ret = zlib_deflateInit2(&zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, + -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); + if (ret != Z_OK) + return -EINVAL; + + ret = zlib_deflate(&zstream, Z_FINISH); + if (ret != Z_STREAM_END) + return -EINVAL; + + ret = zlib_deflateEnd(&zstream); + if (ret != Z_OK) + pr_warn_once("zlib_deflateEnd() failed: %d\n", ret); + + return zstream.total_out; +} + +static void allocate_buf_for_compression(void) +{ + size_t compressed_size; + char *buf; + + /* Skip if not built-in or compression disabled. */ + if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !compress || + !strcmp(compress, "none")) { + compress = NULL; + return; + } + + if (strcmp(compress, "deflate")) { + pr_err("Unsupported compression '%s', falling back to deflate\n", + compress); + compress = "deflate"; + } + + /* + * The compression buffer only needs to be as large as the maximum + * uncompressed record size, since any record that would be expanded by + * compression is just stored uncompressed. + */ + compressed_size = (psinfo->bufsize * 100) / DMESG_COMP_PERCENT; + buf = kvzalloc(compressed_size, GFP_KERNEL); + if (!buf) { + pr_err("Failed %zu byte compression buffer allocation for: %s\n", + psinfo->bufsize, compress); + return; + } + + compress_workspace = + vmalloc(zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL)); + if (!compress_workspace) { + pr_err("Failed to allocate zlib deflate workspace\n"); + kvfree(buf); + return; + } + + /* A non-NULL big_oops_buf indicates compression is available. */ + big_oops_buf = buf; + max_compressed_size = compressed_size; + + pr_info("Using crash dump compression: %s\n", compress); +} + +static void free_buf_for_compression(void) +{ + if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress_workspace) { + vfree(compress_workspace); + compress_workspace = NULL; + } + + kvfree(big_oops_buf); + big_oops_buf = NULL; + max_compressed_size = 0; +} + +void pstore_record_init(struct pstore_record *record, + struct pstore_info *psinfo) +{ + memset(record, 0, sizeof(*record)); + + record->psi = psinfo; + + /* Report zeroed timestamp if called before timekeeping has resumed. */ + record->time = ns_to_timespec64(ktime_get_real_fast_ns()); +} + +/* + * callback from kmsg_dump. Save as much as we can (up to kmsg_bytes) from the + * end of the buffer. + */ +static void pstore_dump(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) +{ + struct kmsg_dump_iter iter; + unsigned long total = 0; + const char *why; + unsigned int part = 1; + unsigned long flags = 0; + int saved_ret = 0; + int ret; + + why = kmsg_dump_reason_str(reason); + + if (pstore_cannot_block_path(reason)) { + if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) { + pr_err("dump skipped in %s path because of concurrent dump\n", + in_nmi() ? "NMI" : why); + return; + } + } else { + spin_lock_irqsave(&psinfo->buf_lock, flags); + } + + kmsg_dump_rewind(&iter); + + oopscount++; + while (total < kmsg_bytes) { + char *dst; + size_t dst_size; + int header_size; + int zipped_len = -1; + size_t dump_size; + struct pstore_record record; + + pstore_record_init(&record, psinfo); + record.type = PSTORE_TYPE_DMESG; + record.count = oopscount; + record.reason = reason; + record.part = part; + record.buf = psinfo->buf; + + dst = big_oops_buf ?: psinfo->buf; + dst_size = max_compressed_size ?: psinfo->bufsize; + + /* Write dump header. */ + header_size = snprintf(dst, dst_size, "%s#%d Part%u\n", why, + oopscount, part); + dst_size -= header_size; + + /* Write dump contents. */ + if (!kmsg_dump_get_buffer(&iter, true, dst + header_size, + dst_size, &dump_size)) + break; + + if (big_oops_buf) { + zipped_len = pstore_compress(dst, psinfo->buf, + header_size + dump_size, + psinfo->bufsize); + + if (zipped_len > 0) { + record.compressed = true; + record.size = zipped_len; + } else { + /* + * Compression failed, so the buffer is most + * likely filled with binary data that does not + * compress as well as ASCII text. Copy as much + * of the uncompressed data as possible into + * the pstore record, and discard the rest. + */ + record.size = psinfo->bufsize; + memcpy(psinfo->buf, dst, psinfo->bufsize); + } + } else { + record.size = header_size + dump_size; + } + + ret = psinfo->write(&record); + if (ret == 0 && reason == KMSG_DUMP_OOPS) { + pstore_new_entry = 1; + pstore_timer_kick(); + } else { + /* Preserve only the first non-zero returned value. */ + if (!saved_ret) + saved_ret = ret; + } + + total += record.size; + part++; + } + spin_unlock_irqrestore(&psinfo->buf_lock, flags); + + if (saved_ret) { + pr_err_once("backend (%s) writing error (%d)\n", psinfo->name, + saved_ret); + } +} + +static struct kmsg_dumper pstore_dumper = { + .dump = pstore_dump, +}; + +/* + * Register with kmsg_dump to save last part of console log on panic. + */ +static void pstore_register_kmsg(void) +{ + kmsg_dump_register(&pstore_dumper); +} + +static void pstore_unregister_kmsg(void) +{ + kmsg_dump_unregister(&pstore_dumper); +} + +#ifdef CONFIG_PSTORE_CONSOLE +static void pstore_console_write(struct console *con, const char *s, unsigned c) +{ + struct pstore_record record; + + if (!c) + return; + + pstore_record_init(&record, psinfo); + record.type = PSTORE_TYPE_CONSOLE; + + record.buf = (char *)s; + record.size = c; + psinfo->write(&record); +} + +static struct console pstore_console = { + .write = pstore_console_write, + .index = -1, +}; + +static void pstore_register_console(void) +{ + /* Show which backend is going to get console writes. */ + strscpy(pstore_console.name, psinfo->name, + sizeof(pstore_console.name)); + /* + * Always initialize flags here since prior unregister_console() + * calls may have changed settings (specifically CON_ENABLED). + */ + pstore_console.flags = CON_PRINTBUFFER | CON_ENABLED | CON_ANYTIME; + register_console(&pstore_console); +} + +static void pstore_unregister_console(void) +{ + unregister_console(&pstore_console); +} +#else +static void pstore_register_console(void) {} +static void pstore_unregister_console(void) {} +#endif + +static int pstore_write_user_compat(struct pstore_record *record, + const char __user *buf) +{ + int ret = 0; + + if (record->buf) + return -EINVAL; + + record->buf = vmemdup_user(buf, record->size); + if (IS_ERR(record->buf)) { + ret = PTR_ERR(record->buf); + goto out; + } + + ret = record->psi->write(record); + + kvfree(record->buf); +out: + record->buf = NULL; + + return unlikely(ret < 0) ? ret : record->size; +} + +/* + * platform specific persistent storage driver registers with + * us here. If pstore is already mounted, call the platform + * read function right away to populate the file system. If not + * then the pstore mount code will call us later to fill out + * the file system. + */ +int pstore_register(struct pstore_info *psi) +{ + char *new_backend; + + if (backend && strcmp(backend, psi->name)) { + pr_warn("backend '%s' already in use: ignoring '%s'\n", + backend, psi->name); + return -EBUSY; + } + + /* Sanity check flags. */ + if (!psi->flags) { + pr_warn("backend '%s' must support at least one frontend\n", + psi->name); + return -EINVAL; + } + + /* Check for required functions. */ + if (!psi->read || !psi->write) { + pr_warn("backend '%s' must implement read() and write()\n", + psi->name); + return -EINVAL; + } + + new_backend = kstrdup(psi->name, GFP_KERNEL); + if (!new_backend) + return -ENOMEM; + + mutex_lock(&psinfo_lock); + if (psinfo) { + pr_warn("backend '%s' already loaded: ignoring '%s'\n", + psinfo->name, psi->name); + mutex_unlock(&psinfo_lock); + kfree(new_backend); + return -EBUSY; + } + + if (!psi->write_user) + psi->write_user = pstore_write_user_compat; + psinfo = psi; + mutex_init(&psinfo->read_mutex); + spin_lock_init(&psinfo->buf_lock); + + if (psi->flags & PSTORE_FLAGS_DMESG) + allocate_buf_for_compression(); + + pstore_get_records(0); + + if (psi->flags & PSTORE_FLAGS_DMESG) { + pstore_dumper.max_reason = psinfo->max_reason; + pstore_register_kmsg(); + } + if (psi->flags & PSTORE_FLAGS_CONSOLE) + pstore_register_console(); + if (psi->flags & PSTORE_FLAGS_FTRACE) + pstore_register_ftrace(); + if (psi->flags & PSTORE_FLAGS_PMSG) + pstore_register_pmsg(); + + /* Start watching for new records, if desired. */ + pstore_timer_kick(); + + /* + * Update the module parameter backend, so it is visible + * through /sys/module/pstore/parameters/backend + */ + backend = new_backend; + + pr_info("Registered %s as persistent store backend\n", psi->name); + + mutex_unlock(&psinfo_lock); + return 0; +} +EXPORT_SYMBOL_GPL(pstore_register); + +void pstore_unregister(struct pstore_info *psi) +{ + /* It's okay to unregister nothing. */ + if (!psi) + return; + + mutex_lock(&psinfo_lock); + + /* Only one backend can be registered at a time. */ + if (WARN_ON(psi != psinfo)) { + mutex_unlock(&psinfo_lock); + return; + } + + /* Unregister all callbacks. */ + if (psi->flags & PSTORE_FLAGS_PMSG) + pstore_unregister_pmsg(); + if (psi->flags & PSTORE_FLAGS_FTRACE) + pstore_unregister_ftrace(); + if (psi->flags & PSTORE_FLAGS_CONSOLE) + pstore_unregister_console(); + if (psi->flags & PSTORE_FLAGS_DMESG) + pstore_unregister_kmsg(); + + /* Stop timer and make sure all work has finished. */ + del_timer_sync(&pstore_timer); + flush_work(&pstore_work); + + /* Remove all backend records from filesystem tree. */ + pstore_put_backend_records(psi); + + free_buf_for_compression(); + + psinfo = NULL; + kfree(backend); + backend = NULL; + + pr_info("Unregistered %s as persistent store backend\n", psi->name); + mutex_unlock(&psinfo_lock); +} +EXPORT_SYMBOL_GPL(pstore_unregister); + +static void decompress_record(struct pstore_record *record, + struct z_stream_s *zstream) +{ + int ret; + int unzipped_len; + char *unzipped, *workspace; + size_t max_uncompressed_size; + + if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !record->compressed) + return; + + /* Only PSTORE_TYPE_DMESG support compression. */ + if (record->type != PSTORE_TYPE_DMESG) { + pr_warn("ignored compressed record type %d\n", record->type); + return; + } + + /* Missing compression buffer means compression was not initialized. */ + if (!zstream->workspace) { + pr_warn("no decompression method initialized!\n"); + return; + } + + ret = zlib_inflateReset(zstream); + if (ret != Z_OK) { + pr_err("zlib_inflateReset() failed, ret = %d!\n", ret); + return; + } + + /* Allocate enough space to hold max decompression and ECC. */ + max_uncompressed_size = 3 * psinfo->bufsize; + workspace = kvzalloc(max_uncompressed_size + record->ecc_notice_size, + GFP_KERNEL); + if (!workspace) + return; + + zstream->next_in = record->buf; + zstream->avail_in = record->size; + zstream->next_out = workspace; + zstream->avail_out = max_uncompressed_size; + + ret = zlib_inflate(zstream, Z_FINISH); + if (ret != Z_STREAM_END) { + pr_err_ratelimited("zlib_inflate() failed, ret = %d!\n", ret); + kvfree(workspace); + return; + } + + unzipped_len = zstream->total_out; + + /* Append ECC notice to decompressed buffer. */ + memcpy(workspace + unzipped_len, record->buf + record->size, + record->ecc_notice_size); + + /* Copy decompressed contents into an minimum-sized allocation. */ + unzipped = kvmemdup(workspace, unzipped_len + record->ecc_notice_size, + GFP_KERNEL); + kvfree(workspace); + if (!unzipped) + return; + + /* Swap out compressed contents with decompressed contents. */ + kvfree(record->buf); + record->buf = unzipped; + record->size = unzipped_len; + record->compressed = false; +} + +/* + * Read all the records from one persistent store backend. Create + * files in our filesystem. Don't warn about -EEXIST errors + * when we are re-scanning the backing store looking to add new + * error records. + */ +void pstore_get_backend_records(struct pstore_info *psi, + struct dentry *root, int quiet) +{ + int failed = 0; + unsigned int stop_loop = 65536; + struct z_stream_s zstream = {}; + + if (!psi || !root) + return; + + if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress) { + zstream.workspace = kvmalloc(zlib_inflate_workspacesize(), + GFP_KERNEL); + zlib_inflateInit2(&zstream, -DEF_WBITS); + } + + mutex_lock(&psi->read_mutex); + if (psi->open && psi->open(psi)) + goto out; + + /* + * Backend callback read() allocates record.buf. decompress_record() + * may reallocate record.buf. On success, pstore_mkfile() will keep + * the record.buf, so free it only on failure. + */ + for (; stop_loop; stop_loop--) { + struct pstore_record *record; + int rc; + + record = kzalloc(sizeof(*record), GFP_KERNEL); + if (!record) { + pr_err("out of memory creating record\n"); + break; + } + pstore_record_init(record, psi); + + record->size = psi->read(record); + + /* No more records left in backend? */ + if (record->size <= 0) { + kfree(record); + break; + } + + decompress_record(record, &zstream); + rc = pstore_mkfile(root, record); + if (rc) { + /* pstore_mkfile() did not take record, so free it. */ + kvfree(record->buf); + kfree(record->priv); + kfree(record); + if (rc != -EEXIST || !quiet) + failed++; + } + } + if (psi->close) + psi->close(psi); +out: + mutex_unlock(&psi->read_mutex); + + if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress) { + if (zlib_inflateEnd(&zstream) != Z_OK) + pr_warn("zlib_inflateEnd() failed\n"); + kvfree(zstream.workspace); + } + + if (failed) + pr_warn("failed to create %d record(s) from '%s'\n", + failed, psi->name); + if (!stop_loop) + pr_err("looping? Too many records seen from '%s'\n", + psi->name); +} + +static void pstore_dowork(struct work_struct *work) +{ + pstore_get_records(1); +} + +static void pstore_timefunc(struct timer_list *unused) +{ + if (pstore_new_entry) { + pstore_new_entry = 0; + schedule_work(&pstore_work); + } + + pstore_timer_kick(); +} + +static int __init pstore_init(void) +{ + int ret; + + ret = pstore_init_fs(); + if (ret) + free_buf_for_compression(); + + return ret; +} +late_initcall(pstore_init); + +static void __exit pstore_exit(void) +{ + pstore_exit_fs(); +} +module_exit(pstore_exit) + +MODULE_AUTHOR("Tony Luck "); +MODULE_LICENSE("GPL"); diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c new file mode 100644 index 0000000000..55f139afa3 --- /dev/null +++ b/fs/pstore/pmsg.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2014 Google, Inc. + */ + +#include +#include +#include +#include +#include "internal.h" + +static DEFINE_MUTEX(pmsg_lock); + +static ssize_t write_pmsg(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct pstore_record record; + int ret; + + if (!count) + return 0; + + pstore_record_init(&record, psinfo); + record.type = PSTORE_TYPE_PMSG; + record.size = count; + + /* check outside lock, page in any data. write_user also checks */ + if (!access_ok(buf, count)) + return -EFAULT; + + mutex_lock(&pmsg_lock); + ret = psinfo->write_user(&record, buf); + mutex_unlock(&pmsg_lock); + return ret ? ret : count; +} + +static const struct file_operations pmsg_fops = { + .owner = THIS_MODULE, + .llseek = noop_llseek, + .write = write_pmsg, +}; + +static struct class *pmsg_class; +static int pmsg_major; +#define PMSG_NAME "pmsg" +#undef pr_fmt +#define pr_fmt(fmt) PMSG_NAME ": " fmt + +static char *pmsg_devnode(const struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0220; + return NULL; +} + +void pstore_register_pmsg(void) +{ + struct device *pmsg_device; + + pmsg_major = register_chrdev(0, PMSG_NAME, &pmsg_fops); + if (pmsg_major < 0) { + pr_err("register_chrdev failed\n"); + goto err; + } + + pmsg_class = class_create(PMSG_NAME); + if (IS_ERR(pmsg_class)) { + pr_err("device class file already in use\n"); + goto err_class; + } + pmsg_class->devnode = pmsg_devnode; + + pmsg_device = device_create(pmsg_class, NULL, MKDEV(pmsg_major, 0), + NULL, "%s%d", PMSG_NAME, 0); + if (IS_ERR(pmsg_device)) { + pr_err("failed to create device\n"); + goto err_device; + } + return; + +err_device: + class_destroy(pmsg_class); +err_class: + unregister_chrdev(pmsg_major, PMSG_NAME); +err: + return; +} + +void pstore_unregister_pmsg(void) +{ + device_destroy(pmsg_class, MKDEV(pmsg_major, 0)); + class_destroy(pmsg_class); + unregister_chrdev(pmsg_major, PMSG_NAME); +} diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c new file mode 100644 index 0000000000..d36702c7ab --- /dev/null +++ b/fs/pstore/ram.c @@ -0,0 +1,982 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * RAM Oops/Panic logger + * + * Copyright (C) 2010 Marco Stornelli + * Copyright (C) 2011 Kees Cook + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" +#include "ram_internal.h" + +#define RAMOOPS_KERNMSG_HDR "====" +#define MIN_MEM_SIZE 4096UL + +static ulong record_size = MIN_MEM_SIZE; +module_param(record_size, ulong, 0400); +MODULE_PARM_DESC(record_size, + "size of each dump done on oops/panic"); + +static ulong ramoops_console_size = MIN_MEM_SIZE; +module_param_named(console_size, ramoops_console_size, ulong, 0400); +MODULE_PARM_DESC(console_size, "size of kernel console log"); + +static ulong ramoops_ftrace_size = MIN_MEM_SIZE; +module_param_named(ftrace_size, ramoops_ftrace_size, ulong, 0400); +MODULE_PARM_DESC(ftrace_size, "size of ftrace log"); + +static ulong ramoops_pmsg_size = MIN_MEM_SIZE; +module_param_named(pmsg_size, ramoops_pmsg_size, ulong, 0400); +MODULE_PARM_DESC(pmsg_size, "size of user space message log"); + +static unsigned long long mem_address; +module_param_hw(mem_address, ullong, other, 0400); +MODULE_PARM_DESC(mem_address, + "start of reserved RAM used to store oops/panic logs"); + +static ulong mem_size; +module_param(mem_size, ulong, 0400); +MODULE_PARM_DESC(mem_size, + "size of reserved RAM used to store oops/panic logs"); + +static unsigned int mem_type; +module_param(mem_type, uint, 0400); +MODULE_PARM_DESC(mem_type, + "memory type: 0=write-combined (default), 1=unbuffered, 2=cached"); + +static int ramoops_max_reason = -1; +module_param_named(max_reason, ramoops_max_reason, int, 0400); +MODULE_PARM_DESC(max_reason, + "maximum reason for kmsg dump (default 2: Oops and Panic) "); + +static int ramoops_ecc; +module_param_named(ecc, ramoops_ecc, int, 0400); +MODULE_PARM_DESC(ramoops_ecc, + "if non-zero, the option enables ECC support and specifies " + "ECC buffer size in bytes (1 is a special value, means 16 " + "bytes ECC)"); + +static int ramoops_dump_oops = -1; +module_param_named(dump_oops, ramoops_dump_oops, int, 0400); +MODULE_PARM_DESC(dump_oops, + "(deprecated: use max_reason instead) set to 1 to dump oopses & panics, 0 to only dump panics"); + +struct ramoops_context { + struct persistent_ram_zone **dprzs; /* Oops dump zones */ + struct persistent_ram_zone *cprz; /* Console zone */ + struct persistent_ram_zone **fprzs; /* Ftrace zones */ + struct persistent_ram_zone *mprz; /* PMSG zone */ + phys_addr_t phys_addr; + unsigned long size; + unsigned int memtype; + size_t record_size; + size_t console_size; + size_t ftrace_size; + size_t pmsg_size; + u32 flags; + struct persistent_ram_ecc_info ecc_info; + unsigned int max_dump_cnt; + unsigned int dump_write_cnt; + /* _read_cnt need clear on ramoops_pstore_open */ + unsigned int dump_read_cnt; + unsigned int console_read_cnt; + unsigned int max_ftrace_cnt; + unsigned int ftrace_read_cnt; + unsigned int pmsg_read_cnt; + struct pstore_info pstore; +}; + +static struct platform_device *dummy; + +static int ramoops_pstore_open(struct pstore_info *psi) +{ + struct ramoops_context *cxt = psi->data; + + cxt->dump_read_cnt = 0; + cxt->console_read_cnt = 0; + cxt->ftrace_read_cnt = 0; + cxt->pmsg_read_cnt = 0; + return 0; +} + +static struct persistent_ram_zone * +ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id, + struct pstore_record *record) +{ + struct persistent_ram_zone *prz; + + /* Give up if we never existed or have hit the end. */ + if (!przs) + return NULL; + + prz = przs[id]; + if (!prz) + return NULL; + + /* Update old/shadowed buffer. */ + if (prz->type == PSTORE_TYPE_DMESG) + persistent_ram_save_old(prz); + + if (!persistent_ram_old_size(prz)) + return NULL; + + record->type = prz->type; + record->id = id; + + return prz; +} + +static int ramoops_read_kmsg_hdr(char *buffer, struct timespec64 *time, + bool *compressed) +{ + char data_type; + int header_length = 0; + + if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lld.%lu-%c\n%n", + (time64_t *)&time->tv_sec, &time->tv_nsec, &data_type, + &header_length) == 3) { + time->tv_nsec *= 1000; + if (data_type == 'C') + *compressed = true; + else + *compressed = false; + } else if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lld.%lu\n%n", + (time64_t *)&time->tv_sec, &time->tv_nsec, + &header_length) == 2) { + time->tv_nsec *= 1000; + *compressed = false; + } else { + time->tv_sec = 0; + time->tv_nsec = 0; + *compressed = false; + } + return header_length; +} + +static bool prz_ok(struct persistent_ram_zone *prz) +{ + return !!prz && !!(persistent_ram_old_size(prz) + + persistent_ram_ecc_string(prz, NULL, 0)); +} + +static ssize_t ramoops_pstore_read(struct pstore_record *record) +{ + ssize_t size = 0; + struct ramoops_context *cxt = record->psi->data; + struct persistent_ram_zone *prz = NULL; + int header_length = 0; + bool free_prz = false; + + /* + * Ramoops headers provide time stamps for PSTORE_TYPE_DMESG, but + * PSTORE_TYPE_CONSOLE and PSTORE_TYPE_FTRACE don't currently have + * valid time stamps, so it is initialized to zero. + */ + record->time.tv_sec = 0; + record->time.tv_nsec = 0; + record->compressed = false; + + /* Find the next valid persistent_ram_zone for DMESG */ + while (cxt->dump_read_cnt < cxt->max_dump_cnt && !prz) { + prz = ramoops_get_next_prz(cxt->dprzs, cxt->dump_read_cnt++, + record); + if (!prz_ok(prz)) + continue; + header_length = ramoops_read_kmsg_hdr(persistent_ram_old(prz), + &record->time, + &record->compressed); + /* Clear and skip this DMESG record if it has no valid header */ + if (!header_length) { + persistent_ram_free_old(prz); + persistent_ram_zap(prz); + prz = NULL; + } + } + + if (!prz_ok(prz) && !cxt->console_read_cnt++) + prz = ramoops_get_next_prz(&cxt->cprz, 0 /* single */, record); + + if (!prz_ok(prz) && !cxt->pmsg_read_cnt++) + prz = ramoops_get_next_prz(&cxt->mprz, 0 /* single */, record); + + /* ftrace is last since it may want to dynamically allocate memory. */ + if (!prz_ok(prz)) { + if (!(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) && + !cxt->ftrace_read_cnt++) { + prz = ramoops_get_next_prz(cxt->fprzs, 0 /* single */, + record); + } else { + /* + * Build a new dummy record which combines all the + * per-cpu records including metadata and ecc info. + */ + struct persistent_ram_zone *tmp_prz, *prz_next; + + tmp_prz = kzalloc(sizeof(struct persistent_ram_zone), + GFP_KERNEL); + if (!tmp_prz) + return -ENOMEM; + prz = tmp_prz; + free_prz = true; + + while (cxt->ftrace_read_cnt < cxt->max_ftrace_cnt) { + prz_next = ramoops_get_next_prz(cxt->fprzs, + cxt->ftrace_read_cnt++, record); + + if (!prz_ok(prz_next)) + continue; + + tmp_prz->ecc_info = prz_next->ecc_info; + tmp_prz->corrected_bytes += + prz_next->corrected_bytes; + tmp_prz->bad_blocks += prz_next->bad_blocks; + + size = pstore_ftrace_combine_log( + &tmp_prz->old_log, + &tmp_prz->old_log_size, + prz_next->old_log, + prz_next->old_log_size); + if (size) + goto out; + } + record->id = 0; + } + } + + if (!prz_ok(prz)) { + size = 0; + goto out; + } + + size = persistent_ram_old_size(prz) - header_length; + + /* ECC correction notice */ + record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0); + + record->buf = kvzalloc(size + record->ecc_notice_size + 1, GFP_KERNEL); + if (record->buf == NULL) { + size = -ENOMEM; + goto out; + } + + memcpy(record->buf, (char *)persistent_ram_old(prz) + header_length, + size); + + persistent_ram_ecc_string(prz, record->buf + size, + record->ecc_notice_size + 1); + +out: + if (free_prz) { + kvfree(prz->old_log); + kfree(prz); + } + + return size; +} + +static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz, + struct pstore_record *record) +{ + char hdr[36]; /* "===="(4), %lld(20), "."(1), %06lu(6), "-%c\n"(3) */ + size_t len; + + len = scnprintf(hdr, sizeof(hdr), + RAMOOPS_KERNMSG_HDR "%lld.%06lu-%c\n", + (time64_t)record->time.tv_sec, + record->time.tv_nsec / 1000, + record->compressed ? 'C' : 'D'); + persistent_ram_write(prz, hdr, len); + + return len; +} + +static int notrace ramoops_pstore_write(struct pstore_record *record) +{ + struct ramoops_context *cxt = record->psi->data; + struct persistent_ram_zone *prz; + size_t size, hlen; + + if (record->type == PSTORE_TYPE_CONSOLE) { + if (!cxt->cprz) + return -ENOMEM; + persistent_ram_write(cxt->cprz, record->buf, record->size); + return 0; + } else if (record->type == PSTORE_TYPE_FTRACE) { + int zonenum; + + if (!cxt->fprzs) + return -ENOMEM; + /* + * Choose zone by if we're using per-cpu buffers. + */ + if (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) + zonenum = smp_processor_id(); + else + zonenum = 0; + + persistent_ram_write(cxt->fprzs[zonenum], record->buf, + record->size); + return 0; + } else if (record->type == PSTORE_TYPE_PMSG) { + pr_warn_ratelimited("PMSG shouldn't call %s\n", __func__); + return -EINVAL; + } + + if (record->type != PSTORE_TYPE_DMESG) + return -EINVAL; + + /* + * We could filter on record->reason here if we wanted to (which + * would duplicate what happened before the "max_reason" setting + * was added), but that would defeat the purpose of a system + * changing printk.always_kmsg_dump, so instead log everything that + * the kmsg dumper sends us, since it should be doing the filtering + * based on the combination of printk.always_kmsg_dump and our + * requested "max_reason". + */ + + /* + * Explicitly only take the first part of any new crash. + * If our buffer is larger than kmsg_bytes, this can never happen, + * and if our buffer is smaller than kmsg_bytes, we don't want the + * report split across multiple records. + */ + if (record->part != 1) + return -ENOSPC; + + if (!cxt->dprzs) + return -ENOSPC; + + prz = cxt->dprzs[cxt->dump_write_cnt]; + + /* + * Since this is a new crash dump, we need to reset the buffer in + * case it still has an old dump present. Without this, the new dump + * will get appended, which would seriously confuse anything trying + * to check dump file contents. Specifically, ramoops_read_kmsg_hdr() + * expects to find a dump header in the beginning of buffer data, so + * we must to reset the buffer values, in order to ensure that the + * header will be written to the beginning of the buffer. + */ + persistent_ram_zap(prz); + + /* Build header and append record contents. */ + hlen = ramoops_write_kmsg_hdr(prz, record); + if (!hlen) + return -ENOMEM; + + size = record->size; + if (size + hlen > prz->buffer_size) + size = prz->buffer_size - hlen; + persistent_ram_write(prz, record->buf, size); + + cxt->dump_write_cnt = (cxt->dump_write_cnt + 1) % cxt->max_dump_cnt; + + return 0; +} + +static int notrace ramoops_pstore_write_user(struct pstore_record *record, + const char __user *buf) +{ + if (record->type == PSTORE_TYPE_PMSG) { + struct ramoops_context *cxt = record->psi->data; + + if (!cxt->mprz) + return -ENOMEM; + return persistent_ram_write_user(cxt->mprz, buf, record->size); + } + + return -EINVAL; +} + +static int ramoops_pstore_erase(struct pstore_record *record) +{ + struct ramoops_context *cxt = record->psi->data; + struct persistent_ram_zone *prz; + + switch (record->type) { + case PSTORE_TYPE_DMESG: + if (record->id >= cxt->max_dump_cnt) + return -EINVAL; + prz = cxt->dprzs[record->id]; + break; + case PSTORE_TYPE_CONSOLE: + prz = cxt->cprz; + break; + case PSTORE_TYPE_FTRACE: + if (record->id >= cxt->max_ftrace_cnt) + return -EINVAL; + prz = cxt->fprzs[record->id]; + break; + case PSTORE_TYPE_PMSG: + prz = cxt->mprz; + break; + default: + return -EINVAL; + } + + persistent_ram_free_old(prz); + persistent_ram_zap(prz); + + return 0; +} + +static struct ramoops_context oops_cxt = { + .pstore = { + .owner = THIS_MODULE, + .name = "ramoops", + .open = ramoops_pstore_open, + .read = ramoops_pstore_read, + .write = ramoops_pstore_write, + .write_user = ramoops_pstore_write_user, + .erase = ramoops_pstore_erase, + }, +}; + +static void ramoops_free_przs(struct ramoops_context *cxt) +{ + int i; + + /* Free pmsg PRZ */ + persistent_ram_free(&cxt->mprz); + + /* Free console PRZ */ + persistent_ram_free(&cxt->cprz); + + /* Free dump PRZs */ + if (cxt->dprzs) { + for (i = 0; i < cxt->max_dump_cnt; i++) + persistent_ram_free(&cxt->dprzs[i]); + + kfree(cxt->dprzs); + cxt->dprzs = NULL; + cxt->max_dump_cnt = 0; + } + + /* Free ftrace PRZs */ + if (cxt->fprzs) { + for (i = 0; i < cxt->max_ftrace_cnt; i++) + persistent_ram_free(&cxt->fprzs[i]); + kfree(cxt->fprzs); + cxt->fprzs = NULL; + cxt->max_ftrace_cnt = 0; + } +} + +static int ramoops_init_przs(const char *name, + struct device *dev, struct ramoops_context *cxt, + struct persistent_ram_zone ***przs, + phys_addr_t *paddr, size_t mem_sz, + ssize_t record_size, + unsigned int *cnt, u32 sig, u32 flags) +{ + int err = -ENOMEM; + int i; + size_t zone_sz; + struct persistent_ram_zone **prz_ar; + + /* Allocate nothing for 0 mem_sz or 0 record_size. */ + if (mem_sz == 0 || record_size == 0) { + *cnt = 0; + return 0; + } + + /* + * If we have a negative record size, calculate it based on + * mem_sz / *cnt. If we have a positive record size, calculate + * cnt from mem_sz / record_size. + */ + if (record_size < 0) { + if (*cnt == 0) + return 0; + record_size = mem_sz / *cnt; + if (record_size == 0) { + dev_err(dev, "%s record size == 0 (%zu / %u)\n", + name, mem_sz, *cnt); + goto fail; + } + } else { + *cnt = mem_sz / record_size; + if (*cnt == 0) { + dev_err(dev, "%s record count == 0 (%zu / %zu)\n", + name, mem_sz, record_size); + goto fail; + } + } + + if (*paddr + mem_sz - cxt->phys_addr > cxt->size) { + dev_err(dev, "no room for %s mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n", + name, + mem_sz, (unsigned long long)*paddr, + cxt->size, (unsigned long long)cxt->phys_addr); + goto fail; + } + + zone_sz = mem_sz / *cnt; + if (!zone_sz) { + dev_err(dev, "%s zone size == 0\n", name); + goto fail; + } + + prz_ar = kcalloc(*cnt, sizeof(**przs), GFP_KERNEL); + if (!prz_ar) + goto fail; + + for (i = 0; i < *cnt; i++) { + char *label; + + if (*cnt == 1) + label = kasprintf(GFP_KERNEL, "ramoops:%s", name); + else + label = kasprintf(GFP_KERNEL, "ramoops:%s(%d/%d)", + name, i, *cnt - 1); + prz_ar[i] = persistent_ram_new(*paddr, zone_sz, sig, + &cxt->ecc_info, + cxt->memtype, flags, label); + kfree(label); + if (IS_ERR(prz_ar[i])) { + err = PTR_ERR(prz_ar[i]); + dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n", + name, record_size, + (unsigned long long)*paddr, err); + + while (i > 0) { + i--; + persistent_ram_free(&prz_ar[i]); + } + kfree(prz_ar); + prz_ar = NULL; + goto fail; + } + *paddr += zone_sz; + prz_ar[i]->type = pstore_name_to_type(name); + } + + *przs = prz_ar; + return 0; + +fail: + *cnt = 0; + return err; +} + +static int ramoops_init_prz(const char *name, + struct device *dev, struct ramoops_context *cxt, + struct persistent_ram_zone **prz, + phys_addr_t *paddr, size_t sz, u32 sig) +{ + char *label; + + if (!sz) + return 0; + + if (*paddr + sz - cxt->phys_addr > cxt->size) { + dev_err(dev, "no room for %s mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n", + name, sz, (unsigned long long)*paddr, + cxt->size, (unsigned long long)cxt->phys_addr); + return -ENOMEM; + } + + label = kasprintf(GFP_KERNEL, "ramoops:%s", name); + *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, + cxt->memtype, PRZ_FLAG_ZAP_OLD, label); + kfree(label); + if (IS_ERR(*prz)) { + int err = PTR_ERR(*prz); + + dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n", + name, sz, (unsigned long long)*paddr, err); + return err; + } + + *paddr += sz; + (*prz)->type = pstore_name_to_type(name); + + return 0; +} + +/* Read a u32 from a dt property and make sure it's safe for an int. */ +static int ramoops_parse_dt_u32(struct platform_device *pdev, + const char *propname, + u32 default_value, u32 *value) +{ + u32 val32 = 0; + int ret; + + ret = of_property_read_u32(pdev->dev.of_node, propname, &val32); + if (ret == -EINVAL) { + /* field is missing, use default value. */ + val32 = default_value; + } else if (ret < 0) { + dev_err(&pdev->dev, "failed to parse property %s: %d\n", + propname, ret); + return ret; + } + + /* Sanity check our results. */ + if (val32 > INT_MAX) { + dev_err(&pdev->dev, "%s %u > INT_MAX\n", propname, val32); + return -EOVERFLOW; + } + + *value = val32; + return 0; +} + +static int ramoops_parse_dt(struct platform_device *pdev, + struct ramoops_platform_data *pdata) +{ + struct device_node *of_node = pdev->dev.of_node; + struct device_node *parent_node; + struct resource *res; + u32 value; + int ret; + + dev_dbg(&pdev->dev, "using Device Tree\n"); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, + "failed to locate DT /reserved-memory resource\n"); + return -EINVAL; + } + + pdata->mem_size = resource_size(res); + pdata->mem_address = res->start; + /* + * Setting "unbuffered" is deprecated and will be ignored if + * "mem_type" is also specified. + */ + pdata->mem_type = of_property_read_bool(of_node, "unbuffered"); + /* + * Setting "no-dump-oops" is deprecated and will be ignored if + * "max_reason" is also specified. + */ + if (of_property_read_bool(of_node, "no-dump-oops")) + pdata->max_reason = KMSG_DUMP_PANIC; + else + pdata->max_reason = KMSG_DUMP_OOPS; + +#define parse_u32(name, field, default_value) { \ + ret = ramoops_parse_dt_u32(pdev, name, default_value, \ + &value); \ + if (ret < 0) \ + return ret; \ + field = value; \ + } + + parse_u32("mem-type", pdata->mem_type, pdata->mem_type); + parse_u32("record-size", pdata->record_size, 0); + parse_u32("console-size", pdata->console_size, 0); + parse_u32("ftrace-size", pdata->ftrace_size, 0); + parse_u32("pmsg-size", pdata->pmsg_size, 0); + parse_u32("ecc-size", pdata->ecc_info.ecc_size, 0); + parse_u32("flags", pdata->flags, 0); + parse_u32("max-reason", pdata->max_reason, pdata->max_reason); + +#undef parse_u32 + + /* + * Some old Chromebooks relied on the kernel setting the + * console_size and pmsg_size to the record size since that's + * what the downstream kernel did. These same Chromebooks had + * "ramoops" straight under the root node which isn't + * according to the current upstream bindings (though it was + * arguably acceptable under a prior version of the bindings). + * Let's make those old Chromebooks work by detecting that + * we're not a child of "reserved-memory" and mimicking the + * expected behavior. + */ + parent_node = of_get_parent(of_node); + if (!of_node_name_eq(parent_node, "reserved-memory") && + !pdata->console_size && !pdata->ftrace_size && + !pdata->pmsg_size && !pdata->ecc_info.ecc_size) { + pdata->console_size = pdata->record_size; + pdata->pmsg_size = pdata->record_size; + } + of_node_put(parent_node); + + return 0; +} + +static int ramoops_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ramoops_platform_data *pdata = dev->platform_data; + struct ramoops_platform_data pdata_local; + struct ramoops_context *cxt = &oops_cxt; + size_t dump_mem_sz; + phys_addr_t paddr; + int err = -EINVAL; + + /* + * Only a single ramoops area allowed at a time, so fail extra + * probes. + */ + if (cxt->max_dump_cnt) { + pr_err("already initialized\n"); + goto fail_out; + } + + if (dev_of_node(dev) && !pdata) { + pdata = &pdata_local; + memset(pdata, 0, sizeof(*pdata)); + + err = ramoops_parse_dt(pdev, pdata); + if (err < 0) + goto fail_out; + } + + /* Make sure we didn't get bogus platform data pointer. */ + if (!pdata) { + pr_err("NULL platform data\n"); + err = -EINVAL; + goto fail_out; + } + + if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size && + !pdata->ftrace_size && !pdata->pmsg_size)) { + pr_err("The memory size and the record/console size must be " + "non-zero\n"); + err = -EINVAL; + goto fail_out; + } + + if (pdata->record_size && !is_power_of_2(pdata->record_size)) + pdata->record_size = rounddown_pow_of_two(pdata->record_size); + if (pdata->console_size && !is_power_of_2(pdata->console_size)) + pdata->console_size = rounddown_pow_of_two(pdata->console_size); + if (pdata->ftrace_size && !is_power_of_2(pdata->ftrace_size)) + pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size); + if (pdata->pmsg_size && !is_power_of_2(pdata->pmsg_size)) + pdata->pmsg_size = rounddown_pow_of_two(pdata->pmsg_size); + + cxt->size = pdata->mem_size; + cxt->phys_addr = pdata->mem_address; + cxt->memtype = pdata->mem_type; + cxt->record_size = pdata->record_size; + cxt->console_size = pdata->console_size; + cxt->ftrace_size = pdata->ftrace_size; + cxt->pmsg_size = pdata->pmsg_size; + cxt->flags = pdata->flags; + cxt->ecc_info = pdata->ecc_info; + + paddr = cxt->phys_addr; + + dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size + - cxt->pmsg_size; + err = ramoops_init_przs("dmesg", dev, cxt, &cxt->dprzs, &paddr, + dump_mem_sz, cxt->record_size, + &cxt->max_dump_cnt, 0, 0); + if (err) + goto fail_init; + + err = ramoops_init_prz("console", dev, cxt, &cxt->cprz, &paddr, + cxt->console_size, 0); + if (err) + goto fail_init; + + err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr, + cxt->pmsg_size, 0); + if (err) + goto fail_init; + + cxt->max_ftrace_cnt = (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) + ? nr_cpu_ids + : 1; + err = ramoops_init_przs("ftrace", dev, cxt, &cxt->fprzs, &paddr, + cxt->ftrace_size, -1, + &cxt->max_ftrace_cnt, LINUX_VERSION_CODE, + (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) + ? PRZ_FLAG_NO_LOCK : 0); + if (err) + goto fail_init; + + cxt->pstore.data = cxt; + /* + * Prepare frontend flags based on which areas are initialized. + * For ramoops_init_przs() cases, the "max count" variable tells + * if there are regions present. For ramoops_init_prz() cases, + * the single region size is how to check. + */ + cxt->pstore.flags = 0; + if (cxt->max_dump_cnt) { + cxt->pstore.flags |= PSTORE_FLAGS_DMESG; + cxt->pstore.max_reason = pdata->max_reason; + } + if (cxt->console_size) + cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE; + if (cxt->max_ftrace_cnt) + cxt->pstore.flags |= PSTORE_FLAGS_FTRACE; + if (cxt->pmsg_size) + cxt->pstore.flags |= PSTORE_FLAGS_PMSG; + + /* + * Since bufsize is only used for dmesg crash dumps, it + * must match the size of the dprz record (after PRZ header + * and ECC bytes have been accounted for). + */ + if (cxt->pstore.flags & PSTORE_FLAGS_DMESG) { + cxt->pstore.bufsize = cxt->dprzs[0]->buffer_size; + cxt->pstore.buf = kvzalloc(cxt->pstore.bufsize, GFP_KERNEL); + if (!cxt->pstore.buf) { + pr_err("cannot allocate pstore crash dump buffer\n"); + err = -ENOMEM; + goto fail_clear; + } + } + + err = pstore_register(&cxt->pstore); + if (err) { + pr_err("registering with pstore failed\n"); + goto fail_buf; + } + + /* + * Update the module parameter variables as well so they are visible + * through /sys/module/ramoops/parameters/ + */ + mem_size = pdata->mem_size; + mem_address = pdata->mem_address; + record_size = pdata->record_size; + ramoops_max_reason = pdata->max_reason; + ramoops_console_size = pdata->console_size; + ramoops_pmsg_size = pdata->pmsg_size; + ramoops_ftrace_size = pdata->ftrace_size; + + pr_info("using 0x%lx@0x%llx, ecc: %d\n", + cxt->size, (unsigned long long)cxt->phys_addr, + cxt->ecc_info.ecc_size); + + return 0; + +fail_buf: + kvfree(cxt->pstore.buf); +fail_clear: + cxt->pstore.bufsize = 0; +fail_init: + ramoops_free_przs(cxt); +fail_out: + return err; +} + +static void ramoops_remove(struct platform_device *pdev) +{ + struct ramoops_context *cxt = &oops_cxt; + + pstore_unregister(&cxt->pstore); + + kvfree(cxt->pstore.buf); + cxt->pstore.bufsize = 0; + + ramoops_free_przs(cxt); +} + +static const struct of_device_id dt_match[] = { + { .compatible = "ramoops" }, + {} +}; + +static struct platform_driver ramoops_driver = { + .probe = ramoops_probe, + .remove_new = ramoops_remove, + .driver = { + .name = "ramoops", + .of_match_table = dt_match, + }, +}; + +static inline void ramoops_unregister_dummy(void) +{ + platform_device_unregister(dummy); + dummy = NULL; +} + +static void __init ramoops_register_dummy(void) +{ + struct ramoops_platform_data pdata; + + /* + * Prepare a dummy platform data structure to carry the module + * parameters. If mem_size isn't set, then there are no module + * parameters, and we can skip this. + */ + if (!mem_size) + return; + + pr_info("using module parameters\n"); + + memset(&pdata, 0, sizeof(pdata)); + pdata.mem_size = mem_size; + pdata.mem_address = mem_address; + pdata.mem_type = mem_type; + pdata.record_size = record_size; + pdata.console_size = ramoops_console_size; + pdata.ftrace_size = ramoops_ftrace_size; + pdata.pmsg_size = ramoops_pmsg_size; + /* If "max_reason" is set, its value has priority over "dump_oops". */ + if (ramoops_max_reason >= 0) + pdata.max_reason = ramoops_max_reason; + /* Otherwise, if "dump_oops" is set, parse it into "max_reason". */ + else if (ramoops_dump_oops != -1) + pdata.max_reason = ramoops_dump_oops ? KMSG_DUMP_OOPS + : KMSG_DUMP_PANIC; + /* And if neither are explicitly set, use the default. */ + else + pdata.max_reason = KMSG_DUMP_OOPS; + pdata.flags = RAMOOPS_FLAG_FTRACE_PER_CPU; + + /* + * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC + * (using 1 byte for ECC isn't much of use anyway). + */ + pdata.ecc_info.ecc_size = ramoops_ecc == 1 ? 16 : ramoops_ecc; + + dummy = platform_device_register_data(NULL, "ramoops", -1, + &pdata, sizeof(pdata)); + if (IS_ERR(dummy)) { + pr_info("could not create platform device: %ld\n", + PTR_ERR(dummy)); + dummy = NULL; + } +} + +static int __init ramoops_init(void) +{ + int ret; + + ramoops_register_dummy(); + ret = platform_driver_register(&ramoops_driver); + if (ret != 0) + ramoops_unregister_dummy(); + + return ret; +} +postcore_initcall(ramoops_init); + +static void __exit ramoops_exit(void) +{ + platform_driver_unregister(&ramoops_driver); + ramoops_unregister_dummy(); +} +module_exit(ramoops_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Marco Stornelli "); +MODULE_DESCRIPTION("RAM Oops/Panic logger/driver"); diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c new file mode 100644 index 0000000000..f1848cdd6d --- /dev/null +++ b/fs/pstore/ram_core.c @@ -0,0 +1,622 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Google, Inc. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ram_internal.h" + +/** + * struct persistent_ram_buffer - persistent circular RAM buffer + * + * @sig: Signature to indicate header (PERSISTENT_RAM_SIG xor PRZ-type value) + * @start: First valid byte in the buffer. + * @size: Number of valid bytes in the buffer. + * @data: The contents of the buffer. + */ +struct persistent_ram_buffer { + uint32_t sig; + atomic_t start; + atomic_t size; + uint8_t data[]; +}; + +#define PERSISTENT_RAM_SIG (0x43474244) /* DBGC */ + +static inline size_t buffer_size(struct persistent_ram_zone *prz) +{ + return atomic_read(&prz->buffer->size); +} + +static inline size_t buffer_start(struct persistent_ram_zone *prz) +{ + return atomic_read(&prz->buffer->start); +} + +/* increase and wrap the start pointer, returning the old value */ +static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) +{ + int old; + int new; + unsigned long flags = 0; + + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_lock_irqsave(&prz->buffer_lock, flags); + + old = atomic_read(&prz->buffer->start); + new = old + a; + while (unlikely(new >= prz->buffer_size)) + new -= prz->buffer_size; + atomic_set(&prz->buffer->start, new); + + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); + + return old; +} + +/* increase the size counter until it hits the max size */ +static void buffer_size_add(struct persistent_ram_zone *prz, size_t a) +{ + size_t old; + size_t new; + unsigned long flags = 0; + + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_lock_irqsave(&prz->buffer_lock, flags); + + old = atomic_read(&prz->buffer->size); + if (old == prz->buffer_size) + goto exit; + + new = old + a; + if (new > prz->buffer_size) + new = prz->buffer_size; + atomic_set(&prz->buffer->size, new); + +exit: + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); +} + +static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, + uint8_t *data, size_t len, uint8_t *ecc) +{ + int i; + + /* Initialize the parity buffer */ + memset(prz->ecc_info.par, 0, + prz->ecc_info.ecc_size * sizeof(prz->ecc_info.par[0])); + encode_rs8(prz->rs_decoder, data, len, prz->ecc_info.par, 0); + for (i = 0; i < prz->ecc_info.ecc_size; i++) + ecc[i] = prz->ecc_info.par[i]; +} + +static int persistent_ram_decode_rs8(struct persistent_ram_zone *prz, + void *data, size_t len, uint8_t *ecc) +{ + int i; + + for (i = 0; i < prz->ecc_info.ecc_size; i++) + prz->ecc_info.par[i] = ecc[i]; + return decode_rs8(prz->rs_decoder, data, prz->ecc_info.par, len, + NULL, 0, NULL, 0, NULL); +} + +static void notrace persistent_ram_update_ecc(struct persistent_ram_zone *prz, + unsigned int start, unsigned int count) +{ + struct persistent_ram_buffer *buffer = prz->buffer; + uint8_t *buffer_end = buffer->data + prz->buffer_size; + uint8_t *block; + uint8_t *par; + int ecc_block_size = prz->ecc_info.block_size; + int ecc_size = prz->ecc_info.ecc_size; + int size = ecc_block_size; + + if (!ecc_size) + return; + + block = buffer->data + (start & ~(ecc_block_size - 1)); + par = prz->par_buffer + (start / ecc_block_size) * ecc_size; + + do { + if (block + ecc_block_size > buffer_end) + size = buffer_end - block; + persistent_ram_encode_rs8(prz, block, size, par); + block += ecc_block_size; + par += ecc_size; + } while (block < buffer->data + start + count); +} + +static void persistent_ram_update_header_ecc(struct persistent_ram_zone *prz) +{ + struct persistent_ram_buffer *buffer = prz->buffer; + + if (!prz->ecc_info.ecc_size) + return; + + persistent_ram_encode_rs8(prz, (uint8_t *)buffer, sizeof(*buffer), + prz->par_header); +} + +static void persistent_ram_ecc_old(struct persistent_ram_zone *prz) +{ + struct persistent_ram_buffer *buffer = prz->buffer; + uint8_t *block; + uint8_t *par; + + if (!prz->ecc_info.ecc_size) + return; + + block = buffer->data; + par = prz->par_buffer; + while (block < buffer->data + buffer_size(prz)) { + int numerr; + int size = prz->ecc_info.block_size; + if (block + size > buffer->data + prz->buffer_size) + size = buffer->data + prz->buffer_size - block; + numerr = persistent_ram_decode_rs8(prz, block, size, par); + if (numerr > 0) { + pr_devel("error in block %p, %d\n", block, numerr); + prz->corrected_bytes += numerr; + } else if (numerr < 0) { + pr_devel("uncorrectable error in block %p\n", block); + prz->bad_blocks++; + } + block += prz->ecc_info.block_size; + par += prz->ecc_info.ecc_size; + } +} + +static int persistent_ram_init_ecc(struct persistent_ram_zone *prz, + struct persistent_ram_ecc_info *ecc_info) +{ + int numerr; + struct persistent_ram_buffer *buffer = prz->buffer; + size_t ecc_blocks; + size_t ecc_total; + + if (!ecc_info || !ecc_info->ecc_size) + return 0; + + prz->ecc_info.block_size = ecc_info->block_size ?: 128; + prz->ecc_info.ecc_size = ecc_info->ecc_size ?: 16; + prz->ecc_info.symsize = ecc_info->symsize ?: 8; + prz->ecc_info.poly = ecc_info->poly ?: 0x11d; + + ecc_blocks = DIV_ROUND_UP(prz->buffer_size - prz->ecc_info.ecc_size, + prz->ecc_info.block_size + + prz->ecc_info.ecc_size); + ecc_total = (ecc_blocks + 1) * prz->ecc_info.ecc_size; + if (ecc_total >= prz->buffer_size) { + pr_err("%s: invalid ecc_size %u (total %zu, buffer size %zu)\n", + __func__, prz->ecc_info.ecc_size, + ecc_total, prz->buffer_size); + return -EINVAL; + } + + prz->buffer_size -= ecc_total; + prz->par_buffer = buffer->data + prz->buffer_size; + prz->par_header = prz->par_buffer + + ecc_blocks * prz->ecc_info.ecc_size; + + /* + * first consecutive root is 0 + * primitive element to generate roots = 1 + */ + prz->rs_decoder = init_rs(prz->ecc_info.symsize, prz->ecc_info.poly, + 0, 1, prz->ecc_info.ecc_size); + if (prz->rs_decoder == NULL) { + pr_info("init_rs failed\n"); + return -EINVAL; + } + + /* allocate workspace instead of using stack VLA */ + prz->ecc_info.par = kmalloc_array(prz->ecc_info.ecc_size, + sizeof(*prz->ecc_info.par), + GFP_KERNEL); + if (!prz->ecc_info.par) { + pr_err("cannot allocate ECC parity workspace\n"); + return -ENOMEM; + } + + prz->corrected_bytes = 0; + prz->bad_blocks = 0; + + numerr = persistent_ram_decode_rs8(prz, buffer, sizeof(*buffer), + prz->par_header); + if (numerr > 0) { + pr_info("error in header, %d\n", numerr); + prz->corrected_bytes += numerr; + } else if (numerr < 0) { + pr_info_ratelimited("uncorrectable error in header\n"); + prz->bad_blocks++; + } + + return 0; +} + +ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz, + char *str, size_t len) +{ + ssize_t ret; + + if (!prz->ecc_info.ecc_size) + return 0; + + if (prz->corrected_bytes || prz->bad_blocks) + ret = snprintf(str, len, "" + "\nECC: %d Corrected bytes, %d unrecoverable blocks\n", + prz->corrected_bytes, prz->bad_blocks); + else + ret = snprintf(str, len, "\nECC: No errors detected\n"); + + return ret; +} + +static void notrace persistent_ram_update(struct persistent_ram_zone *prz, + const void *s, unsigned int start, unsigned int count) +{ + struct persistent_ram_buffer *buffer = prz->buffer; + memcpy_toio(buffer->data + start, s, count); + persistent_ram_update_ecc(prz, start, count); +} + +static int notrace persistent_ram_update_user(struct persistent_ram_zone *prz, + const void __user *s, unsigned int start, unsigned int count) +{ + struct persistent_ram_buffer *buffer = prz->buffer; + int ret = unlikely(copy_from_user(buffer->data + start, s, count)) ? + -EFAULT : 0; + persistent_ram_update_ecc(prz, start, count); + return ret; +} + +void persistent_ram_save_old(struct persistent_ram_zone *prz) +{ + struct persistent_ram_buffer *buffer = prz->buffer; + size_t size = buffer_size(prz); + size_t start = buffer_start(prz); + + if (!size) + return; + + if (!prz->old_log) { + persistent_ram_ecc_old(prz); + prz->old_log = kvzalloc(size, GFP_KERNEL); + } + if (!prz->old_log) { + pr_err("failed to allocate buffer\n"); + return; + } + + prz->old_log_size = size; + memcpy_fromio(prz->old_log, &buffer->data[start], size - start); + memcpy_fromio(prz->old_log + size - start, &buffer->data[0], start); +} + +int notrace persistent_ram_write(struct persistent_ram_zone *prz, + const void *s, unsigned int count) +{ + int rem; + int c = count; + size_t start; + + if (unlikely(c > prz->buffer_size)) { + s += c - prz->buffer_size; + c = prz->buffer_size; + } + + buffer_size_add(prz, c); + + start = buffer_start_add(prz, c); + + rem = prz->buffer_size - start; + if (unlikely(rem < c)) { + persistent_ram_update(prz, s, start, rem); + s += rem; + c -= rem; + start = 0; + } + persistent_ram_update(prz, s, start, c); + + persistent_ram_update_header_ecc(prz); + + return count; +} + +int notrace persistent_ram_write_user(struct persistent_ram_zone *prz, + const void __user *s, unsigned int count) +{ + int rem, ret = 0, c = count; + size_t start; + + if (unlikely(c > prz->buffer_size)) { + s += c - prz->buffer_size; + c = prz->buffer_size; + } + + buffer_size_add(prz, c); + + start = buffer_start_add(prz, c); + + rem = prz->buffer_size - start; + if (unlikely(rem < c)) { + ret = persistent_ram_update_user(prz, s, start, rem); + s += rem; + c -= rem; + start = 0; + } + if (likely(!ret)) + ret = persistent_ram_update_user(prz, s, start, c); + + persistent_ram_update_header_ecc(prz); + + return unlikely(ret) ? ret : count; +} + +size_t persistent_ram_old_size(struct persistent_ram_zone *prz) +{ + return prz->old_log_size; +} + +void *persistent_ram_old(struct persistent_ram_zone *prz) +{ + return prz->old_log; +} + +void persistent_ram_free_old(struct persistent_ram_zone *prz) +{ + kvfree(prz->old_log); + prz->old_log = NULL; + prz->old_log_size = 0; +} + +void persistent_ram_zap(struct persistent_ram_zone *prz) +{ + atomic_set(&prz->buffer->start, 0); + atomic_set(&prz->buffer->size, 0); + persistent_ram_update_header_ecc(prz); +} + +#define MEM_TYPE_WCOMBINE 0 +#define MEM_TYPE_NONCACHED 1 +#define MEM_TYPE_NORMAL 2 + +static void *persistent_ram_vmap(phys_addr_t start, size_t size, + unsigned int memtype) +{ + struct page **pages; + phys_addr_t page_start; + unsigned int page_count; + pgprot_t prot; + unsigned int i; + void *vaddr; + + page_start = start - offset_in_page(start); + page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE); + + switch (memtype) { + case MEM_TYPE_NORMAL: + prot = PAGE_KERNEL; + break; + case MEM_TYPE_NONCACHED: + prot = pgprot_noncached(PAGE_KERNEL); + break; + case MEM_TYPE_WCOMBINE: + prot = pgprot_writecombine(PAGE_KERNEL); + break; + default: + pr_err("invalid mem_type=%d\n", memtype); + return NULL; + } + + pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); + if (!pages) { + pr_err("%s: Failed to allocate array for %u pages\n", + __func__, page_count); + return NULL; + } + + for (i = 0; i < page_count; i++) { + phys_addr_t addr = page_start + i * PAGE_SIZE; + pages[i] = pfn_to_page(addr >> PAGE_SHIFT); + } + /* + * VM_IOREMAP used here to bypass this region during vread() + * and kmap_atomic() (i.e. kcore) to avoid __va() failures. + */ + vaddr = vmap(pages, page_count, VM_MAP | VM_IOREMAP, prot); + kfree(pages); + + /* + * Since vmap() uses page granularity, we must add the offset + * into the page here, to get the byte granularity address + * into the mapping to represent the actual "start" location. + */ + return vaddr + offset_in_page(start); +} + +static void *persistent_ram_iomap(phys_addr_t start, size_t size, + unsigned int memtype, char *label) +{ + void *va; + + if (!request_mem_region(start, size, label ?: "ramoops")) { + pr_err("request mem region (%s 0x%llx@0x%llx) failed\n", + label ?: "ramoops", + (unsigned long long)size, (unsigned long long)start); + return NULL; + } + + if (memtype) + va = ioremap(start, size); + else + va = ioremap_wc(start, size); + + /* + * Since request_mem_region() and ioremap() are byte-granularity + * there is no need handle anything special like we do when the + * vmap() case in persistent_ram_vmap() above. + */ + return va; +} + +static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, + struct persistent_ram_zone *prz, int memtype) +{ + prz->paddr = start; + prz->size = size; + + if (pfn_valid(start >> PAGE_SHIFT)) + prz->vaddr = persistent_ram_vmap(start, size, memtype); + else + prz->vaddr = persistent_ram_iomap(start, size, memtype, + prz->label); + + if (!prz->vaddr) { + pr_err("%s: Failed to map 0x%llx pages at 0x%llx\n", __func__, + (unsigned long long)size, (unsigned long long)start); + return -ENOMEM; + } + + prz->buffer = prz->vaddr; + prz->buffer_size = size - sizeof(struct persistent_ram_buffer); + + return 0; +} + +static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, + struct persistent_ram_ecc_info *ecc_info) +{ + int ret; + bool zap = !!(prz->flags & PRZ_FLAG_ZAP_OLD); + + ret = persistent_ram_init_ecc(prz, ecc_info); + if (ret) { + pr_warn("ECC failed %s\n", prz->label); + return ret; + } + + sig ^= PERSISTENT_RAM_SIG; + + if (prz->buffer->sig == sig) { + if (buffer_size(prz) == 0 && buffer_start(prz) == 0) { + pr_debug("found existing empty buffer\n"); + return 0; + } + + if (buffer_size(prz) > prz->buffer_size || + buffer_start(prz) > buffer_size(prz)) { + pr_info("found existing invalid buffer, size %zu, start %zu\n", + buffer_size(prz), buffer_start(prz)); + zap = true; + } else { + pr_debug("found existing buffer, size %zu, start %zu\n", + buffer_size(prz), buffer_start(prz)); + persistent_ram_save_old(prz); + } + } else { + pr_debug("no valid data in buffer (sig = 0x%08x)\n", + prz->buffer->sig); + prz->buffer->sig = sig; + zap = true; + } + + /* Reset missing, invalid, or single-use memory area. */ + if (zap) + persistent_ram_zap(prz); + + return 0; +} + +void persistent_ram_free(struct persistent_ram_zone **_prz) +{ + struct persistent_ram_zone *prz; + + if (!_prz) + return; + + prz = *_prz; + if (!prz) + return; + + if (prz->vaddr) { + if (pfn_valid(prz->paddr >> PAGE_SHIFT)) { + /* We must vunmap() at page-granularity. */ + vunmap(prz->vaddr - offset_in_page(prz->paddr)); + } else { + iounmap(prz->vaddr); + release_mem_region(prz->paddr, prz->size); + } + prz->vaddr = NULL; + } + if (prz->rs_decoder) { + free_rs(prz->rs_decoder); + prz->rs_decoder = NULL; + } + kfree(prz->ecc_info.par); + prz->ecc_info.par = NULL; + + persistent_ram_free_old(prz); + kfree(prz->label); + kfree(prz); + *_prz = NULL; +} + +struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, + u32 sig, struct persistent_ram_ecc_info *ecc_info, + unsigned int memtype, u32 flags, char *label) +{ + struct persistent_ram_zone *prz; + int ret = -ENOMEM; + + prz = kzalloc(sizeof(struct persistent_ram_zone), GFP_KERNEL); + if (!prz) { + pr_err("failed to allocate persistent ram zone\n"); + goto err; + } + + /* Initialize general buffer state. */ + raw_spin_lock_init(&prz->buffer_lock); + prz->flags = flags; + prz->label = kstrdup(label, GFP_KERNEL); + if (!prz->label) + goto err; + + ret = persistent_ram_buffer_map(start, size, prz, memtype); + if (ret) + goto err; + + ret = persistent_ram_post_init(prz, sig, ecc_info); + if (ret) + goto err; + + pr_debug("attached %s 0x%zx@0x%llx: %zu header, %zu data, %zu ecc (%d/%d)\n", + prz->label, prz->size, (unsigned long long)prz->paddr, + sizeof(*prz->buffer), prz->buffer_size, + prz->size - sizeof(*prz->buffer) - prz->buffer_size, + prz->ecc_info.ecc_size, prz->ecc_info.block_size); + + return prz; +err: + persistent_ram_free(&prz); + return ERR_PTR(ret); +} diff --git a/fs/pstore/ram_internal.h b/fs/pstore/ram_internal.h new file mode 100644 index 0000000000..5f69469835 --- /dev/null +++ b/fs/pstore/ram_internal.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2010 Marco Stornelli + * Copyright (C) 2011 Kees Cook + * Copyright (C) 2011 Google, Inc. + */ + +#include + +/* + * Choose whether access to the RAM zone requires locking or not. If a zone + * can be written to from different CPUs like with ftrace for example, then + * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required. + */ +#define PRZ_FLAG_NO_LOCK BIT(0) +/* + * If a PRZ should only have a single-boot lifetime, this marks it as + * getting wiped after its contents get copied out after boot. + */ +#define PRZ_FLAG_ZAP_OLD BIT(1) + +/** + * struct persistent_ram_zone - Details of a persistent RAM zone (PRZ) + * used as a pstore backend + * + * @paddr: physical address of the mapped RAM area + * @size: size of mapping + * @label: unique name of this PRZ + * @type: frontend type for this PRZ + * @flags: holds PRZ_FLAGS_* bits + * + * @buffer_lock: + * locks access to @buffer "size" bytes and "start" offset + * @buffer: + * pointer to actual RAM area managed by this PRZ + * @buffer_size: + * bytes in @buffer->data (not including any trailing ECC bytes) + * + * @par_buffer: + * pointer into @buffer->data containing ECC bytes for @buffer->data + * @par_header: + * pointer into @buffer->data containing ECC bytes for @buffer header + * (i.e. all fields up to @data) + * @rs_decoder: + * RSLIB instance for doing ECC calculations + * @corrected_bytes: + * ECC corrected bytes accounting since boot + * @bad_blocks: + * ECC uncorrectable bytes accounting since boot + * @ecc_info: + * ECC configuration details + * + * @old_log: + * saved copy of @buffer->data prior to most recent wipe + * @old_log_size: + * bytes contained in @old_log + * + */ +struct persistent_ram_zone { + phys_addr_t paddr; + size_t size; + void *vaddr; + char *label; + enum pstore_type_id type; + u32 flags; + + raw_spinlock_t buffer_lock; + struct persistent_ram_buffer *buffer; + size_t buffer_size; + + char *par_buffer; + char *par_header; + struct rs_control *rs_decoder; + int corrected_bytes; + int bad_blocks; + struct persistent_ram_ecc_info ecc_info; + + char *old_log; + size_t old_log_size; +}; + +struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, + u32 sig, struct persistent_ram_ecc_info *ecc_info, + unsigned int memtype, u32 flags, char *label); +void persistent_ram_free(struct persistent_ram_zone **_prz); +void persistent_ram_zap(struct persistent_ram_zone *prz); + +int persistent_ram_write(struct persistent_ram_zone *prz, const void *s, + unsigned int count); +int persistent_ram_write_user(struct persistent_ram_zone *prz, + const void __user *s, unsigned int count); + +void persistent_ram_save_old(struct persistent_ram_zone *prz); +size_t persistent_ram_old_size(struct persistent_ram_zone *prz); +void *persistent_ram_old(struct persistent_ram_zone *prz); +void persistent_ram_free_old(struct persistent_ram_zone *prz); +ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz, + char *str, size_t len); diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c new file mode 100644 index 0000000000..2770746bb7 --- /dev/null +++ b/fs/pstore/zone.c @@ -0,0 +1,1468 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide a pstore intermediate backend, organized into kernel memory + * allocated zones that are then mapped and flushed into a single + * contiguous region on a storage backend of some kind (block, mtd, etc). + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +/** + * struct psz_buffer - header of zone to flush to storage + * + * @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value) + * @datalen: length of data in @data + * @start: offset into @data where the beginning of the stored bytes begin + * @data: zone data. + */ +struct psz_buffer { +#define PSZ_SIG (0x43474244) /* DBGC */ + uint32_t sig; + atomic_t datalen; + atomic_t start; + uint8_t data[]; +}; + +/** + * struct psz_kmsg_header - kmsg dump-specific header to flush to storage + * + * @magic: magic num for kmsg dump header + * @time: kmsg dump trigger time + * @compressed: whether conpressed + * @counter: kmsg dump counter + * @reason: the kmsg dump reason (e.g. oops, panic, etc) + * @data: pointer to log data + * + * This is a sub-header for a kmsg dump, trailing after &psz_buffer. + */ +struct psz_kmsg_header { +#define PSTORE_KMSG_HEADER_MAGIC 0x4dfc3ae5 /* Just a random number */ + uint32_t magic; + struct timespec64 time; + bool compressed; + uint32_t counter; + enum kmsg_dump_reason reason; + uint8_t data[]; +}; + +/** + * struct pstore_zone - single stored buffer + * + * @off: zone offset of storage + * @type: front-end type for this zone + * @name: front-end name for this zone + * @buffer: pointer to data buffer managed by this zone + * @oldbuf: pointer to old data buffer + * @buffer_size: bytes in @buffer->data + * @should_recover: whether this zone should recover from storage + * @dirty: whether the data in @buffer dirty + * + * zone structure in memory. + */ +struct pstore_zone { + loff_t off; + const char *name; + enum pstore_type_id type; + + struct psz_buffer *buffer; + struct psz_buffer *oldbuf; + size_t buffer_size; + bool should_recover; + atomic_t dirty; +}; + +/** + * struct psz_context - all about running state of pstore/zone + * + * @kpszs: kmsg dump storage zones + * @ppsz: pmsg storage zone + * @cpsz: console storage zone + * @fpszs: ftrace storage zones + * @kmsg_max_cnt: max count of @kpszs + * @kmsg_read_cnt: counter of total read kmsg dumps + * @kmsg_write_cnt: counter of total kmsg dump writes + * @pmsg_read_cnt: counter of total read pmsg zone + * @console_read_cnt: counter of total read console zone + * @ftrace_max_cnt: max count of @fpszs + * @ftrace_read_cnt: counter of max read ftrace zone + * @oops_counter: counter of oops dumps + * @panic_counter: counter of panic dumps + * @recovered: whether finished recovering data from storage + * @on_panic: whether panic is happening + * @pstore_zone_info_lock: lock to @pstore_zone_info + * @pstore_zone_info: information from backend + * @pstore: structure for pstore + */ +struct psz_context { + struct pstore_zone **kpszs; + struct pstore_zone *ppsz; + struct pstore_zone *cpsz; + struct pstore_zone **fpszs; + unsigned int kmsg_max_cnt; + unsigned int kmsg_read_cnt; + unsigned int kmsg_write_cnt; + unsigned int pmsg_read_cnt; + unsigned int console_read_cnt; + unsigned int ftrace_max_cnt; + unsigned int ftrace_read_cnt; + /* + * These counters should be calculated during recovery. + * It records the oops/panic times after crashes rather than boots. + */ + unsigned int oops_counter; + unsigned int panic_counter; + atomic_t recovered; + atomic_t on_panic; + + /* + * pstore_zone_info_lock protects this entire structure during calls + * to register_pstore_zone()/unregister_pstore_zone(). + */ + struct mutex pstore_zone_info_lock; + struct pstore_zone_info *pstore_zone_info; + struct pstore_info pstore; +}; +static struct psz_context pstore_zone_cxt; + +static void psz_flush_all_dirty_zones(struct work_struct *); +static DECLARE_DELAYED_WORK(psz_cleaner, psz_flush_all_dirty_zones); + +/** + * enum psz_flush_mode - flush mode for psz_zone_write() + * + * @FLUSH_NONE: do not flush to storage but update data on memory + * @FLUSH_PART: just flush part of data including meta data to storage + * @FLUSH_META: just flush meta data of zone to storage + * @FLUSH_ALL: flush all of zone + */ +enum psz_flush_mode { + FLUSH_NONE = 0, + FLUSH_PART, + FLUSH_META, + FLUSH_ALL, +}; + +static inline int buffer_datalen(struct pstore_zone *zone) +{ + return atomic_read(&zone->buffer->datalen); +} + +static inline int buffer_start(struct pstore_zone *zone) +{ + return atomic_read(&zone->buffer->start); +} + +static inline bool is_on_panic(void) +{ + return atomic_read(&pstore_zone_cxt.on_panic); +} + +static ssize_t psz_zone_read_buffer(struct pstore_zone *zone, char *buf, + size_t len, unsigned long off) +{ + if (!buf || !zone || !zone->buffer) + return -EINVAL; + if (off > zone->buffer_size) + return -EINVAL; + len = min_t(size_t, len, zone->buffer_size - off); + memcpy(buf, zone->buffer->data + off, len); + return len; +} + +static int psz_zone_read_oldbuf(struct pstore_zone *zone, char *buf, + size_t len, unsigned long off) +{ + if (!buf || !zone || !zone->oldbuf) + return -EINVAL; + if (off > zone->buffer_size) + return -EINVAL; + len = min_t(size_t, len, zone->buffer_size - off); + memcpy(buf, zone->oldbuf->data + off, len); + return 0; +} + +static int psz_zone_write(struct pstore_zone *zone, + enum psz_flush_mode flush_mode, const char *buf, + size_t len, unsigned long off) +{ + struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info; + ssize_t wcnt = 0; + ssize_t (*writeop)(const char *buf, size_t bytes, loff_t pos); + size_t wlen; + + if (off > zone->buffer_size) + return -EINVAL; + + wlen = min_t(size_t, len, zone->buffer_size - off); + if (buf && wlen) { + memcpy(zone->buffer->data + off, buf, wlen); + atomic_set(&zone->buffer->datalen, wlen + off); + } + + /* avoid to damage old records */ + if (!is_on_panic() && !atomic_read(&pstore_zone_cxt.recovered)) + goto dirty; + + writeop = is_on_panic() ? info->panic_write : info->write; + if (!writeop) + goto dirty; + + switch (flush_mode) { + case FLUSH_NONE: + if (unlikely(buf && wlen)) + goto dirty; + return 0; + case FLUSH_PART: + wcnt = writeop((const char *)zone->buffer->data + off, wlen, + zone->off + sizeof(*zone->buffer) + off); + if (wcnt != wlen) + goto dirty; + fallthrough; + case FLUSH_META: + wlen = sizeof(struct psz_buffer); + wcnt = writeop((const char *)zone->buffer, wlen, zone->off); + if (wcnt != wlen) + goto dirty; + break; + case FLUSH_ALL: + wlen = zone->buffer_size + sizeof(*zone->buffer); + wcnt = writeop((const char *)zone->buffer, wlen, zone->off); + if (wcnt != wlen) + goto dirty; + break; + } + + return 0; +dirty: + /* no need to mark dirty if going to try next zone */ + if (wcnt == -ENOMSG) + return -ENOMSG; + atomic_set(&zone->dirty, true); + /* flush dirty zones nicely */ + if (wcnt == -EBUSY && !is_on_panic()) + schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(500)); + return -EBUSY; +} + +static int psz_flush_dirty_zone(struct pstore_zone *zone) +{ + int ret; + + if (unlikely(!zone)) + return -EINVAL; + + if (unlikely(!atomic_read(&pstore_zone_cxt.recovered))) + return -EBUSY; + + if (!atomic_xchg(&zone->dirty, false)) + return 0; + + ret = psz_zone_write(zone, FLUSH_ALL, NULL, 0, 0); + if (ret) + atomic_set(&zone->dirty, true); + return ret; +} + +static int psz_flush_dirty_zones(struct pstore_zone **zones, unsigned int cnt) +{ + int i, ret; + struct pstore_zone *zone; + + if (!zones) + return -EINVAL; + + for (i = 0; i < cnt; i++) { + zone = zones[i]; + if (!zone) + return -EINVAL; + ret = psz_flush_dirty_zone(zone); + if (ret) + return ret; + } + return 0; +} + +static int psz_move_zone(struct pstore_zone *old, struct pstore_zone *new) +{ + const char *data = (const char *)old->buffer->data; + int ret; + + ret = psz_zone_write(new, FLUSH_ALL, data, buffer_datalen(old), 0); + if (ret) { + atomic_set(&new->buffer->datalen, 0); + atomic_set(&new->dirty, false); + return ret; + } + atomic_set(&old->buffer->datalen, 0); + return 0; +} + +static void psz_flush_all_dirty_zones(struct work_struct *work) +{ + struct psz_context *cxt = &pstore_zone_cxt; + int ret = 0; + + if (cxt->ppsz) + ret |= psz_flush_dirty_zone(cxt->ppsz); + if (cxt->cpsz) + ret |= psz_flush_dirty_zone(cxt->cpsz); + if (cxt->kpszs) + ret |= psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt); + if (cxt->fpszs) + ret |= psz_flush_dirty_zones(cxt->fpszs, cxt->ftrace_max_cnt); + if (ret && cxt->pstore_zone_info) + schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(1000)); +} + +static int psz_kmsg_recover_data(struct psz_context *cxt) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + struct pstore_zone *zone = NULL; + struct psz_buffer *buf; + unsigned long i; + ssize_t rcnt; + + if (!info->read) + return -EINVAL; + + for (i = 0; i < cxt->kmsg_max_cnt; i++) { + zone = cxt->kpszs[i]; + if (unlikely(!zone)) + return -EINVAL; + if (atomic_read(&zone->dirty)) { + unsigned int wcnt = cxt->kmsg_write_cnt; + struct pstore_zone *new = cxt->kpszs[wcnt]; + int ret; + + ret = psz_move_zone(zone, new); + if (ret) { + pr_err("move zone from %lu to %d failed\n", + i, wcnt); + return ret; + } + cxt->kmsg_write_cnt = (wcnt + 1) % cxt->kmsg_max_cnt; + } + if (!zone->should_recover) + continue; + buf = zone->buffer; + rcnt = info->read((char *)buf, zone->buffer_size + sizeof(*buf), + zone->off); + if (rcnt != zone->buffer_size + sizeof(*buf)) + return rcnt < 0 ? rcnt : -EIO; + } + return 0; +} + +static int psz_kmsg_recover_meta(struct psz_context *cxt) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + struct pstore_zone *zone; + ssize_t rcnt, len; + struct psz_buffer *buf; + struct psz_kmsg_header *hdr; + struct timespec64 time = { }; + unsigned long i; + /* + * Recover may on panic, we can't allocate any memory by kmalloc. + * So, we use local array instead. + */ + char buffer_header[sizeof(*buf) + sizeof(*hdr)] = {0}; + + if (!info->read) + return -EINVAL; + + len = sizeof(*buf) + sizeof(*hdr); + buf = (struct psz_buffer *)buffer_header; + for (i = 0; i < cxt->kmsg_max_cnt; i++) { + zone = cxt->kpszs[i]; + if (unlikely(!zone)) + return -EINVAL; + + rcnt = info->read((char *)buf, len, zone->off); + if (rcnt == -ENOMSG) { + pr_debug("%s with id %lu may be broken, skip\n", + zone->name, i); + continue; + } else if (rcnt != len) { + pr_err("read %s with id %lu failed\n", zone->name, i); + return rcnt < 0 ? rcnt : -EIO; + } + + if (buf->sig != zone->buffer->sig) { + pr_debug("no valid data in kmsg dump zone %lu\n", i); + continue; + } + + if (zone->buffer_size < atomic_read(&buf->datalen)) { + pr_info("found overtop zone: %s: id %lu, off %lld, size %zu\n", + zone->name, i, zone->off, + zone->buffer_size); + continue; + } + + hdr = (struct psz_kmsg_header *)buf->data; + if (hdr->magic != PSTORE_KMSG_HEADER_MAGIC) { + pr_info("found invalid zone: %s: id %lu, off %lld, size %zu\n", + zone->name, i, zone->off, + zone->buffer_size); + continue; + } + + /* + * we get the newest zone, and the next one must be the oldest + * or unused zone, because we do write one by one like a circle. + */ + if (hdr->time.tv_sec >= time.tv_sec) { + time.tv_sec = hdr->time.tv_sec; + cxt->kmsg_write_cnt = (i + 1) % cxt->kmsg_max_cnt; + } + + if (hdr->reason == KMSG_DUMP_OOPS) + cxt->oops_counter = + max(cxt->oops_counter, hdr->counter); + else if (hdr->reason == KMSG_DUMP_PANIC) + cxt->panic_counter = + max(cxt->panic_counter, hdr->counter); + + if (!atomic_read(&buf->datalen)) { + pr_debug("found erased zone: %s: id %lu, off %lld, size %zu, datalen %d\n", + zone->name, i, zone->off, + zone->buffer_size, + atomic_read(&buf->datalen)); + continue; + } + + if (!is_on_panic()) + zone->should_recover = true; + pr_debug("found nice zone: %s: id %lu, off %lld, size %zu, datalen %d\n", + zone->name, i, zone->off, + zone->buffer_size, atomic_read(&buf->datalen)); + } + + return 0; +} + +static int psz_kmsg_recover(struct psz_context *cxt) +{ + int ret; + + if (!cxt->kpszs) + return 0; + + ret = psz_kmsg_recover_meta(cxt); + if (ret) + goto recover_fail; + + ret = psz_kmsg_recover_data(cxt); + if (ret) + goto recover_fail; + + return 0; +recover_fail: + pr_debug("psz_recover_kmsg failed\n"); + return ret; +} + +static int psz_recover_zone(struct psz_context *cxt, struct pstore_zone *zone) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + struct psz_buffer *oldbuf, tmpbuf; + int ret = 0; + char *buf; + ssize_t rcnt, len, start, off; + + if (!zone || zone->oldbuf) + return 0; + + if (is_on_panic()) { + /* save data as much as possible */ + psz_flush_dirty_zone(zone); + return 0; + } + + if (unlikely(!info->read)) + return -EINVAL; + + len = sizeof(struct psz_buffer); + rcnt = info->read((char *)&tmpbuf, len, zone->off); + if (rcnt != len) { + pr_debug("read zone %s failed\n", zone->name); + return rcnt < 0 ? rcnt : -EIO; + } + + if (tmpbuf.sig != zone->buffer->sig) { + pr_debug("no valid data in zone %s\n", zone->name); + return 0; + } + + if (zone->buffer_size < atomic_read(&tmpbuf.datalen) || + zone->buffer_size < atomic_read(&tmpbuf.start)) { + pr_info("found overtop zone: %s: off %lld, size %zu\n", + zone->name, zone->off, zone->buffer_size); + /* just keep going */ + return 0; + } + + if (!atomic_read(&tmpbuf.datalen)) { + pr_debug("found erased zone: %s: off %lld, size %zu, datalen %d\n", + zone->name, zone->off, zone->buffer_size, + atomic_read(&tmpbuf.datalen)); + return 0; + } + + pr_debug("found nice zone: %s: off %lld, size %zu, datalen %d\n", + zone->name, zone->off, zone->buffer_size, + atomic_read(&tmpbuf.datalen)); + + len = atomic_read(&tmpbuf.datalen) + sizeof(*oldbuf); + oldbuf = kzalloc(len, GFP_KERNEL); + if (!oldbuf) + return -ENOMEM; + + memcpy(oldbuf, &tmpbuf, sizeof(*oldbuf)); + buf = (char *)oldbuf + sizeof(*oldbuf); + len = atomic_read(&oldbuf->datalen); + start = atomic_read(&oldbuf->start); + off = zone->off + sizeof(*oldbuf); + + /* get part of data */ + rcnt = info->read(buf, len - start, off + start); + if (rcnt != len - start) { + pr_err("read zone %s failed\n", zone->name); + ret = rcnt < 0 ? rcnt : -EIO; + goto free_oldbuf; + } + + /* get the rest of data */ + rcnt = info->read(buf + len - start, start, off); + if (rcnt != start) { + pr_err("read zone %s failed\n", zone->name); + ret = rcnt < 0 ? rcnt : -EIO; + goto free_oldbuf; + } + + zone->oldbuf = oldbuf; + psz_flush_dirty_zone(zone); + return 0; + +free_oldbuf: + kfree(oldbuf); + return ret; +} + +static int psz_recover_zones(struct psz_context *cxt, + struct pstore_zone **zones, unsigned int cnt) +{ + int ret; + unsigned int i; + struct pstore_zone *zone; + + if (!zones) + return 0; + + for (i = 0; i < cnt; i++) { + zone = zones[i]; + if (unlikely(!zone)) + continue; + ret = psz_recover_zone(cxt, zone); + if (ret) + goto recover_fail; + } + + return 0; +recover_fail: + pr_debug("recover %s[%u] failed\n", zone->name, i); + return ret; +} + +/** + * psz_recovery() - recover data from storage + * @cxt: the context of pstore/zone + * + * recovery means reading data back from storage after rebooting + * + * Return: 0 on success, others on failure. + */ +static inline int psz_recovery(struct psz_context *cxt) +{ + int ret; + + if (atomic_read(&cxt->recovered)) + return 0; + + ret = psz_kmsg_recover(cxt); + if (ret) + goto out; + + ret = psz_recover_zone(cxt, cxt->ppsz); + if (ret) + goto out; + + ret = psz_recover_zone(cxt, cxt->cpsz); + if (ret) + goto out; + + ret = psz_recover_zones(cxt, cxt->fpszs, cxt->ftrace_max_cnt); + +out: + if (unlikely(ret)) + pr_err("recover failed\n"); + else { + pr_debug("recover end!\n"); + atomic_set(&cxt->recovered, 1); + } + return ret; +} + +static int psz_pstore_open(struct pstore_info *psi) +{ + struct psz_context *cxt = psi->data; + + cxt->kmsg_read_cnt = 0; + cxt->pmsg_read_cnt = 0; + cxt->console_read_cnt = 0; + cxt->ftrace_read_cnt = 0; + return 0; +} + +static inline bool psz_old_ok(struct pstore_zone *zone) +{ + if (zone && zone->oldbuf && atomic_read(&zone->oldbuf->datalen)) + return true; + return false; +} + +static inline bool psz_ok(struct pstore_zone *zone) +{ + if (zone && zone->buffer && buffer_datalen(zone)) + return true; + return false; +} + +static inline int psz_kmsg_erase(struct psz_context *cxt, + struct pstore_zone *zone, struct pstore_record *record) +{ + struct psz_buffer *buffer = zone->buffer; + struct psz_kmsg_header *hdr = + (struct psz_kmsg_header *)buffer->data; + size_t size; + + if (unlikely(!psz_ok(zone))) + return 0; + + /* this zone is already updated, no need to erase */ + if (record->count != hdr->counter) + return 0; + + size = buffer_datalen(zone) + sizeof(*zone->buffer); + atomic_set(&zone->buffer->datalen, 0); + if (cxt->pstore_zone_info->erase) + return cxt->pstore_zone_info->erase(size, zone->off); + else + return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); +} + +static inline int psz_record_erase(struct psz_context *cxt, + struct pstore_zone *zone) +{ + if (unlikely(!psz_old_ok(zone))) + return 0; + + kfree(zone->oldbuf); + zone->oldbuf = NULL; + /* + * if there are new data in zone buffer, that means the old data + * are already invalid. It is no need to flush 0 (erase) to + * block device. + */ + if (!buffer_datalen(zone)) + return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); + psz_flush_dirty_zone(zone); + return 0; +} + +static int psz_pstore_erase(struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + + switch (record->type) { + case PSTORE_TYPE_DMESG: + if (record->id >= cxt->kmsg_max_cnt) + return -EINVAL; + return psz_kmsg_erase(cxt, cxt->kpszs[record->id], record); + case PSTORE_TYPE_PMSG: + return psz_record_erase(cxt, cxt->ppsz); + case PSTORE_TYPE_CONSOLE: + return psz_record_erase(cxt, cxt->cpsz); + case PSTORE_TYPE_FTRACE: + if (record->id >= cxt->ftrace_max_cnt) + return -EINVAL; + return psz_record_erase(cxt, cxt->fpszs[record->id]); + default: return -EINVAL; + } +} + +static void psz_write_kmsg_hdr(struct pstore_zone *zone, + struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + struct psz_buffer *buffer = zone->buffer; + struct psz_kmsg_header *hdr = + (struct psz_kmsg_header *)buffer->data; + + hdr->magic = PSTORE_KMSG_HEADER_MAGIC; + hdr->compressed = record->compressed; + hdr->time.tv_sec = record->time.tv_sec; + hdr->time.tv_nsec = record->time.tv_nsec; + hdr->reason = record->reason; + if (hdr->reason == KMSG_DUMP_OOPS) + hdr->counter = ++cxt->oops_counter; + else if (hdr->reason == KMSG_DUMP_PANIC) + hdr->counter = ++cxt->panic_counter; + else + hdr->counter = 0; +} + +/* + * In case zone is broken, which may occur to MTD device, we try each zones, + * start at cxt->kmsg_write_cnt. + */ +static inline int notrace psz_kmsg_write_record(struct psz_context *cxt, + struct pstore_record *record) +{ + size_t size, hlen; + struct pstore_zone *zone; + unsigned int i; + + for (i = 0; i < cxt->kmsg_max_cnt; i++) { + unsigned int zonenum, len; + int ret; + + zonenum = (cxt->kmsg_write_cnt + i) % cxt->kmsg_max_cnt; + zone = cxt->kpszs[zonenum]; + if (unlikely(!zone)) + return -ENOSPC; + + /* avoid destroying old data, allocate a new one */ + len = zone->buffer_size + sizeof(*zone->buffer); + zone->oldbuf = zone->buffer; + zone->buffer = kzalloc(len, GFP_ATOMIC); + if (!zone->buffer) { + zone->buffer = zone->oldbuf; + return -ENOMEM; + } + zone->buffer->sig = zone->oldbuf->sig; + + pr_debug("write %s to zone id %d\n", zone->name, zonenum); + psz_write_kmsg_hdr(zone, record); + hlen = sizeof(struct psz_kmsg_header); + size = min_t(size_t, record->size, zone->buffer_size - hlen); + ret = psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen); + if (likely(!ret || ret != -ENOMSG)) { + cxt->kmsg_write_cnt = zonenum + 1; + cxt->kmsg_write_cnt %= cxt->kmsg_max_cnt; + /* no need to try next zone, free last zone buffer */ + kfree(zone->oldbuf); + zone->oldbuf = NULL; + return ret; + } + + pr_debug("zone %u may be broken, try next dmesg zone\n", + zonenum); + kfree(zone->buffer); + zone->buffer = zone->oldbuf; + zone->oldbuf = NULL; + } + + return -EBUSY; +} + +static int notrace psz_kmsg_write(struct psz_context *cxt, + struct pstore_record *record) +{ + int ret; + + /* + * Explicitly only take the first part of any new crash. + * If our buffer is larger than kmsg_bytes, this can never happen, + * and if our buffer is smaller than kmsg_bytes, we don't want the + * report split across multiple records. + */ + if (record->part != 1) + return -ENOSPC; + + if (!cxt->kpszs) + return -ENOSPC; + + ret = psz_kmsg_write_record(cxt, record); + if (!ret && is_on_panic()) { + /* ensure all data are flushed to storage when panic */ + pr_debug("try to flush other dirty zones\n"); + psz_flush_all_dirty_zones(NULL); + } + + /* always return 0 as we had handled it on buffer */ + return 0; +} + +static int notrace psz_record_write(struct pstore_zone *zone, + struct pstore_record *record) +{ + size_t start, rem; + bool is_full_data = false; + char *buf; + int cnt; + + if (!zone || !record) + return -ENOSPC; + + if (atomic_read(&zone->buffer->datalen) >= zone->buffer_size) + is_full_data = true; + + cnt = record->size; + buf = record->buf; + if (unlikely(cnt > zone->buffer_size)) { + buf += cnt - zone->buffer_size; + cnt = zone->buffer_size; + } + + start = buffer_start(zone); + rem = zone->buffer_size - start; + if (unlikely(rem < cnt)) { + psz_zone_write(zone, FLUSH_PART, buf, rem, start); + buf += rem; + cnt -= rem; + start = 0; + is_full_data = true; + } + + atomic_set(&zone->buffer->start, cnt + start); + psz_zone_write(zone, FLUSH_PART, buf, cnt, start); + + /** + * psz_zone_write will set datalen as start + cnt. + * It work if actual data length lesser than buffer size. + * If data length greater than buffer size, pmsg will rewrite to + * beginning of zone, which make buffer->datalen wrongly. + * So we should reset datalen as buffer size once actual data length + * greater than buffer size. + */ + if (is_full_data) { + atomic_set(&zone->buffer->datalen, zone->buffer_size); + psz_zone_write(zone, FLUSH_META, NULL, 0, 0); + } + return 0; +} + +static int notrace psz_pstore_write(struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + + if (record->type == PSTORE_TYPE_DMESG && + record->reason == KMSG_DUMP_PANIC) + atomic_set(&cxt->on_panic, 1); + + /* + * if on panic, do not write except panic records + * Fix case that panic_write prints log which wakes up console backend. + */ + if (is_on_panic() && record->type != PSTORE_TYPE_DMESG) + return -EBUSY; + + switch (record->type) { + case PSTORE_TYPE_DMESG: + return psz_kmsg_write(cxt, record); + case PSTORE_TYPE_CONSOLE: + return psz_record_write(cxt->cpsz, record); + case PSTORE_TYPE_PMSG: + return psz_record_write(cxt->ppsz, record); + case PSTORE_TYPE_FTRACE: { + int zonenum = smp_processor_id(); + + if (!cxt->fpszs) + return -ENOSPC; + return psz_record_write(cxt->fpszs[zonenum], record); + } + default: + return -EINVAL; + } +} + +static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt) +{ + struct pstore_zone *zone = NULL; + + while (cxt->kmsg_read_cnt < cxt->kmsg_max_cnt) { + zone = cxt->kpszs[cxt->kmsg_read_cnt++]; + if (psz_ok(zone)) + return zone; + } + + if (cxt->ftrace_read_cnt < cxt->ftrace_max_cnt) + /* + * No need psz_old_ok(). Let psz_ftrace_read() do so for + * combination. psz_ftrace_read() should traverse over + * all zones in case of some zone without data. + */ + return cxt->fpszs[cxt->ftrace_read_cnt++]; + + if (cxt->pmsg_read_cnt == 0) { + cxt->pmsg_read_cnt++; + zone = cxt->ppsz; + if (psz_old_ok(zone)) + return zone; + } + + if (cxt->console_read_cnt == 0) { + cxt->console_read_cnt++; + zone = cxt->cpsz; + if (psz_old_ok(zone)) + return zone; + } + + return NULL; +} + +static int psz_kmsg_read_hdr(struct pstore_zone *zone, + struct pstore_record *record) +{ + struct psz_buffer *buffer = zone->buffer; + struct psz_kmsg_header *hdr = + (struct psz_kmsg_header *)buffer->data; + + if (hdr->magic != PSTORE_KMSG_HEADER_MAGIC) + return -EINVAL; + record->compressed = hdr->compressed; + record->time.tv_sec = hdr->time.tv_sec; + record->time.tv_nsec = hdr->time.tv_nsec; + record->reason = hdr->reason; + record->count = hdr->counter; + return 0; +} + +static ssize_t psz_kmsg_read(struct pstore_zone *zone, + struct pstore_record *record) +{ + ssize_t size, hlen = 0; + + size = buffer_datalen(zone); + /* Clear and skip this kmsg dump record if it has no valid header */ + if (psz_kmsg_read_hdr(zone, record)) { + atomic_set(&zone->buffer->datalen, 0); + atomic_set(&zone->dirty, 0); + return -ENOMSG; + } + size -= sizeof(struct psz_kmsg_header); + + if (!record->compressed) { + char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n", + kmsg_dump_reason_str(record->reason), + record->count); + hlen = strlen(buf); + record->buf = krealloc(buf, hlen + size, GFP_KERNEL); + if (!record->buf) { + kfree(buf); + return -ENOMEM; + } + } else { + record->buf = kmalloc(size, GFP_KERNEL); + if (!record->buf) + return -ENOMEM; + } + + size = psz_zone_read_buffer(zone, record->buf + hlen, size, + sizeof(struct psz_kmsg_header)); + if (unlikely(size < 0)) { + kfree(record->buf); + return -ENOMSG; + } + + return size + hlen; +} + +/* try to combine all ftrace zones */ +static ssize_t psz_ftrace_read(struct pstore_zone *zone, + struct pstore_record *record) +{ + struct psz_context *cxt; + struct psz_buffer *buf; + int ret; + + if (!zone || !record) + return -ENOSPC; + + if (!psz_old_ok(zone)) + goto out; + + buf = (struct psz_buffer *)zone->oldbuf; + if (!buf) + return -ENOMSG; + + ret = pstore_ftrace_combine_log(&record->buf, &record->size, + (char *)buf->data, atomic_read(&buf->datalen)); + if (unlikely(ret)) + return ret; + +out: + cxt = record->psi->data; + if (cxt->ftrace_read_cnt < cxt->ftrace_max_cnt) + /* then, read next ftrace zone */ + return -ENOMSG; + record->id = 0; + return record->size ? record->size : -ENOMSG; +} + +static ssize_t psz_record_read(struct pstore_zone *zone, + struct pstore_record *record) +{ + size_t len; + struct psz_buffer *buf; + + if (!zone || !record) + return -ENOSPC; + + buf = (struct psz_buffer *)zone->oldbuf; + if (!buf) + return -ENOMSG; + + len = atomic_read(&buf->datalen); + record->buf = kmalloc(len, GFP_KERNEL); + if (!record->buf) + return -ENOMEM; + + if (unlikely(psz_zone_read_oldbuf(zone, record->buf, len, 0))) { + kfree(record->buf); + return -ENOMSG; + } + + return len; +} + +static ssize_t psz_pstore_read(struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + ssize_t (*readop)(struct pstore_zone *zone, + struct pstore_record *record); + struct pstore_zone *zone; + ssize_t ret; + + /* before read, we must recover from storage */ + ret = psz_recovery(cxt); + if (ret) + return ret; + +next_zone: + zone = psz_read_next_zone(cxt); + if (!zone) + return 0; + + record->type = zone->type; + switch (record->type) { + case PSTORE_TYPE_DMESG: + readop = psz_kmsg_read; + record->id = cxt->kmsg_read_cnt - 1; + break; + case PSTORE_TYPE_FTRACE: + readop = psz_ftrace_read; + break; + case PSTORE_TYPE_CONSOLE: + case PSTORE_TYPE_PMSG: + readop = psz_record_read; + break; + default: + goto next_zone; + } + + ret = readop(zone, record); + if (ret == -ENOMSG) + goto next_zone; + return ret; +} + +static struct psz_context pstore_zone_cxt = { + .pstore_zone_info_lock = + __MUTEX_INITIALIZER(pstore_zone_cxt.pstore_zone_info_lock), + .recovered = ATOMIC_INIT(0), + .on_panic = ATOMIC_INIT(0), + .pstore = { + .owner = THIS_MODULE, + .open = psz_pstore_open, + .read = psz_pstore_read, + .write = psz_pstore_write, + .erase = psz_pstore_erase, + }, +}; + +static void psz_free_zone(struct pstore_zone **pszone) +{ + struct pstore_zone *zone = *pszone; + + if (!zone) + return; + + kfree(zone->buffer); + kfree(zone); + *pszone = NULL; +} + +static void psz_free_zones(struct pstore_zone ***pszones, unsigned int *cnt) +{ + struct pstore_zone **zones = *pszones; + + if (!zones) + return; + + while (*cnt > 0) { + (*cnt)--; + psz_free_zone(&(zones[*cnt])); + } + kfree(zones); + *pszones = NULL; +} + +static void psz_free_all_zones(struct psz_context *cxt) +{ + if (cxt->kpszs) + psz_free_zones(&cxt->kpszs, &cxt->kmsg_max_cnt); + if (cxt->ppsz) + psz_free_zone(&cxt->ppsz); + if (cxt->cpsz) + psz_free_zone(&cxt->cpsz); + if (cxt->fpszs) + psz_free_zones(&cxt->fpszs, &cxt->ftrace_max_cnt); +} + +static struct pstore_zone *psz_init_zone(enum pstore_type_id type, + loff_t *off, size_t size) +{ + struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info; + struct pstore_zone *zone; + const char *name = pstore_type_to_name(type); + + if (!size) + return NULL; + + if (*off + size > info->total_size) { + pr_err("no room for %s (0x%zx@0x%llx over 0x%lx)\n", + name, size, *off, info->total_size); + return ERR_PTR(-ENOMEM); + } + + zone = kzalloc(sizeof(struct pstore_zone), GFP_KERNEL); + if (!zone) + return ERR_PTR(-ENOMEM); + + zone->buffer = kmalloc(size, GFP_KERNEL); + if (!zone->buffer) { + kfree(zone); + return ERR_PTR(-ENOMEM); + } + memset(zone->buffer, 0xFF, size); + zone->off = *off; + zone->name = name; + zone->type = type; + zone->buffer_size = size - sizeof(struct psz_buffer); + zone->buffer->sig = type ^ PSZ_SIG; + zone->oldbuf = NULL; + atomic_set(&zone->dirty, 0); + atomic_set(&zone->buffer->datalen, 0); + atomic_set(&zone->buffer->start, 0); + + *off += size; + + pr_debug("pszone %s: off 0x%llx, %zu header, %zu data\n", zone->name, + zone->off, sizeof(*zone->buffer), zone->buffer_size); + return zone; +} + +static struct pstore_zone **psz_init_zones(enum pstore_type_id type, + loff_t *off, size_t total_size, ssize_t record_size, + unsigned int *cnt) +{ + struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info; + struct pstore_zone **zones, *zone; + const char *name = pstore_type_to_name(type); + int c, i; + + *cnt = 0; + if (!total_size || !record_size) + return NULL; + + if (*off + total_size > info->total_size) { + pr_err("no room for zones %s (0x%zx@0x%llx over 0x%lx)\n", + name, total_size, *off, info->total_size); + return ERR_PTR(-ENOMEM); + } + + c = total_size / record_size; + zones = kcalloc(c, sizeof(*zones), GFP_KERNEL); + if (!zones) { + pr_err("allocate for zones %s failed\n", name); + return ERR_PTR(-ENOMEM); + } + memset(zones, 0, c * sizeof(*zones)); + + for (i = 0; i < c; i++) { + zone = psz_init_zone(type, off, record_size); + if (!zone || IS_ERR(zone)) { + pr_err("initialize zones %s failed\n", name); + psz_free_zones(&zones, &i); + return (void *)zone; + } + zones[i] = zone; + } + + *cnt = c; + return zones; +} + +static int psz_alloc_zones(struct psz_context *cxt) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + loff_t off = 0; + int err; + size_t off_size = 0; + + off_size += info->pmsg_size; + cxt->ppsz = psz_init_zone(PSTORE_TYPE_PMSG, &off, info->pmsg_size); + if (IS_ERR(cxt->ppsz)) { + err = PTR_ERR(cxt->ppsz); + cxt->ppsz = NULL; + goto free_out; + } + + off_size += info->console_size; + cxt->cpsz = psz_init_zone(PSTORE_TYPE_CONSOLE, &off, + info->console_size); + if (IS_ERR(cxt->cpsz)) { + err = PTR_ERR(cxt->cpsz); + cxt->cpsz = NULL; + goto free_out; + } + + off_size += info->ftrace_size; + cxt->fpszs = psz_init_zones(PSTORE_TYPE_FTRACE, &off, + info->ftrace_size, + info->ftrace_size / nr_cpu_ids, + &cxt->ftrace_max_cnt); + if (IS_ERR(cxt->fpszs)) { + err = PTR_ERR(cxt->fpszs); + cxt->fpszs = NULL; + goto free_out; + } + + cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, + info->total_size - off_size, + info->kmsg_size, &cxt->kmsg_max_cnt); + if (IS_ERR(cxt->kpszs)) { + err = PTR_ERR(cxt->kpszs); + cxt->kpszs = NULL; + goto free_out; + } + + return 0; +free_out: + psz_free_all_zones(cxt); + return err; +} + +/** + * register_pstore_zone() - register to pstore/zone + * + * @info: back-end driver information. See &struct pstore_zone_info. + * + * Only one back-end at one time. + * + * Return: 0 on success, others on failure. + */ +int register_pstore_zone(struct pstore_zone_info *info) +{ + int err = -EINVAL; + struct psz_context *cxt = &pstore_zone_cxt; + + if (info->total_size < 4096) { + pr_warn("total_size must be >= 4096\n"); + return -EINVAL; + } + if (info->total_size > SZ_128M) { + pr_warn("capping size to 128MiB\n"); + info->total_size = SZ_128M; + } + + if (!info->kmsg_size && !info->pmsg_size && !info->console_size && + !info->ftrace_size) { + pr_warn("at least one record size must be non-zero\n"); + return -EINVAL; + } + + if (!info->name || !info->name[0]) + return -EINVAL; + +#define check_size(name, size) { \ + if (info->name > 0 && info->name < (size)) { \ + pr_err(#name " must be over %d\n", (size)); \ + return -EINVAL; \ + } \ + if (info->name & (size - 1)) { \ + pr_err(#name " must be a multiple of %d\n", \ + (size)); \ + return -EINVAL; \ + } \ + } + + check_size(total_size, 4096); + check_size(kmsg_size, SECTOR_SIZE); + check_size(pmsg_size, SECTOR_SIZE); + check_size(console_size, SECTOR_SIZE); + check_size(ftrace_size, SECTOR_SIZE); + +#undef check_size + + /* + * the @read and @write must be applied. + * if no @read, pstore may mount failed. + * if no @write, pstore do not support to remove record file. + */ + if (!info->read || !info->write) { + pr_err("no valid general read/write interface\n"); + return -EINVAL; + } + + mutex_lock(&cxt->pstore_zone_info_lock); + if (cxt->pstore_zone_info) { + pr_warn("'%s' already loaded: ignoring '%s'\n", + cxt->pstore_zone_info->name, info->name); + mutex_unlock(&cxt->pstore_zone_info_lock); + return -EBUSY; + } + cxt->pstore_zone_info = info; + + pr_debug("register %s with properties:\n", info->name); + pr_debug("\ttotal size : %ld Bytes\n", info->total_size); + pr_debug("\tkmsg size : %ld Bytes\n", info->kmsg_size); + pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size); + pr_debug("\tconsole size : %ld Bytes\n", info->console_size); + pr_debug("\tftrace size : %ld Bytes\n", info->ftrace_size); + + err = psz_alloc_zones(cxt); + if (err) { + pr_err("alloc zones failed\n"); + goto fail_out; + } + + if (info->kmsg_size) { + cxt->pstore.bufsize = cxt->kpszs[0]->buffer_size - + sizeof(struct psz_kmsg_header); + cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL); + if (!cxt->pstore.buf) { + err = -ENOMEM; + goto fail_free; + } + } + cxt->pstore.data = cxt; + + pr_info("registered %s as backend for", info->name); + cxt->pstore.max_reason = info->max_reason; + cxt->pstore.name = info->name; + if (info->kmsg_size) { + cxt->pstore.flags |= PSTORE_FLAGS_DMESG; + pr_cont(" kmsg(%s", + kmsg_dump_reason_str(cxt->pstore.max_reason)); + if (cxt->pstore_zone_info->panic_write) + pr_cont(",panic_write"); + pr_cont(")"); + } + if (info->pmsg_size) { + cxt->pstore.flags |= PSTORE_FLAGS_PMSG; + pr_cont(" pmsg"); + } + if (info->console_size) { + cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE; + pr_cont(" console"); + } + if (info->ftrace_size) { + cxt->pstore.flags |= PSTORE_FLAGS_FTRACE; + pr_cont(" ftrace"); + } + pr_cont("\n"); + + err = pstore_register(&cxt->pstore); + if (err) { + pr_err("registering with pstore failed\n"); + goto fail_free; + } + mutex_unlock(&pstore_zone_cxt.pstore_zone_info_lock); + + return 0; + +fail_free: + kfree(cxt->pstore.buf); + cxt->pstore.buf = NULL; + cxt->pstore.bufsize = 0; + psz_free_all_zones(cxt); +fail_out: + pstore_zone_cxt.pstore_zone_info = NULL; + mutex_unlock(&pstore_zone_cxt.pstore_zone_info_lock); + return err; +} +EXPORT_SYMBOL_GPL(register_pstore_zone); + +/** + * unregister_pstore_zone() - unregister to pstore/zone + * + * @info: back-end driver information. See struct pstore_zone_info. + */ +void unregister_pstore_zone(struct pstore_zone_info *info) +{ + struct psz_context *cxt = &pstore_zone_cxt; + + mutex_lock(&cxt->pstore_zone_info_lock); + if (!cxt->pstore_zone_info) { + mutex_unlock(&cxt->pstore_zone_info_lock); + return; + } + + /* Stop incoming writes from pstore. */ + pstore_unregister(&cxt->pstore); + + /* Flush any pending writes. */ + psz_flush_all_dirty_zones(NULL); + flush_delayed_work(&psz_cleaner); + + /* Clean up allocations. */ + kfree(cxt->pstore.buf); + cxt->pstore.buf = NULL; + cxt->pstore.bufsize = 0; + cxt->pstore_zone_info = NULL; + + psz_free_all_zones(cxt); + + /* Clear counters and zone state. */ + cxt->oops_counter = 0; + cxt->panic_counter = 0; + atomic_set(&cxt->recovered, 0); + atomic_set(&cxt->on_panic, 0); + + mutex_unlock(&cxt->pstore_zone_info_lock); +} +EXPORT_SYMBOL_GPL(unregister_pstore_zone); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("WeiXiong Liao "); +MODULE_AUTHOR("Kees Cook "); +MODULE_DESCRIPTION("Storage Manager for pstore/blk"); -- cgit v1.2.3