From 2c3c1048746a4622d8c89a29670120dc8fab93c4 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sun, 7 Apr 2024 20:49:45 +0200
Subject: Adding upstream version 6.1.76.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 mm/kfence/.kunitconfig  |    6 +
 mm/kfence/Makefile      |    6 +
 mm/kfence/core.c        | 1164 +++++++++++++++++++++++++++++++++++++++++++++++
 mm/kfence/kfence.h      |  134 ++++++
 mm/kfence/kfence_test.c |  870 +++++++++++++++++++++++++++++++++++
 mm/kfence/report.c      |  327 +++++++++++++
 6 files changed, 2507 insertions(+)
 create mode 100644 mm/kfence/.kunitconfig
 create mode 100644 mm/kfence/Makefile
 create mode 100644 mm/kfence/core.c
 create mode 100644 mm/kfence/kfence.h
 create mode 100644 mm/kfence/kfence_test.c
 create mode 100644 mm/kfence/report.c

(limited to 'mm/kfence')

diff --git a/mm/kfence/.kunitconfig b/mm/kfence/.kunitconfig
new file mode 100644
index 000000000..f3d65e939
--- /dev/null
+++ b/mm/kfence/.kunitconfig
@@ -0,0 +1,6 @@
+CONFIG_KUNIT=y
+CONFIG_KFENCE=y
+CONFIG_KFENCE_KUNIT_TEST=y
+
+# Additional dependencies.
+CONFIG_FTRACE=y
diff --git a/mm/kfence/Makefile b/mm/kfence/Makefile
new file mode 100644
index 000000000..2de2a58d1
--- /dev/null
+++ b/mm/kfence/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y := core.o report.o
+
+CFLAGS_kfence_test.o := -fno-omit-frame-pointer -fno-optimize-sibling-calls
+obj-$(CONFIG_KFENCE_KUNIT_TEST) += kfence_test.o
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
new file mode 100644
index 000000000..c597cfebb
--- /dev/null
+++ b/mm/kfence/core.c
@@ -0,0 +1,1164 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KFENCE guarded object allocator and fault handling.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#define pr_fmt(fmt) "kfence: " fmt
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/debugfs.h>
+#include <linux/hash.h>
+#include <linux/irq_work.h>
+#include <linux/jhash.h>
+#include <linux/kcsan-checks.h>
+#include <linux/kfence.h>
+#include <linux/kmemleak.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/log2.h>
+#include <linux/memblock.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/panic_notifier.h>
+#include <linux/random.h>
+#include <linux/rcupdate.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/sysctl.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#include <asm/kfence.h>
+
+#include "kfence.h"
+
+/* Disables KFENCE on the first warning assuming an irrecoverable error. */
+#define KFENCE_WARN_ON(cond)                                                   \
+	({                                                                     \
+		const bool __cond = WARN_ON(cond);                             \
+		if (unlikely(__cond)) {                                        \
+			WRITE_ONCE(kfence_enabled, false);                     \
+			disabled_by_warn = true;                               \
+		}                                                              \
+		__cond;                                                        \
+	})
+
+/* === Data ================================================================= */
+
+static bool kfence_enabled __read_mostly;
+static bool disabled_by_warn __read_mostly;
+
+unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
+EXPORT_SYMBOL_GPL(kfence_sample_interval); /* Export for test modules. */
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "kfence."
+
+static int kfence_enable_late(void);
+static int param_set_sample_interval(const char *val, const struct kernel_param *kp)
+{
+	unsigned long num;
+	int ret = kstrtoul(val, 0, &num);
+
+	if (ret < 0)
+		return ret;
+
+	/* Using 0 to indicate KFENCE is disabled. */
+	if (!num && READ_ONCE(kfence_enabled)) {
+		pr_info("disabled\n");
+		WRITE_ONCE(kfence_enabled, false);
+	}
+
+	*((unsigned long *)kp->arg) = num;
+
+	if (num && !READ_ONCE(kfence_enabled) && system_state != SYSTEM_BOOTING)
+		return disabled_by_warn ? -EINVAL : kfence_enable_late();
+	return 0;
+}
+
+static int param_get_sample_interval(char *buffer, const struct kernel_param *kp)
+{
+	if (!READ_ONCE(kfence_enabled))
+		return sprintf(buffer, "0\n");
+
+	return param_get_ulong(buffer, kp);
+}
+
+static const struct kernel_param_ops sample_interval_param_ops = {
+	.set = param_set_sample_interval,
+	.get = param_get_sample_interval,
+};
+module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600);
+
+/* Pool usage% threshold when currently covered allocations are skipped. */
+static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
+module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);
+
+/* If true, use a deferrable timer. */
+static bool kfence_deferrable __read_mostly = IS_ENABLED(CONFIG_KFENCE_DEFERRABLE);
+module_param_named(deferrable, kfence_deferrable, bool, 0444);
+
+/* If true, check all canary bytes on panic. */
+static bool kfence_check_on_panic __read_mostly;
+module_param_named(check_on_panic, kfence_check_on_panic, bool, 0444);
+
+/* The pool of pages used for guard pages and objects. */
+char *__kfence_pool __read_mostly;
+EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
+
+/*
+ * Per-object metadata, with one-to-one mapping of object metadata to
+ * backing pages (in __kfence_pool).
+ */
+static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
+struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+
+/* Freelist with available objects. */
+static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
+static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */
+
+/*
+ * The static key to set up a KFENCE allocation; or if static keys are not used
+ * to gate allocations, to avoid a load and compare if KFENCE is disabled.
+ */
+DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);
+
+/* Gates the allocation, ensuring only one succeeds in a given period. */
+atomic_t kfence_allocation_gate = ATOMIC_INIT(1);
+
+/*
+ * A Counting Bloom filter of allocation coverage: limits currently covered
+ * allocations of the same source filling up the pool.
+ *
+ * Assuming a range of 15%-85% unique allocations in the pool at any point in
+ * time, the below parameters provide a probablity of 0.02-0.33 for false
+ * positive hits respectively:
+ *
+ *	P(alloc_traces) = (1 - e^(-HNUM * (alloc_traces / SIZE)) ^ HNUM
+ */
+#define ALLOC_COVERED_HNUM	2
+#define ALLOC_COVERED_ORDER	(const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2)
+#define ALLOC_COVERED_SIZE	(1 << ALLOC_COVERED_ORDER)
+#define ALLOC_COVERED_HNEXT(h)	hash_32(h, ALLOC_COVERED_ORDER)
+#define ALLOC_COVERED_MASK	(ALLOC_COVERED_SIZE - 1)
+static atomic_t alloc_covered[ALLOC_COVERED_SIZE];
+
+/* Stack depth used to determine uniqueness of an allocation. */
+#define UNIQUE_ALLOC_STACK_DEPTH ((size_t)8)
+
+/*
+ * Randomness for stack hashes, making the same collisions across reboots and
+ * different machines less likely.
+ */
+static u32 stack_hash_seed __ro_after_init;
+
+/* Statistics counters for debugfs. */
+enum kfence_counter_id {
+	KFENCE_COUNTER_ALLOCATED,
+	KFENCE_COUNTER_ALLOCS,
+	KFENCE_COUNTER_FREES,
+	KFENCE_COUNTER_ZOMBIES,
+	KFENCE_COUNTER_BUGS,
+	KFENCE_COUNTER_SKIP_INCOMPAT,
+	KFENCE_COUNTER_SKIP_CAPACITY,
+	KFENCE_COUNTER_SKIP_COVERED,
+	KFENCE_COUNTER_COUNT,
+};
+static atomic_long_t counters[KFENCE_COUNTER_COUNT];
+static const char *const counter_names[] = {
+	[KFENCE_COUNTER_ALLOCATED]	= "currently allocated",
+	[KFENCE_COUNTER_ALLOCS]		= "total allocations",
+	[KFENCE_COUNTER_FREES]		= "total frees",
+	[KFENCE_COUNTER_ZOMBIES]	= "zombie allocations",
+	[KFENCE_COUNTER_BUGS]		= "total bugs",
+	[KFENCE_COUNTER_SKIP_INCOMPAT]	= "skipped allocations (incompatible)",
+	[KFENCE_COUNTER_SKIP_CAPACITY]	= "skipped allocations (capacity)",
+	[KFENCE_COUNTER_SKIP_COVERED]	= "skipped allocations (covered)",
+};
+static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT);
+
+/* === Internals ============================================================ */
+
+static inline bool should_skip_covered(void)
+{
+	unsigned long thresh = (CONFIG_KFENCE_NUM_OBJECTS * kfence_skip_covered_thresh) / 100;
+
+	return atomic_long_read(&counters[KFENCE_COUNTER_ALLOCATED]) > thresh;
+}
+
+static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries)
+{
+	num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH);
+	num_entries = filter_irq_stacks(stack_entries, num_entries);
+	return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), stack_hash_seed);
+}
+
+/*
+ * Adds (or subtracts) count @val for allocation stack trace hash
+ * @alloc_stack_hash from Counting Bloom filter.
+ */
+static void alloc_covered_add(u32 alloc_stack_hash, int val)
+{
+	int i;
+
+	for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
+		atomic_add(val, &alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]);
+		alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
+	}
+}
+
+/*
+ * Returns true if the allocation stack trace hash @alloc_stack_hash is
+ * currently contained (non-zero count) in Counting Bloom filter.
+ */
+static bool alloc_covered_contains(u32 alloc_stack_hash)
+{
+	int i;
+
+	for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
+		if (!atomic_read(&alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]))
+			return false;
+		alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
+	}
+
+	return true;
+}
+
+static bool kfence_protect(unsigned long addr)
+{
+	return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true));
+}
+
+static bool kfence_unprotect(unsigned long addr)
+{
+	return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false));
+}
+
+static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
+{
+	unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2;
+	unsigned long pageaddr = (unsigned long)&__kfence_pool[offset];
+
+	/* The checks do not affect performance; only called from slow-paths. */
+
+	/* Only call with a pointer into kfence_metadata. */
+	if (KFENCE_WARN_ON(meta < kfence_metadata ||
+			   meta >= kfence_metadata + CONFIG_KFENCE_NUM_OBJECTS))
+		return 0;
+
+	/*
+	 * This metadata object only ever maps to 1 page; verify that the stored
+	 * address is in the expected range.
+	 */
+	if (KFENCE_WARN_ON(ALIGN_DOWN(meta->addr, PAGE_SIZE) != pageaddr))
+		return 0;
+
+	return pageaddr;
+}
+
+/*
+ * Update the object's metadata state, including updating the alloc/free stacks
+ * depending on the state transition.
+ */
+static noinline void
+metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state next,
+		      unsigned long *stack_entries, size_t num_stack_entries)
+{
+	struct kfence_track *track =
+		next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track;
+
+	lockdep_assert_held(&meta->lock);
+
+	if (stack_entries) {
+		memcpy(track->stack_entries, stack_entries,
+		       num_stack_entries * sizeof(stack_entries[0]));
+	} else {
+		/*
+		 * Skip over 1 (this) functions; noinline ensures we do not
+		 * accidentally skip over the caller by never inlining.
+		 */
+		num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1);
+	}
+	track->num_stack_entries = num_stack_entries;
+	track->pid = task_pid_nr(current);
+	track->cpu = raw_smp_processor_id();
+	track->ts_nsec = local_clock(); /* Same source as printk timestamps. */
+
+	/*
+	 * Pairs with READ_ONCE() in
+	 *	kfence_shutdown_cache(),
+	 *	kfence_handle_page_fault().
+	 */
+	WRITE_ONCE(meta->state, next);
+}
+
+/* Write canary byte to @addr. */
+static inline bool set_canary_byte(u8 *addr)
+{
+	*addr = KFENCE_CANARY_PATTERN(addr);
+	return true;
+}
+
+/* Check canary byte at @addr. */
+static inline bool check_canary_byte(u8 *addr)
+{
+	struct kfence_metadata *meta;
+	unsigned long flags;
+
+	if (likely(*addr == KFENCE_CANARY_PATTERN(addr)))
+		return true;
+
+	atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
+
+	meta = addr_to_metadata((unsigned long)addr);
+	raw_spin_lock_irqsave(&meta->lock, flags);
+	kfence_report_error((unsigned long)addr, false, NULL, meta, KFENCE_ERROR_CORRUPTION);
+	raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+	return false;
+}
+
+/* __always_inline this to ensure we won't do an indirect call to fn. */
+static __always_inline void for_each_canary(const struct kfence_metadata *meta, bool (*fn)(u8 *))
+{
+	const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
+	unsigned long addr;
+
+	/*
+	 * We'll iterate over each canary byte per-side until fn() returns
+	 * false. However, we'll still iterate over the canary bytes to the
+	 * right of the object even if there was an error in the canary bytes to
+	 * the left of the object. Specifically, if check_canary_byte()
+	 * generates an error, showing both sides might give more clues as to
+	 * what the error is about when displaying which bytes were corrupted.
+	 */
+
+	/* Apply to left of object. */
+	for (addr = pageaddr; addr < meta->addr; addr++) {
+		if (!fn((u8 *)addr))
+			break;
+	}
+
+	/* Apply to right of object. */
+	for (addr = meta->addr + meta->size; addr < pageaddr + PAGE_SIZE; addr++) {
+		if (!fn((u8 *)addr))
+			break;
+	}
+}
+
+static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp,
+				  unsigned long *stack_entries, size_t num_stack_entries,
+				  u32 alloc_stack_hash)
+{
+	struct kfence_metadata *meta = NULL;
+	unsigned long flags;
+	struct slab *slab;
+	void *addr;
+	const bool random_right_allocate = prandom_u32_max(2);
+	const bool random_fault = CONFIG_KFENCE_STRESS_TEST_FAULTS &&
+				  !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS);
+
+	/* Try to obtain a free object. */
+	raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
+	if (!list_empty(&kfence_freelist)) {
+		meta = list_entry(kfence_freelist.next, struct kfence_metadata, list);
+		list_del_init(&meta->list);
+	}
+	raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+	if (!meta) {
+		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_CAPACITY]);
+		return NULL;
+	}
+
+	if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) {
+		/*
+		 * This is extremely unlikely -- we are reporting on a
+		 * use-after-free, which locked meta->lock, and the reporting
+		 * code via printk calls kmalloc() which ends up in
+		 * kfence_alloc() and tries to grab the same object that we're
+		 * reporting on. While it has never been observed, lockdep does
+		 * report that there is a possibility of deadlock. Fix it by
+		 * using trylock and bailing out gracefully.
+		 */
+		raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
+		/* Put the object back on the freelist. */
+		list_add_tail(&meta->list, &kfence_freelist);
+		raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+
+		return NULL;
+	}
+
+	meta->addr = metadata_to_pageaddr(meta);
+	/* Unprotect if we're reusing this page. */
+	if (meta->state == KFENCE_OBJECT_FREED)
+		kfence_unprotect(meta->addr);
+
+	/*
+	 * Note: for allocations made before RNG initialization, will always
+	 * return zero. We still benefit from enabling KFENCE as early as
+	 * possible, even when the RNG is not yet available, as this will allow
+	 * KFENCE to detect bugs due to earlier allocations. The only downside
+	 * is that the out-of-bounds accesses detected are deterministic for
+	 * such allocations.
+	 */
+	if (random_right_allocate) {
+		/* Allocate on the "right" side, re-calculate address. */
+		meta->addr += PAGE_SIZE - size;
+		meta->addr = ALIGN_DOWN(meta->addr, cache->align);
+	}
+
+	addr = (void *)meta->addr;
+
+	/* Update remaining metadata. */
+	metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED, stack_entries, num_stack_entries);
+	/* Pairs with READ_ONCE() in kfence_shutdown_cache(). */
+	WRITE_ONCE(meta->cache, cache);
+	meta->size = size;
+	meta->alloc_stack_hash = alloc_stack_hash;
+	raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+	alloc_covered_add(alloc_stack_hash, 1);
+
+	/* Set required slab fields. */
+	slab = virt_to_slab((void *)meta->addr);
+	slab->slab_cache = cache;
+#if defined(CONFIG_SLUB)
+	slab->objects = 1;
+#elif defined(CONFIG_SLAB)
+	slab->s_mem = addr;
+#endif
+
+	/* Memory initialization. */
+	for_each_canary(meta, set_canary_byte);
+
+	/*
+	 * We check slab_want_init_on_alloc() ourselves, rather than letting
+	 * SL*B do the initialization, as otherwise we might overwrite KFENCE's
+	 * redzone.
+	 */
+	if (unlikely(slab_want_init_on_alloc(gfp, cache)))
+		memzero_explicit(addr, size);
+	if (cache->ctor)
+		cache->ctor(addr);
+
+	if (random_fault)
+		kfence_protect(meta->addr); /* Random "faults" by protecting the object. */
+
+	atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]);
+	atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCS]);
+
+	return addr;
+}
+
+static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool zombie)
+{
+	struct kcsan_scoped_access assert_page_exclusive;
+	unsigned long flags;
+	bool init;
+
+	raw_spin_lock_irqsave(&meta->lock, flags);
+
+	if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) {
+		/* Invalid or double-free, bail out. */
+		atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
+		kfence_report_error((unsigned long)addr, false, NULL, meta,
+				    KFENCE_ERROR_INVALID_FREE);
+		raw_spin_unlock_irqrestore(&meta->lock, flags);
+		return;
+	}
+
+	/* Detect racy use-after-free, or incorrect reallocation of this page by KFENCE. */
+	kcsan_begin_scoped_access((void *)ALIGN_DOWN((unsigned long)addr, PAGE_SIZE), PAGE_SIZE,
+				  KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT,
+				  &assert_page_exclusive);
+
+	if (CONFIG_KFENCE_STRESS_TEST_FAULTS)
+		kfence_unprotect((unsigned long)addr); /* To check canary bytes. */
+
+	/* Restore page protection if there was an OOB access. */
+	if (meta->unprotected_page) {
+		memzero_explicit((void *)ALIGN_DOWN(meta->unprotected_page, PAGE_SIZE), PAGE_SIZE);
+		kfence_protect(meta->unprotected_page);
+		meta->unprotected_page = 0;
+	}
+
+	/* Mark the object as freed. */
+	metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0);
+	init = slab_want_init_on_free(meta->cache);
+	raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+	alloc_covered_add(meta->alloc_stack_hash, -1);
+
+	/* Check canary bytes for memory corruption. */
+	for_each_canary(meta, check_canary_byte);
+
+	/*
+	 * Clear memory if init-on-free is set. While we protect the page, the
+	 * data is still there, and after a use-after-free is detected, we
+	 * unprotect the page, so the data is still accessible.
+	 */
+	if (!zombie && unlikely(init))
+		memzero_explicit(addr, meta->size);
+
+	/* Protect to detect use-after-frees. */
+	kfence_protect((unsigned long)addr);
+
+	kcsan_end_scoped_access(&assert_page_exclusive);
+	if (!zombie) {
+		/* Add it to the tail of the freelist for reuse. */
+		raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
+		KFENCE_WARN_ON(!list_empty(&meta->list));
+		list_add_tail(&meta->list, &kfence_freelist);
+		raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+
+		atomic_long_dec(&counters[KFENCE_COUNTER_ALLOCATED]);
+		atomic_long_inc(&counters[KFENCE_COUNTER_FREES]);
+	} else {
+		/* See kfence_shutdown_cache(). */
+		atomic_long_inc(&counters[KFENCE_COUNTER_ZOMBIES]);
+	}
+}
+
+static void rcu_guarded_free(struct rcu_head *h)
+{
+	struct kfence_metadata *meta = container_of(h, struct kfence_metadata, rcu_head);
+
+	kfence_guarded_free((void *)meta->addr, meta, false);
+}
+
+/*
+ * Initialization of the KFENCE pool after its allocation.
+ * Returns 0 on success; otherwise returns the address up to
+ * which partial initialization succeeded.
+ */
+static unsigned long kfence_init_pool(void)
+{
+	unsigned long addr = (unsigned long)__kfence_pool;
+	struct page *pages;
+	int i;
+
+	if (!arch_kfence_init_pool())
+		return addr;
+
+	pages = virt_to_page(__kfence_pool);
+
+	/*
+	 * Set up object pages: they must have PG_slab set, to avoid freeing
+	 * these as real pages.
+	 *
+	 * We also want to avoid inserting kfence_free() in the kfree()
+	 * fast-path in SLUB, and therefore need to ensure kfree() correctly
+	 * enters __slab_free() slow-path.
+	 */
+	for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
+		struct slab *slab = page_slab(nth_page(pages, i));
+
+		if (!i || (i % 2))
+			continue;
+
+		__folio_set_slab(slab_folio(slab));
+#ifdef CONFIG_MEMCG
+		slab->memcg_data = (unsigned long)&kfence_metadata[i / 2 - 1].objcg |
+				   MEMCG_DATA_OBJCGS;
+#endif
+	}
+
+	/*
+	 * Protect the first 2 pages. The first page is mostly unnecessary, and
+	 * merely serves as an extended guard page. However, adding one
+	 * additional page in the beginning gives us an even number of pages,
+	 * which simplifies the mapping of address to metadata index.
+	 */
+	for (i = 0; i < 2; i++) {
+		if (unlikely(!kfence_protect(addr)))
+			return addr;
+
+		addr += PAGE_SIZE;
+	}
+
+	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+		struct kfence_metadata *meta = &kfence_metadata[i];
+
+		/* Initialize metadata. */
+		INIT_LIST_HEAD(&meta->list);
+		raw_spin_lock_init(&meta->lock);
+		meta->state = KFENCE_OBJECT_UNUSED;
+		meta->addr = addr; /* Initialize for validation in metadata_to_pageaddr(). */
+		list_add_tail(&meta->list, &kfence_freelist);
+
+		/* Protect the right redzone. */
+		if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
+			goto reset_slab;
+
+		addr += 2 * PAGE_SIZE;
+	}
+
+	return 0;
+
+reset_slab:
+	for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
+		struct slab *slab = page_slab(nth_page(pages, i));
+
+		if (!i || (i % 2))
+			continue;
+#ifdef CONFIG_MEMCG
+		slab->memcg_data = 0;
+#endif
+		__folio_clear_slab(slab_folio(slab));
+	}
+
+	return addr;
+}
+
+static bool __init kfence_init_pool_early(void)
+{
+	unsigned long addr;
+
+	if (!__kfence_pool)
+		return false;
+
+	addr = kfence_init_pool();
+
+	if (!addr) {
+		/*
+		 * The pool is live and will never be deallocated from this point on.
+		 * Ignore the pool object from the kmemleak phys object tree, as it would
+		 * otherwise overlap with allocations returned by kfence_alloc(), which
+		 * are registered with kmemleak through the slab post-alloc hook.
+		 */
+		kmemleak_ignore_phys(__pa(__kfence_pool));
+		return true;
+	}
+
+	/*
+	 * Only release unprotected pages, and do not try to go back and change
+	 * page attributes due to risk of failing to do so as well. If changing
+	 * page attributes for some pages fails, it is very likely that it also
+	 * fails for the first page, and therefore expect addr==__kfence_pool in
+	 * most failure cases.
+	 */
+	memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
+	__kfence_pool = NULL;
+	return false;
+}
+
+static bool kfence_init_pool_late(void)
+{
+	unsigned long addr, free_size;
+
+	addr = kfence_init_pool();
+
+	if (!addr)
+		return true;
+
+	/* Same as above. */
+	free_size = KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool);
+#ifdef CONFIG_CONTIG_ALLOC
+	free_contig_range(page_to_pfn(virt_to_page((void *)addr)), free_size / PAGE_SIZE);
+#else
+	free_pages_exact((void *)addr, free_size);
+#endif
+	__kfence_pool = NULL;
+	return false;
+}
+
+/* === DebugFS Interface ==================================================== */
+
+static int stats_show(struct seq_file *seq, void *v)
+{
+	int i;
+
+	seq_printf(seq, "enabled: %i\n", READ_ONCE(kfence_enabled));
+	for (i = 0; i < KFENCE_COUNTER_COUNT; i++)
+		seq_printf(seq, "%s: %ld\n", counter_names[i], atomic_long_read(&counters[i]));
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(stats);
+
+/*
+ * debugfs seq_file operations for /sys/kernel/debug/kfence/objects.
+ * start_object() and next_object() return the object index + 1, because NULL is used
+ * to stop iteration.
+ */
+static void *start_object(struct seq_file *seq, loff_t *pos)
+{
+	if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
+		return (void *)((long)*pos + 1);
+	return NULL;
+}
+
+static void stop_object(struct seq_file *seq, void *v)
+{
+}
+
+static void *next_object(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
+		return (void *)((long)*pos + 1);
+	return NULL;
+}
+
+static int show_object(struct seq_file *seq, void *v)
+{
+	struct kfence_metadata *meta = &kfence_metadata[(long)v - 1];
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&meta->lock, flags);
+	kfence_print_object(seq, meta);
+	raw_spin_unlock_irqrestore(&meta->lock, flags);
+	seq_puts(seq, "---------------------------------\n");
+
+	return 0;
+}
+
+static const struct seq_operations objects_sops = {
+	.start = start_object,
+	.next = next_object,
+	.stop = stop_object,
+	.show = show_object,
+};
+DEFINE_SEQ_ATTRIBUTE(objects);
+
+static int kfence_debugfs_init(void)
+{
+	struct dentry *kfence_dir;
+
+	if (!READ_ONCE(kfence_enabled))
+		return 0;
+
+	kfence_dir = debugfs_create_dir("kfence", NULL);
+	debugfs_create_file("stats", 0444, kfence_dir, NULL, &stats_fops);
+	debugfs_create_file("objects", 0400, kfence_dir, NULL, &objects_fops);
+	return 0;
+}
+
+late_initcall(kfence_debugfs_init);
+
+/* === Panic Notifier ====================================================== */
+
+static void kfence_check_all_canary(void)
+{
+	int i;
+
+	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+		struct kfence_metadata *meta = &kfence_metadata[i];
+
+		if (meta->state == KFENCE_OBJECT_ALLOCATED)
+			for_each_canary(meta, check_canary_byte);
+	}
+}
+
+static int kfence_check_canary_callback(struct notifier_block *nb,
+					unsigned long reason, void *arg)
+{
+	kfence_check_all_canary();
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kfence_check_canary_notifier = {
+	.notifier_call = kfence_check_canary_callback,
+};
+
+/* === Allocation Gate Timer ================================================ */
+
+static struct delayed_work kfence_timer;
+
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+/* Wait queue to wake up allocation-gate timer task. */
+static DECLARE_WAIT_QUEUE_HEAD(allocation_wait);
+
+static void wake_up_kfence_timer(struct irq_work *work)
+{
+	wake_up(&allocation_wait);
+}
+static DEFINE_IRQ_WORK(wake_up_kfence_timer_work, wake_up_kfence_timer);
+#endif
+
+/*
+ * Set up delayed work, which will enable and disable the static key. We need to
+ * use a work queue (rather than a simple timer), since enabling and disabling a
+ * static key cannot be done from an interrupt.
+ *
+ * Note: Toggling a static branch currently causes IPIs, and here we'll end up
+ * with a total of 2 IPIs to all CPUs. If this ends up a problem in future (with
+ * more aggressive sampling intervals), we could get away with a variant that
+ * avoids IPIs, at the cost of not immediately capturing allocations if the
+ * instructions remain cached.
+ */
+static void toggle_allocation_gate(struct work_struct *work)
+{
+	if (!READ_ONCE(kfence_enabled))
+		return;
+
+	atomic_set(&kfence_allocation_gate, 0);
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+	/* Enable static key, and await allocation to happen. */
+	static_branch_enable(&kfence_allocation_key);
+
+	if (sysctl_hung_task_timeout_secs) {
+		/*
+		 * During low activity with no allocations we might wait a
+		 * while; let's avoid the hung task warning.
+		 */
+		wait_event_idle_timeout(allocation_wait, atomic_read(&kfence_allocation_gate),
+					sysctl_hung_task_timeout_secs * HZ / 2);
+	} else {
+		wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate));
+	}
+
+	/* Disable static key and reset timer. */
+	static_branch_disable(&kfence_allocation_key);
+#endif
+	queue_delayed_work(system_unbound_wq, &kfence_timer,
+			   msecs_to_jiffies(kfence_sample_interval));
+}
+
+/* === Public interface ===================================================== */
+
+void __init kfence_alloc_pool(void)
+{
+	if (!kfence_sample_interval)
+		return;
+
+	/* if the pool has already been initialized by arch, skip the below. */
+	if (__kfence_pool)
+		return;
+
+	__kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+
+	if (!__kfence_pool)
+		pr_err("failed to allocate pool\n");
+}
+
+static void kfence_init_enable(void)
+{
+	if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
+		static_branch_enable(&kfence_allocation_key);
+
+	if (kfence_deferrable)
+		INIT_DEFERRABLE_WORK(&kfence_timer, toggle_allocation_gate);
+	else
+		INIT_DELAYED_WORK(&kfence_timer, toggle_allocation_gate);
+
+	if (kfence_check_on_panic)
+		atomic_notifier_chain_register(&panic_notifier_list, &kfence_check_canary_notifier);
+
+	WRITE_ONCE(kfence_enabled, true);
+	queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
+
+	pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
+		CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
+		(void *)(__kfence_pool + KFENCE_POOL_SIZE));
+}
+
+void __init kfence_init(void)
+{
+	stack_hash_seed = get_random_u32();
+
+	/* Setting kfence_sample_interval to 0 on boot disables KFENCE. */
+	if (!kfence_sample_interval)
+		return;
+
+	if (!kfence_init_pool_early()) {
+		pr_err("%s failed\n", __func__);
+		return;
+	}
+
+	kfence_init_enable();
+}
+
+static int kfence_init_late(void)
+{
+	const unsigned long nr_pages = KFENCE_POOL_SIZE / PAGE_SIZE;
+#ifdef CONFIG_CONTIG_ALLOC
+	struct page *pages;
+
+	pages = alloc_contig_pages(nr_pages, GFP_KERNEL, first_online_node, NULL);
+	if (!pages)
+		return -ENOMEM;
+	__kfence_pool = page_to_virt(pages);
+#else
+	if (nr_pages > MAX_ORDER_NR_PAGES) {
+		pr_warn("KFENCE_NUM_OBJECTS too large for buddy allocator\n");
+		return -EINVAL;
+	}
+	__kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL);
+	if (!__kfence_pool)
+		return -ENOMEM;
+#endif
+
+	if (!kfence_init_pool_late()) {
+		pr_err("%s failed\n", __func__);
+		return -EBUSY;
+	}
+
+	kfence_init_enable();
+	kfence_debugfs_init();
+
+	return 0;
+}
+
+static int kfence_enable_late(void)
+{
+	if (!__kfence_pool)
+		return kfence_init_late();
+
+	WRITE_ONCE(kfence_enabled, true);
+	queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
+	pr_info("re-enabled\n");
+	return 0;
+}
+
+void kfence_shutdown_cache(struct kmem_cache *s)
+{
+	unsigned long flags;
+	struct kfence_metadata *meta;
+	int i;
+
+	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+		bool in_use;
+
+		meta = &kfence_metadata[i];
+
+		/*
+		 * If we observe some inconsistent cache and state pair where we
+		 * should have returned false here, cache destruction is racing
+		 * with either kmem_cache_alloc() or kmem_cache_free(). Taking
+		 * the lock will not help, as different critical section
+		 * serialization will have the same outcome.
+		 */
+		if (READ_ONCE(meta->cache) != s ||
+		    READ_ONCE(meta->state) != KFENCE_OBJECT_ALLOCATED)
+			continue;
+
+		raw_spin_lock_irqsave(&meta->lock, flags);
+		in_use = meta->cache == s && meta->state == KFENCE_OBJECT_ALLOCATED;
+		raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+		if (in_use) {
+			/*
+			 * This cache still has allocations, and we should not
+			 * release them back into the freelist so they can still
+			 * safely be used and retain the kernel's default
+			 * behaviour of keeping the allocations alive (leak the
+			 * cache); however, they effectively become "zombie
+			 * allocations" as the KFENCE objects are the only ones
+			 * still in use and the owning cache is being destroyed.
+			 *
+			 * We mark them freed, so that any subsequent use shows
+			 * more useful error messages that will include stack
+			 * traces of the user of the object, the original
+			 * allocation, and caller to shutdown_cache().
+			 */
+			kfence_guarded_free((void *)meta->addr, meta, /*zombie=*/true);
+		}
+	}
+
+	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+		meta = &kfence_metadata[i];
+
+		/* See above. */
+		if (READ_ONCE(meta->cache) != s || READ_ONCE(meta->state) != KFENCE_OBJECT_FREED)
+			continue;
+
+		raw_spin_lock_irqsave(&meta->lock, flags);
+		if (meta->cache == s && meta->state == KFENCE_OBJECT_FREED)
+			meta->cache = NULL;
+		raw_spin_unlock_irqrestore(&meta->lock, flags);
+	}
+}
+
+void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
+{
+	unsigned long stack_entries[KFENCE_STACK_DEPTH];
+	size_t num_stack_entries;
+	u32 alloc_stack_hash;
+
+	/*
+	 * Perform size check before switching kfence_allocation_gate, so that
+	 * we don't disable KFENCE without making an allocation.
+	 */
+	if (size > PAGE_SIZE) {
+		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
+		return NULL;
+	}
+
+	/*
+	 * Skip allocations from non-default zones, including DMA. We cannot
+	 * guarantee that pages in the KFENCE pool will have the requested
+	 * properties (e.g. reside in DMAable memory).
+	 */
+	if ((flags & GFP_ZONEMASK) ||
+	    (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) {
+		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
+		return NULL;
+	}
+
+	/*
+	 * Skip allocations for this slab, if KFENCE has been disabled for
+	 * this slab.
+	 */
+	if (s->flags & SLAB_SKIP_KFENCE)
+		return NULL;
+
+	if (atomic_inc_return(&kfence_allocation_gate) > 1)
+		return NULL;
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+	/*
+	 * waitqueue_active() is fully ordered after the update of
+	 * kfence_allocation_gate per atomic_inc_return().
+	 */
+	if (waitqueue_active(&allocation_wait)) {
+		/*
+		 * Calling wake_up() here may deadlock when allocations happen
+		 * from within timer code. Use an irq_work to defer it.
+		 */
+		irq_work_queue(&wake_up_kfence_timer_work);
+	}
+#endif
+
+	if (!READ_ONCE(kfence_enabled))
+		return NULL;
+
+	num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 0);
+
+	/*
+	 * Do expensive check for coverage of allocation in slow-path after
+	 * allocation_gate has already become non-zero, even though it might
+	 * mean not making any allocation within a given sample interval.
+	 *
+	 * This ensures reasonable allocation coverage when the pool is almost
+	 * full, including avoiding long-lived allocations of the same source
+	 * filling up the pool (e.g. pagecache allocations).
+	 */
+	alloc_stack_hash = get_alloc_stack_hash(stack_entries, num_stack_entries);
+	if (should_skip_covered() && alloc_covered_contains(alloc_stack_hash)) {
+		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_COVERED]);
+		return NULL;
+	}
+
+	return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries,
+				    alloc_stack_hash);
+}
+
+size_t kfence_ksize(const void *addr)
+{
+	const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
+
+	/*
+	 * Read locklessly -- if there is a race with __kfence_alloc(), this is
+	 * either a use-after-free or invalid access.
+	 */
+	return meta ? meta->size : 0;
+}
+
+void *kfence_object_start(const void *addr)
+{
+	const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
+
+	/*
+	 * Read locklessly -- if there is a race with __kfence_alloc(), this is
+	 * either a use-after-free or invalid access.
+	 */
+	return meta ? (void *)meta->addr : NULL;
+}
+
+void __kfence_free(void *addr)
+{
+	struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
+
+#ifdef CONFIG_MEMCG
+	KFENCE_WARN_ON(meta->objcg);
+#endif
+	/*
+	 * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing
+	 * the object, as the object page may be recycled for other-typed
+	 * objects once it has been freed. meta->cache may be NULL if the cache
+	 * was destroyed.
+	 */
+	if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU)))
+		call_rcu(&meta->rcu_head, rcu_guarded_free);
+	else
+		kfence_guarded_free(addr, meta, false);
+}
+
+bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs)
+{
+	const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE;
+	struct kfence_metadata *to_report = NULL;
+	enum kfence_error_type error_type;
+	unsigned long flags;
+
+	if (!is_kfence_address((void *)addr))
+		return false;
+
+	if (!READ_ONCE(kfence_enabled)) /* If disabled at runtime ... */
+		return kfence_unprotect(addr); /* ... unprotect and proceed. */
+
+	atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
+
+	if (page_index % 2) {
+		/* This is a redzone, report a buffer overflow. */
+		struct kfence_metadata *meta;
+		int distance = 0;
+
+		meta = addr_to_metadata(addr - PAGE_SIZE);
+		if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
+			to_report = meta;
+			/* Data race ok; distance calculation approximate. */
+			distance = addr - data_race(meta->addr + meta->size);
+		}
+
+		meta = addr_to_metadata(addr + PAGE_SIZE);
+		if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
+			/* Data race ok; distance calculation approximate. */
+			if (!to_report || distance > data_race(meta->addr) - addr)
+				to_report = meta;
+		}
+
+		if (!to_report)
+			goto out;
+
+		raw_spin_lock_irqsave(&to_report->lock, flags);
+		to_report->unprotected_page = addr;
+		error_type = KFENCE_ERROR_OOB;
+
+		/*
+		 * If the object was freed before we took the look we can still
+		 * report this as an OOB -- the report will simply show the
+		 * stacktrace of the free as well.
+		 */
+	} else {
+		to_report = addr_to_metadata(addr);
+		if (!to_report)
+			goto out;
+
+		raw_spin_lock_irqsave(&to_report->lock, flags);
+		error_type = KFENCE_ERROR_UAF;
+		/*
+		 * We may race with __kfence_alloc(), and it is possible that a
+		 * freed object may be reallocated. We simply report this as a
+		 * use-after-free, with the stack trace showing the place where
+		 * the object was re-allocated.
+		 */
+	}
+
+out:
+	if (to_report) {
+		kfence_report_error(addr, is_write, regs, to_report, error_type);
+		raw_spin_unlock_irqrestore(&to_report->lock, flags);
+	} else {
+		/* This may be a UAF or OOB access, but we can't be sure. */
+		kfence_report_error(addr, is_write, regs, NULL, KFENCE_ERROR_INVALID);
+	}
+
+	return kfence_unprotect(addr); /* Unprotect and let access proceed. */
+}
diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
new file mode 100644
index 000000000..600f2e243
--- /dev/null
+++ b/mm/kfence/kfence.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernel Electric-Fence (KFENCE). For more info please see
+ * Documentation/dev-tools/kfence.rst.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef MM_KFENCE_KFENCE_H
+#define MM_KFENCE_KFENCE_H
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include "../slab.h" /* for struct kmem_cache */
+
+/*
+ * Get the canary byte pattern for @addr. Use a pattern that varies based on the
+ * lower 3 bits of the address, to detect memory corruptions with higher
+ * probability, where similar constants are used.
+ */
+#define KFENCE_CANARY_PATTERN(addr) ((u8)0xaa ^ (u8)((unsigned long)(addr) & 0x7))
+
+/* Maximum stack depth for reports. */
+#define KFENCE_STACK_DEPTH 64
+
+/* KFENCE object states. */
+enum kfence_object_state {
+	KFENCE_OBJECT_UNUSED,		/* Object is unused. */
+	KFENCE_OBJECT_ALLOCATED,	/* Object is currently allocated. */
+	KFENCE_OBJECT_FREED,		/* Object was allocated, and then freed. */
+};
+
+/* Alloc/free tracking information. */
+struct kfence_track {
+	pid_t pid;
+	int cpu;
+	u64 ts_nsec;
+	int num_stack_entries;
+	unsigned long stack_entries[KFENCE_STACK_DEPTH];
+};
+
+/* KFENCE metadata per guarded allocation. */
+struct kfence_metadata {
+	struct list_head list;		/* Freelist node; access under kfence_freelist_lock. */
+	struct rcu_head rcu_head;	/* For delayed freeing. */
+
+	/*
+	 * Lock protecting below data; to ensure consistency of the below data,
+	 * since the following may execute concurrently: __kfence_alloc(),
+	 * __kfence_free(), kfence_handle_page_fault(). However, note that we
+	 * cannot grab the same metadata off the freelist twice, and multiple
+	 * __kfence_alloc() cannot run concurrently on the same metadata.
+	 */
+	raw_spinlock_t lock;
+
+	/* The current state of the object; see above. */
+	enum kfence_object_state state;
+
+	/*
+	 * Allocated object address; cannot be calculated from size, because of
+	 * alignment requirements.
+	 *
+	 * Invariant: ALIGN_DOWN(addr, PAGE_SIZE) is constant.
+	 */
+	unsigned long addr;
+
+	/*
+	 * The size of the original allocation.
+	 */
+	size_t size;
+
+	/*
+	 * The kmem_cache cache of the last allocation; NULL if never allocated
+	 * or the cache has already been destroyed.
+	 */
+	struct kmem_cache *cache;
+
+	/*
+	 * In case of an invalid access, the page that was unprotected; we
+	 * optimistically only store one address.
+	 */
+	unsigned long unprotected_page;
+
+	/* Allocation and free stack information. */
+	struct kfence_track alloc_track;
+	struct kfence_track free_track;
+	/* For updating alloc_covered on frees. */
+	u32 alloc_stack_hash;
+#ifdef CONFIG_MEMCG
+	struct obj_cgroup *objcg;
+#endif
+};
+
+extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+
+static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
+{
+	long index;
+
+	/* The checks do not affect performance; only called from slow-paths. */
+
+	if (!is_kfence_address((void *)addr))
+		return NULL;
+
+	/*
+	 * May be an invalid index if called with an address at the edge of
+	 * __kfence_pool, in which case we would report an "invalid access"
+	 * error.
+	 */
+	index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1;
+	if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS)
+		return NULL;
+
+	return &kfence_metadata[index];
+}
+
+/* KFENCE error types for report generation. */
+enum kfence_error_type {
+	KFENCE_ERROR_OOB,		/* Detected a out-of-bounds access. */
+	KFENCE_ERROR_UAF,		/* Detected a use-after-free access. */
+	KFENCE_ERROR_CORRUPTION,	/* Detected a memory corruption on free. */
+	KFENCE_ERROR_INVALID,		/* Invalid access of unknown type. */
+	KFENCE_ERROR_INVALID_FREE,	/* Invalid free. */
+};
+
+void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs,
+			 const struct kfence_metadata *meta, enum kfence_error_type type);
+
+void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta);
+
+#endif /* MM_KFENCE_KFENCE_H */
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
new file mode 100644
index 000000000..a97bffe0c
--- /dev/null
+++ b/mm/kfence/kfence_test.c
@@ -0,0 +1,870 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test cases for KFENCE memory safety error detector. Since the interface with
+ * which KFENCE's reports are obtained is via the console, this is the output we
+ * should verify. For each test case checks the presence (or absence) of
+ * generated reports. Relies on 'console' tracepoint to capture reports as they
+ * appear in the kernel log.
+ *
+ * Copyright (C) 2020, Google LLC.
+ * Author: Alexander Potapenko <glider@google.com>
+ *         Marco Elver <elver@google.com>
+ */
+
+#include <kunit/test.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/kfence.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/tracepoint.h>
+#include <trace/events/printk.h>
+
+#include <asm/kfence.h>
+
+#include "kfence.h"
+
+/* May be overridden by <asm/kfence.h>. */
+#ifndef arch_kfence_test_address
+#define arch_kfence_test_address(addr) (addr)
+#endif
+
+#define KFENCE_TEST_REQUIRES(test, cond) do {			\
+	if (!(cond))						\
+		kunit_skip((test), "Test requires: " #cond);	\
+} while (0)
+
+/* Report as observed from console. */
+static struct {
+	spinlock_t lock;
+	int nlines;
+	char lines[2][256];
+} observed = {
+	.lock = __SPIN_LOCK_UNLOCKED(observed.lock),
+};
+
+/* Probe for console output: obtains observed lines of interest. */
+static void probe_console(void *ignore, const char *buf, size_t len)
+{
+	unsigned long flags;
+	int nlines;
+
+	spin_lock_irqsave(&observed.lock, flags);
+	nlines = observed.nlines;
+
+	if (strnstr(buf, "BUG: KFENCE: ", len) && strnstr(buf, "test_", len)) {
+		/*
+		 * KFENCE report and related to the test.
+		 *
+		 * The provided @buf is not NUL-terminated; copy no more than
+		 * @len bytes and let strscpy() add the missing NUL-terminator.
+		 */
+		strscpy(observed.lines[0], buf, min(len + 1, sizeof(observed.lines[0])));
+		nlines = 1;
+	} else if (nlines == 1 && (strnstr(buf, "at 0x", len) || strnstr(buf, "of 0x", len))) {
+		strscpy(observed.lines[nlines++], buf, min(len + 1, sizeof(observed.lines[0])));
+	}
+
+	WRITE_ONCE(observed.nlines, nlines); /* Publish new nlines. */
+	spin_unlock_irqrestore(&observed.lock, flags);
+}
+
+/* Check if a report related to the test exists. */
+static bool report_available(void)
+{
+	return READ_ONCE(observed.nlines) == ARRAY_SIZE(observed.lines);
+}
+
+/* Information we expect in a report. */
+struct expect_report {
+	enum kfence_error_type type; /* The type or error. */
+	void *fn; /* Function pointer to expected function where access occurred. */
+	char *addr; /* Address at which the bad access occurred. */
+	bool is_write; /* Is access a write. */
+};
+
+static const char *get_access_type(const struct expect_report *r)
+{
+	return r->is_write ? "write" : "read";
+}
+
+/* Check observed report matches information in @r. */
+static bool report_matches(const struct expect_report *r)
+{
+	unsigned long addr = (unsigned long)r->addr;
+	bool ret = false;
+	unsigned long flags;
+	typeof(observed.lines) expect;
+	const char *end;
+	char *cur;
+
+	/* Doubled-checked locking. */
+	if (!report_available())
+		return false;
+
+	/* Generate expected report contents. */
+
+	/* Title */
+	cur = expect[0];
+	end = &expect[0][sizeof(expect[0]) - 1];
+	switch (r->type) {
+	case KFENCE_ERROR_OOB:
+		cur += scnprintf(cur, end - cur, "BUG: KFENCE: out-of-bounds %s",
+				 get_access_type(r));
+		break;
+	case KFENCE_ERROR_UAF:
+		cur += scnprintf(cur, end - cur, "BUG: KFENCE: use-after-free %s",
+				 get_access_type(r));
+		break;
+	case KFENCE_ERROR_CORRUPTION:
+		cur += scnprintf(cur, end - cur, "BUG: KFENCE: memory corruption");
+		break;
+	case KFENCE_ERROR_INVALID:
+		cur += scnprintf(cur, end - cur, "BUG: KFENCE: invalid %s",
+				 get_access_type(r));
+		break;
+	case KFENCE_ERROR_INVALID_FREE:
+		cur += scnprintf(cur, end - cur, "BUG: KFENCE: invalid free");
+		break;
+	}
+
+	scnprintf(cur, end - cur, " in %pS", r->fn);
+	/* The exact offset won't match, remove it; also strip module name. */
+	cur = strchr(expect[0], '+');
+	if (cur)
+		*cur = '\0';
+
+	/* Access information */
+	cur = expect[1];
+	end = &expect[1][sizeof(expect[1]) - 1];
+
+	switch (r->type) {
+	case KFENCE_ERROR_OOB:
+		cur += scnprintf(cur, end - cur, "Out-of-bounds %s at", get_access_type(r));
+		addr = arch_kfence_test_address(addr);
+		break;
+	case KFENCE_ERROR_UAF:
+		cur += scnprintf(cur, end - cur, "Use-after-free %s at", get_access_type(r));
+		addr = arch_kfence_test_address(addr);
+		break;
+	case KFENCE_ERROR_CORRUPTION:
+		cur += scnprintf(cur, end - cur, "Corrupted memory at");
+		break;
+	case KFENCE_ERROR_INVALID:
+		cur += scnprintf(cur, end - cur, "Invalid %s at", get_access_type(r));
+		addr = arch_kfence_test_address(addr);
+		break;
+	case KFENCE_ERROR_INVALID_FREE:
+		cur += scnprintf(cur, end - cur, "Invalid free of");
+		break;
+	}
+
+	cur += scnprintf(cur, end - cur, " 0x%p", (void *)addr);
+
+	spin_lock_irqsave(&observed.lock, flags);
+	if (!report_available())
+		goto out; /* A new report is being captured. */
+
+	/* Finally match expected output to what we actually observed. */
+	ret = strstr(observed.lines[0], expect[0]) && strstr(observed.lines[1], expect[1]);
+out:
+	spin_unlock_irqrestore(&observed.lock, flags);
+	return ret;
+}
+
+/* ===== Test cases ===== */
+
+#define TEST_PRIV_WANT_MEMCACHE ((void *)1)
+
+/* Cache used by tests; if NULL, allocate from kmalloc instead. */
+static struct kmem_cache *test_cache;
+
+static size_t setup_test_cache(struct kunit *test, size_t size, slab_flags_t flags,
+			       void (*ctor)(void *))
+{
+	if (test->priv != TEST_PRIV_WANT_MEMCACHE)
+		return size;
+
+	kunit_info(test, "%s: size=%zu, ctor=%ps\n", __func__, size, ctor);
+
+	/*
+	 * Use SLAB_NOLEAKTRACE to prevent merging with existing caches. Any
+	 * other flag in SLAB_NEVER_MERGE also works. Use SLAB_ACCOUNT to
+	 * allocate via memcg, if enabled.
+	 */
+	flags |= SLAB_NOLEAKTRACE | SLAB_ACCOUNT;
+	test_cache = kmem_cache_create("test", size, 1, flags, ctor);
+	KUNIT_ASSERT_TRUE_MSG(test, test_cache, "could not create cache");
+
+	return size;
+}
+
+static void test_cache_destroy(void)
+{
+	if (!test_cache)
+		return;
+
+	kmem_cache_destroy(test_cache);
+	test_cache = NULL;
+}
+
+static inline size_t kmalloc_cache_alignment(size_t size)
+{
+	return kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)]->align;
+}
+
+/* Must always inline to match stack trace against caller. */
+static __always_inline void test_free(void *ptr)
+{
+	if (test_cache)
+		kmem_cache_free(test_cache, ptr);
+	else
+		kfree(ptr);
+}
+
+/*
+ * If this should be a KFENCE allocation, and on which side the allocation and
+ * the closest guard page should be.
+ */
+enum allocation_policy {
+	ALLOCATE_ANY, /* KFENCE, any side. */
+	ALLOCATE_LEFT, /* KFENCE, left side of page. */
+	ALLOCATE_RIGHT, /* KFENCE, right side of page. */
+	ALLOCATE_NONE, /* No KFENCE allocation. */
+};
+
+/*
+ * Try to get a guarded allocation from KFENCE. Uses either kmalloc() or the
+ * current test_cache if set up.
+ */
+static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocation_policy policy)
+{
+	void *alloc;
+	unsigned long timeout, resched_after;
+	const char *policy_name;
+
+	switch (policy) {
+	case ALLOCATE_ANY:
+		policy_name = "any";
+		break;
+	case ALLOCATE_LEFT:
+		policy_name = "left";
+		break;
+	case ALLOCATE_RIGHT:
+		policy_name = "right";
+		break;
+	case ALLOCATE_NONE:
+		policy_name = "none";
+		break;
+	}
+
+	kunit_info(test, "%s: size=%zu, gfp=%x, policy=%s, cache=%i\n", __func__, size, gfp,
+		   policy_name, !!test_cache);
+
+	/*
+	 * 100x the sample interval should be more than enough to ensure we get
+	 * a KFENCE allocation eventually.
+	 */
+	timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
+	/*
+	 * Especially for non-preemption kernels, ensure the allocation-gate
+	 * timer can catch up: after @resched_after, every failed allocation
+	 * attempt yields, to ensure the allocation-gate timer is scheduled.
+	 */
+	resched_after = jiffies + msecs_to_jiffies(kfence_sample_interval);
+	do {
+		if (test_cache)
+			alloc = kmem_cache_alloc(test_cache, gfp);
+		else
+			alloc = kmalloc(size, gfp);
+
+		if (is_kfence_address(alloc)) {
+			struct slab *slab = virt_to_slab(alloc);
+			struct kmem_cache *s = test_cache ?:
+					kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)];
+
+			/*
+			 * Verify that various helpers return the right values
+			 * even for KFENCE objects; these are required so that
+			 * memcg accounting works correctly.
+			 */
+			KUNIT_EXPECT_EQ(test, obj_to_index(s, slab, alloc), 0U);
+			KUNIT_EXPECT_EQ(test, objs_per_slab(s, slab), 1);
+
+			if (policy == ALLOCATE_ANY)
+				return alloc;
+			if (policy == ALLOCATE_LEFT && PAGE_ALIGNED(alloc))
+				return alloc;
+			if (policy == ALLOCATE_RIGHT && !PAGE_ALIGNED(alloc))
+				return alloc;
+		} else if (policy == ALLOCATE_NONE)
+			return alloc;
+
+		test_free(alloc);
+
+		if (time_after(jiffies, resched_after))
+			cond_resched();
+	} while (time_before(jiffies, timeout));
+
+	KUNIT_ASSERT_TRUE_MSG(test, false, "failed to allocate from KFENCE");
+	return NULL; /* Unreachable. */
+}
+
+static void test_out_of_bounds_read(struct kunit *test)
+{
+	size_t size = 32;
+	struct expect_report expect = {
+		.type = KFENCE_ERROR_OOB,
+		.fn = test_out_of_bounds_read,
+		.is_write = false,
+	};
+	char *buf;
+
+	setup_test_cache(test, size, 0, NULL);
+
+	/*
+	 * If we don't have our own cache, adjust based on alignment, so that we
+	 * actually access guard pages on either side.
+	 */
+	if (!test_cache)
+		size = kmalloc_cache_alignment(size);
+
+	/* Test both sides. */
+
+	buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
+	expect.addr = buf - 1;
+	READ_ONCE(*expect.addr);
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+	test_free(buf);
+
+	buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+	expect.addr = buf + size;
+	READ_ONCE(*expect.addr);
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+	test_free(buf);
+}
+
+static void test_out_of_bounds_write(struct kunit *test)
+{
+	size_t size = 32;
+	struct expect_report expect = {
+		.type = KFENCE_ERROR_OOB,
+		.fn = test_out_of_bounds_write,
+		.is_write = true,
+	};
+	char *buf;
+
+	setup_test_cache(test, size, 0, NULL);
+	buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
+	expect.addr = buf - 1;
+	WRITE_ONCE(*expect.addr, 42);
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+	test_free(buf);
+}
+
+static void test_use_after_free_read(struct kunit *test)
+{
+	const size_t size = 32;
+	struct expect_report expect = {
+		.type = KFENCE_ERROR_UAF,
+		.fn = test_use_after_free_read,
+		.is_write = false,
+	};
+
+	setup_test_cache(test, size, 0, NULL);
+	expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+	test_free(expect.addr);
+	READ_ONCE(*expect.addr);
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+static void test_double_free(struct kunit *test)
+{
+	const size_t size = 32;
+	struct expect_report expect = {
+		.type = KFENCE_ERROR_INVALID_FREE,
+		.fn = test_double_free,
+	};
+
+	setup_test_cache(test, size, 0, NULL);
+	expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+	test_free(expect.addr);
+	test_free(expect.addr); /* Double-free. */
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+static void test_invalid_addr_free(struct kunit *test)
+{
+	const size_t size = 32;
+	struct expect_report expect = {
+		.type = KFENCE_ERROR_INVALID_FREE,
+		.fn = test_invalid_addr_free,
+	};
+	char *buf;
+
+	setup_test_cache(test, size, 0, NULL);
+	buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+	expect.addr = buf + 1; /* Free on invalid address. */
+	test_free(expect.addr); /* Invalid address free. */
+	test_free(buf); /* No error. */
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+static void test_corruption(struct kunit *test)
+{
+	size_t size = 32;
+	struct expect_report expect = {
+		.type = KFENCE_ERROR_CORRUPTION,
+		.fn = test_corruption,
+	};
+	char *buf;
+
+	setup_test_cache(test, size, 0, NULL);
+
+	/* Test both sides. */
+
+	buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
+	expect.addr = buf + size;
+	WRITE_ONCE(*expect.addr, 42);
+	test_free(buf);
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+
+	buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+	expect.addr = buf - 1;
+	WRITE_ONCE(*expect.addr, 42);
+	test_free(buf);
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/*
+ * KFENCE is unable to detect an OOB if the allocation's alignment requirements
+ * leave a gap between the object and the guard page. Specifically, an
+ * allocation of e.g. 73 bytes is aligned on 8 and 128 bytes for SLUB or SLAB
+ * respectively. Therefore it is impossible for the allocated object to
+ * contiguously line up with the right guard page.
+ *
+ * However, we test that an access to memory beyond the gap results in KFENCE
+ * detecting an OOB access.
+ */
+static void test_kmalloc_aligned_oob_read(struct kunit *test)
+{
+	const size_t size = 73;
+	const size_t align = kmalloc_cache_alignment(size);
+	struct expect_report expect = {
+		.type = KFENCE_ERROR_OOB,
+		.fn = test_kmalloc_aligned_oob_read,
+		.is_write = false,
+	};
+	char *buf;
+
+	buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+
+	/*
+	 * The object is offset to the right, so there won't be an OOB to the
+	 * left of it.
+	 */
+	READ_ONCE(*(buf - 1));
+	KUNIT_EXPECT_FALSE(test, report_available());
+
+	/*
+	 * @buf must be aligned on @align, therefore buf + size belongs to the
+	 * same page -> no OOB.
+	 */
+	READ_ONCE(*(buf + size));
+	KUNIT_EXPECT_FALSE(test, report_available());
+
+	/* Overflowing by @align bytes will result in an OOB. */
+	expect.addr = buf + size + align;
+	READ_ONCE(*expect.addr);
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+
+	test_free(buf);
+}
+
+static void test_kmalloc_aligned_oob_write(struct kunit *test)
+{
+	const size_t size = 73;
+	struct expect_report expect = {
+		.type = KFENCE_ERROR_CORRUPTION,
+		.fn = test_kmalloc_aligned_oob_write,
+	};
+	char *buf;
+
+	buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+	/*
+	 * The object is offset to the right, so we won't get a page
+	 * fault immediately after it.
+	 */
+	expect.addr = buf + size;
+	WRITE_ONCE(*expect.addr, READ_ONCE(*expect.addr) + 1);
+	KUNIT_EXPECT_FALSE(test, report_available());
+	test_free(buf);
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/* Test cache shrinking and destroying with KFENCE. */
+static void test_shrink_memcache(struct kunit *test)
+{
+	const size_t size = 32;
+	void *buf;
+
+	setup_test_cache(test, size, 0, NULL);
+	KUNIT_EXPECT_TRUE(test, test_cache);
+	buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+	kmem_cache_shrink(test_cache);
+	test_free(buf);
+
+	KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+static void ctor_set_x(void *obj)
+{
+	/* Every object has at least 8 bytes. */
+	memset(obj, 'x', 8);
+}
+
+/* Ensure that SL*B does not modify KFENCE objects on bulk free. */
+static void test_free_bulk(struct kunit *test)
+{
+	int iter;
+
+	for (iter = 0; iter < 5; iter++) {
+		const size_t size = setup_test_cache(test, 8 + prandom_u32_max(300), 0,
+						     (iter & 1) ? ctor_set_x : NULL);
+		void *objects[] = {
+			test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT),
+			test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
+			test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT),
+			test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
+			test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
+		};
+
+		kmem_cache_free_bulk(test_cache, ARRAY_SIZE(objects), objects);
+		KUNIT_ASSERT_FALSE(test, report_available());
+		test_cache_destroy();
+	}
+}
+
+/* Test init-on-free works. */
+static void test_init_on_free(struct kunit *test)
+{
+	const size_t size = 32;
+	struct expect_report expect = {
+		.type = KFENCE_ERROR_UAF,
+		.fn = test_init_on_free,
+		.is_write = false,
+	};
+	int i;
+
+	KFENCE_TEST_REQUIRES(test, IS_ENABLED(CONFIG_INIT_ON_FREE_DEFAULT_ON));
+	/* Assume it hasn't been disabled on command line. */
+
+	setup_test_cache(test, size, 0, NULL);
+	expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+	for (i = 0; i < size; i++)
+		expect.addr[i] = i + 1;
+	test_free(expect.addr);
+
+	for (i = 0; i < size; i++) {
+		/*
+		 * This may fail if the page was recycled by KFENCE and then
+		 * written to again -- this however, is near impossible with a
+		 * default config.
+		 */
+		KUNIT_EXPECT_EQ(test, expect.addr[i], (char)0);
+
+		if (!i) /* Only check first access to not fail test if page is ever re-protected. */
+			KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+	}
+}
+
+/* Ensure that constructors work properly. */
+static void test_memcache_ctor(struct kunit *test)
+{
+	const size_t size = 32;
+	char *buf;
+	int i;
+
+	setup_test_cache(test, size, 0, ctor_set_x);
+	buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+
+	for (i = 0; i < 8; i++)
+		KUNIT_EXPECT_EQ(test, buf[i], (char)'x');
+
+	test_free(buf);
+
+	KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+/* Test that memory is zeroed if requested. */
+static void test_gfpzero(struct kunit *test)
+{
+	const size_t size = PAGE_SIZE; /* PAGE_SIZE so we can use ALLOCATE_ANY. */
+	char *buf1, *buf2;
+	int i;
+
+	/* Skip if we think it'd take too long. */
+	KFENCE_TEST_REQUIRES(test, kfence_sample_interval <= 100);
+
+	setup_test_cache(test, size, 0, NULL);
+	buf1 = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+	for (i = 0; i < size; i++)
+		buf1[i] = i + 1;
+	test_free(buf1);
+
+	/* Try to get same address again -- this can take a while. */
+	for (i = 0;; i++) {
+		buf2 = test_alloc(test, size, GFP_KERNEL | __GFP_ZERO, ALLOCATE_ANY);
+		if (buf1 == buf2)
+			break;
+		test_free(buf2);
+
+		if (kthread_should_stop() || (i == CONFIG_KFENCE_NUM_OBJECTS)) {
+			kunit_warn(test, "giving up ... cannot get same object back\n");
+			return;
+		}
+		cond_resched();
+	}
+
+	for (i = 0; i < size; i++)
+		KUNIT_EXPECT_EQ(test, buf2[i], (char)0);
+
+	test_free(buf2);
+
+	KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+static void test_invalid_access(struct kunit *test)
+{
+	const struct expect_report expect = {
+		.type = KFENCE_ERROR_INVALID,
+		.fn = test_invalid_access,
+		.addr = &__kfence_pool[10],
+		.is_write = false,
+	};
+
+	READ_ONCE(__kfence_pool[10]);
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/* Test SLAB_TYPESAFE_BY_RCU works. */
+static void test_memcache_typesafe_by_rcu(struct kunit *test)
+{
+	const size_t size = 32;
+	struct expect_report expect = {
+		.type = KFENCE_ERROR_UAF,
+		.fn = test_memcache_typesafe_by_rcu,
+		.is_write = false,
+	};
+
+	setup_test_cache(test, size, SLAB_TYPESAFE_BY_RCU, NULL);
+	KUNIT_EXPECT_TRUE(test, test_cache); /* Want memcache. */
+
+	expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+	*expect.addr = 42;
+
+	rcu_read_lock();
+	test_free(expect.addr);
+	KUNIT_EXPECT_EQ(test, *expect.addr, (char)42);
+	/*
+	 * Up to this point, memory should not have been freed yet, and
+	 * therefore there should be no KFENCE report from the above access.
+	 */
+	rcu_read_unlock();
+
+	/* Above access to @expect.addr should not have generated a report! */
+	KUNIT_EXPECT_FALSE(test, report_available());
+
+	/* Only after rcu_barrier() is the memory guaranteed to be freed. */
+	rcu_barrier();
+
+	/* Expect use-after-free. */
+	KUNIT_EXPECT_EQ(test, *expect.addr, (char)42);
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/* Test krealloc(). */
+static void test_krealloc(struct kunit *test)
+{
+	const size_t size = 32;
+	const struct expect_report expect = {
+		.type = KFENCE_ERROR_UAF,
+		.fn = test_krealloc,
+		.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY),
+		.is_write = false,
+	};
+	char *buf = expect.addr;
+	int i;
+
+	KUNIT_EXPECT_FALSE(test, test_cache);
+	KUNIT_EXPECT_EQ(test, ksize(buf), size); /* Precise size match after KFENCE alloc. */
+	for (i = 0; i < size; i++)
+		buf[i] = i + 1;
+
+	/* Check that we successfully change the size. */
+	buf = krealloc(buf, size * 3, GFP_KERNEL); /* Grow. */
+	/* Note: Might no longer be a KFENCE alloc. */
+	KUNIT_EXPECT_GE(test, ksize(buf), size * 3);
+	for (i = 0; i < size; i++)
+		KUNIT_EXPECT_EQ(test, buf[i], (char)(i + 1));
+	for (; i < size * 3; i++) /* Fill to extra bytes. */
+		buf[i] = i + 1;
+
+	buf = krealloc(buf, size * 2, GFP_KERNEL); /* Shrink. */
+	KUNIT_EXPECT_GE(test, ksize(buf), size * 2);
+	for (i = 0; i < size * 2; i++)
+		KUNIT_EXPECT_EQ(test, buf[i], (char)(i + 1));
+
+	buf = krealloc(buf, 0, GFP_KERNEL); /* Free. */
+	KUNIT_EXPECT_EQ(test, (unsigned long)buf, (unsigned long)ZERO_SIZE_PTR);
+	KUNIT_ASSERT_FALSE(test, report_available()); /* No reports yet! */
+
+	READ_ONCE(*expect.addr); /* Ensure krealloc() actually freed earlier KFENCE object. */
+	KUNIT_ASSERT_TRUE(test, report_matches(&expect));
+}
+
+/* Test that some objects from a bulk allocation belong to KFENCE pool. */
+static void test_memcache_alloc_bulk(struct kunit *test)
+{
+	const size_t size = 32;
+	bool pass = false;
+	unsigned long timeout;
+
+	setup_test_cache(test, size, 0, NULL);
+	KUNIT_EXPECT_TRUE(test, test_cache); /* Want memcache. */
+	/*
+	 * 100x the sample interval should be more than enough to ensure we get
+	 * a KFENCE allocation eventually.
+	 */
+	timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
+	do {
+		void *objects[100];
+		int i, num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects),
+						   objects);
+		if (!num)
+			continue;
+		for (i = 0; i < ARRAY_SIZE(objects); i++) {
+			if (is_kfence_address(objects[i])) {
+				pass = true;
+				break;
+			}
+		}
+		kmem_cache_free_bulk(test_cache, num, objects);
+		/*
+		 * kmem_cache_alloc_bulk() disables interrupts, and calling it
+		 * in a tight loop may not give KFENCE a chance to switch the
+		 * static branch. Call cond_resched() to let KFENCE chime in.
+		 */
+		cond_resched();
+	} while (!pass && time_before(jiffies, timeout));
+
+	KUNIT_EXPECT_TRUE(test, pass);
+	KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+/*
+ * KUnit does not provide a way to provide arguments to tests, and we encode
+ * additional info in the name. Set up 2 tests per test case, one using the
+ * default allocator, and another using a custom memcache (suffix '-memcache').
+ */
+#define KFENCE_KUNIT_CASE(test_name)						\
+	{ .run_case = test_name, .name = #test_name },				\
+	{ .run_case = test_name, .name = #test_name "-memcache" }
+
+static struct kunit_case kfence_test_cases[] = {
+	KFENCE_KUNIT_CASE(test_out_of_bounds_read),
+	KFENCE_KUNIT_CASE(test_out_of_bounds_write),
+	KFENCE_KUNIT_CASE(test_use_after_free_read),
+	KFENCE_KUNIT_CASE(test_double_free),
+	KFENCE_KUNIT_CASE(test_invalid_addr_free),
+	KFENCE_KUNIT_CASE(test_corruption),
+	KFENCE_KUNIT_CASE(test_free_bulk),
+	KFENCE_KUNIT_CASE(test_init_on_free),
+	KUNIT_CASE(test_kmalloc_aligned_oob_read),
+	KUNIT_CASE(test_kmalloc_aligned_oob_write),
+	KUNIT_CASE(test_shrink_memcache),
+	KUNIT_CASE(test_memcache_ctor),
+	KUNIT_CASE(test_invalid_access),
+	KUNIT_CASE(test_gfpzero),
+	KUNIT_CASE(test_memcache_typesafe_by_rcu),
+	KUNIT_CASE(test_krealloc),
+	KUNIT_CASE(test_memcache_alloc_bulk),
+	{},
+};
+
+/* ===== End test cases ===== */
+
+static int test_init(struct kunit *test)
+{
+	unsigned long flags;
+	int i;
+
+	if (!__kfence_pool)
+		return -EINVAL;
+
+	spin_lock_irqsave(&observed.lock, flags);
+	for (i = 0; i < ARRAY_SIZE(observed.lines); i++)
+		observed.lines[i][0] = '\0';
+	observed.nlines = 0;
+	spin_unlock_irqrestore(&observed.lock, flags);
+
+	/* Any test with 'memcache' in its name will want a memcache. */
+	if (strstr(test->name, "memcache"))
+		test->priv = TEST_PRIV_WANT_MEMCACHE;
+	else
+		test->priv = NULL;
+
+	return 0;
+}
+
+static void test_exit(struct kunit *test)
+{
+	test_cache_destroy();
+}
+
+static void register_tracepoints(struct tracepoint *tp, void *ignore)
+{
+	check_trace_callback_type_console(probe_console);
+	if (!strcmp(tp->name, "console"))
+		WARN_ON(tracepoint_probe_register(tp, probe_console, NULL));
+}
+
+static void unregister_tracepoints(struct tracepoint *tp, void *ignore)
+{
+	if (!strcmp(tp->name, "console"))
+		tracepoint_probe_unregister(tp, probe_console, NULL);
+}
+
+static int kfence_suite_init(struct kunit_suite *suite)
+{
+	/*
+	 * Because we want to be able to build the test as a module, we need to
+	 * iterate through all known tracepoints, since the static registration
+	 * won't work here.
+	 */
+	for_each_kernel_tracepoint(register_tracepoints, NULL);
+	return 0;
+}
+
+static void kfence_suite_exit(struct kunit_suite *suite)
+{
+	for_each_kernel_tracepoint(unregister_tracepoints, NULL);
+	tracepoint_synchronize_unregister();
+}
+
+static struct kunit_suite kfence_test_suite = {
+	.name = "kfence",
+	.test_cases = kfence_test_cases,
+	.init = test_init,
+	.exit = test_exit,
+	.suite_init = kfence_suite_init,
+	.suite_exit = kfence_suite_exit,
+};
+
+kunit_test_suites(&kfence_test_suite);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Alexander Potapenko <glider@google.com>, Marco Elver <elver@google.com>");
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
new file mode 100644
index 000000000..60205f125
--- /dev/null
+++ b/mm/kfence/report.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KFENCE reporting.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#include <linux/stdarg.h>
+
+#include <linux/kernel.h>
+#include <linux/lockdep.h>
+#include <linux/math.h>
+#include <linux/printk.h>
+#include <linux/sched/debug.h>
+#include <linux/seq_file.h>
+#include <linux/stacktrace.h>
+#include <linux/string.h>
+#include <trace/events/error_report.h>
+
+#include <asm/kfence.h>
+
+#include "kfence.h"
+
+/* May be overridden by <asm/kfence.h>. */
+#ifndef ARCH_FUNC_PREFIX
+#define ARCH_FUNC_PREFIX ""
+#endif
+
+extern bool no_hash_pointers;
+
+/* Helper function to either print to a seq_file or to console. */
+__printf(2, 3)
+static void seq_con_printf(struct seq_file *seq, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	if (seq)
+		seq_vprintf(seq, fmt, args);
+	else
+		vprintk(fmt, args);
+	va_end(args);
+}
+
+/*
+ * Get the number of stack entries to skip to get out of MM internals. @type is
+ * optional, and if set to NULL, assumes an allocation or free stack.
+ */
+static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries,
+			    const enum kfence_error_type *type)
+{
+	char buf[64];
+	int skipnr, fallback = 0;
+
+	if (type) {
+		/* Depending on error type, find different stack entries. */
+		switch (*type) {
+		case KFENCE_ERROR_UAF:
+		case KFENCE_ERROR_OOB:
+		case KFENCE_ERROR_INVALID:
+			/*
+			 * kfence_handle_page_fault() may be called with pt_regs
+			 * set to NULL; in that case we'll simply show the full
+			 * stack trace.
+			 */
+			return 0;
+		case KFENCE_ERROR_CORRUPTION:
+		case KFENCE_ERROR_INVALID_FREE:
+			break;
+		}
+	}
+
+	for (skipnr = 0; skipnr < num_entries; skipnr++) {
+		int len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skipnr]);
+
+		if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") ||
+		    str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") ||
+		    str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") ||
+		    !strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) {
+			/*
+			 * In case of tail calls from any of the below to any of
+			 * the above, optimized by the compiler such that the
+			 * stack trace would omit the initial entry point below.
+			 */
+			fallback = skipnr + 1;
+		}
+
+		/*
+		 * The below list should only include the initial entry points
+		 * into the slab allocators. Includes the *_bulk() variants by
+		 * checking prefixes.
+		 */
+		if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") ||
+		    str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") ||
+		    str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") ||
+		    str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc"))
+			goto found;
+	}
+	if (fallback < num_entries)
+		return fallback;
+found:
+	skipnr++;
+	return skipnr < num_entries ? skipnr : 0;
+}
+
+static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadata *meta,
+			       bool show_alloc)
+{
+	const struct kfence_track *track = show_alloc ? &meta->alloc_track : &meta->free_track;
+	u64 ts_sec = track->ts_nsec;
+	unsigned long rem_nsec = do_div(ts_sec, NSEC_PER_SEC);
+
+	/* Timestamp matches printk timestamp format. */
+	seq_con_printf(seq, "%s by task %d on cpu %d at %lu.%06lus:\n",
+		       show_alloc ? "allocated" : "freed", track->pid,
+		       track->cpu, (unsigned long)ts_sec, rem_nsec / 1000);
+
+	if (track->num_stack_entries) {
+		/* Skip allocation/free internals stack. */
+		int i = get_stack_skipnr(track->stack_entries, track->num_stack_entries, NULL);
+
+		/* stack_trace_seq_print() does not exist; open code our own. */
+		for (; i < track->num_stack_entries; i++)
+			seq_con_printf(seq, " %pS\n", (void *)track->stack_entries[i]);
+	} else {
+		seq_con_printf(seq, " no %s stack\n", show_alloc ? "allocation" : "deallocation");
+	}
+}
+
+void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta)
+{
+	const int size = abs(meta->size);
+	const unsigned long start = meta->addr;
+	const struct kmem_cache *const cache = meta->cache;
+
+	lockdep_assert_held(&meta->lock);
+
+	if (meta->state == KFENCE_OBJECT_UNUSED) {
+		seq_con_printf(seq, "kfence-#%td unused\n", meta - kfence_metadata);
+		return;
+	}
+
+	seq_con_printf(seq, "kfence-#%td: 0x%p-0x%p, size=%d, cache=%s\n\n",
+		       meta - kfence_metadata, (void *)start, (void *)(start + size - 1),
+		       size, (cache && cache->name) ? cache->name : "<destroyed>");
+
+	kfence_print_stack(seq, meta, true);
+
+	if (meta->state == KFENCE_OBJECT_FREED) {
+		seq_con_printf(seq, "\n");
+		kfence_print_stack(seq, meta, false);
+	}
+}
+
+/*
+ * Show bytes at @addr that are different from the expected canary values, up to
+ * @max_bytes.
+ */
+static void print_diff_canary(unsigned long address, size_t bytes_to_show,
+			      const struct kfence_metadata *meta)
+{
+	const unsigned long show_until_addr = address + bytes_to_show;
+	const u8 *cur, *end;
+
+	/* Do not show contents of object nor read into following guard page. */
+	end = (const u8 *)(address < meta->addr ? min(show_until_addr, meta->addr)
+						: min(show_until_addr, PAGE_ALIGN(address)));
+
+	pr_cont("[");
+	for (cur = (const u8 *)address; cur < end; cur++) {
+		if (*cur == KFENCE_CANARY_PATTERN(cur))
+			pr_cont(" .");
+		else if (no_hash_pointers)
+			pr_cont(" 0x%02x", *cur);
+		else /* Do not leak kernel memory in non-debug builds. */
+			pr_cont(" !");
+	}
+	pr_cont(" ]");
+}
+
+static const char *get_access_type(bool is_write)
+{
+	return is_write ? "write" : "read";
+}
+
+void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs,
+			 const struct kfence_metadata *meta, enum kfence_error_type type)
+{
+	unsigned long stack_entries[KFENCE_STACK_DEPTH] = { 0 };
+	const ptrdiff_t object_index = meta ? meta - kfence_metadata : -1;
+	int num_stack_entries;
+	int skipnr = 0;
+
+	if (regs) {
+		num_stack_entries = stack_trace_save_regs(regs, stack_entries, KFENCE_STACK_DEPTH, 0);
+	} else {
+		num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 1);
+		skipnr = get_stack_skipnr(stack_entries, num_stack_entries, &type);
+	}
+
+	/* Require non-NULL meta, except if KFENCE_ERROR_INVALID. */
+	if (WARN_ON(type != KFENCE_ERROR_INVALID && !meta))
+		return;
+
+	if (meta)
+		lockdep_assert_held(&meta->lock);
+	/*
+	 * Because we may generate reports in printk-unfriendly parts of the
+	 * kernel, such as scheduler code, the use of printk() could deadlock.
+	 * Until such time that all printing code here is safe in all parts of
+	 * the kernel, accept the risk, and just get our message out (given the
+	 * system might already behave unpredictably due to the memory error).
+	 * As such, also disable lockdep to hide warnings, and avoid disabling
+	 * lockdep for the rest of the kernel.
+	 */
+	lockdep_off();
+
+	pr_err("==================================================================\n");
+	/* Print report header. */
+	switch (type) {
+	case KFENCE_ERROR_OOB: {
+		const bool left_of_object = address < meta->addr;
+
+		pr_err("BUG: KFENCE: out-of-bounds %s in %pS\n\n", get_access_type(is_write),
+		       (void *)stack_entries[skipnr]);
+		pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%td):\n",
+		       get_access_type(is_write), (void *)address,
+		       left_of_object ? meta->addr - address : address - meta->addr,
+		       left_of_object ? "left" : "right", object_index);
+		break;
+	}
+	case KFENCE_ERROR_UAF:
+		pr_err("BUG: KFENCE: use-after-free %s in %pS\n\n", get_access_type(is_write),
+		       (void *)stack_entries[skipnr]);
+		pr_err("Use-after-free %s at 0x%p (in kfence-#%td):\n",
+		       get_access_type(is_write), (void *)address, object_index);
+		break;
+	case KFENCE_ERROR_CORRUPTION:
+		pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]);
+		pr_err("Corrupted memory at 0x%p ", (void *)address);
+		print_diff_canary(address, 16, meta);
+		pr_cont(" (in kfence-#%td):\n", object_index);
+		break;
+	case KFENCE_ERROR_INVALID:
+		pr_err("BUG: KFENCE: invalid %s in %pS\n\n", get_access_type(is_write),
+		       (void *)stack_entries[skipnr]);
+		pr_err("Invalid %s at 0x%p:\n", get_access_type(is_write),
+		       (void *)address);
+		break;
+	case KFENCE_ERROR_INVALID_FREE:
+		pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]);
+		pr_err("Invalid free of 0x%p (in kfence-#%td):\n", (void *)address,
+		       object_index);
+		break;
+	}
+
+	/* Print stack trace and object info. */
+	stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, 0);
+
+	if (meta) {
+		pr_err("\n");
+		kfence_print_object(NULL, meta);
+	}
+
+	/* Print report footer. */
+	pr_err("\n");
+	if (no_hash_pointers && regs)
+		show_regs(regs);
+	else
+		dump_stack_print_info(KERN_ERR);
+	trace_error_report_end(ERROR_DETECTOR_KFENCE, address);
+	pr_err("==================================================================\n");
+
+	lockdep_on();
+
+	check_panic_on_warn("KFENCE");
+
+	/* We encountered a memory safety error, taint the kernel! */
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK);
+}
+
+#ifdef CONFIG_PRINTK
+static void kfence_to_kp_stack(const struct kfence_track *track, void **kp_stack)
+{
+	int i, j;
+
+	i = get_stack_skipnr(track->stack_entries, track->num_stack_entries, NULL);
+	for (j = 0; i < track->num_stack_entries && j < KS_ADDRS_COUNT; ++i, ++j)
+		kp_stack[j] = (void *)track->stack_entries[i];
+	if (j < KS_ADDRS_COUNT)
+		kp_stack[j] = NULL;
+}
+
+bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
+{
+	struct kfence_metadata *meta = addr_to_metadata((unsigned long)object);
+	unsigned long flags;
+
+	if (!meta)
+		return false;
+
+	/*
+	 * If state is UNUSED at least show the pointer requested; the rest
+	 * would be garbage data.
+	 */
+	kpp->kp_ptr = object;
+
+	/* Requesting info an a never-used object is almost certainly a bug. */
+	if (WARN_ON(meta->state == KFENCE_OBJECT_UNUSED))
+		return true;
+
+	raw_spin_lock_irqsave(&meta->lock, flags);
+
+	kpp->kp_slab = slab;
+	kpp->kp_slab_cache = meta->cache;
+	kpp->kp_objp = (void *)meta->addr;
+	kfence_to_kp_stack(&meta->alloc_track, kpp->kp_stack);
+	if (meta->state == KFENCE_OBJECT_FREED)
+		kfence_to_kp_stack(&meta->free_track, kpp->kp_free_stack);
+	/* get_stack_skipnr() ensures the first entry is outside allocator. */
+	kpp->kp_ret = kpp->kp_stack[0];
+
+	raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+	return true;
+}
+#endif
-- 
cgit v1.2.3