39 files changed, 11944 insertions, 0 deletions
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
new file mode 100644
index 0000000000..3046923915
--- /dev/null
+++ b/arch/mips/mm/Makefile
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Linux/MIPS-specific parts of the memory manager.
+#
+
+obj-y				+= cache.o
+obj-y				+= context.o
+obj-y				+= extable.o
+obj-y				+= fault.o
+obj-y				+= init.o
+obj-y				+= mmap.o
+obj-y				+= page.o
+obj-y				+= page-funcs.o
+obj-y				+= pgtable.o
+obj-y				+= tlbex.o
+obj-y				+= tlbex-fault.o
+obj-y				+= tlb-funcs.o
+
+ifdef CONFIG_CPU_MICROMIPS
+obj-y				+= uasm-micromips.o
+else
+obj-y				+= uasm-mips.o
+endif
+
+ifndef CONFIG_EVA
+obj-y				+= maccess.o
+endif
+
+obj-$(CONFIG_32BIT)		+= ioremap.o pgtable-32.o
+obj-$(CONFIG_64BIT)		+= ioremap64.o pgtable-64.o
+obj-$(CONFIG_HIGHMEM)		+= highmem.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_DMA_NONCOHERENT)	+= dma-noncoherent.o
+
+obj-$(CONFIG_CPU_R3K_TLB)	+= tlb-r3k.o
+obj-$(CONFIG_CPU_R4K_CACHE_TLB) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R3000)		+= c-r3k.o
+obj-$(CONFIG_CPU_SB1)		+= c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o
+obj-$(CONFIG_CPU_CAVIUM_OCTEON) += c-octeon.o cex-oct.o tlb-r4k.o
+
+obj-$(CONFIG_IP22_CPU_SCACHE)	+= sc-ip22.o
+obj-$(CONFIG_R5000_CPU_SCACHE)	+= sc-r5k.o
+obj-$(CONFIG_RM7000_CPU_SCACHE) += sc-rm7k.o
+obj-$(CONFIG_MIPS_CPU_SCACHE)	+= sc-mips.o
+obj-$(CONFIG_SCACHE_DEBUGFS)	+= sc-debugfs.o
+
+obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
new file mode 100644
index 0000000000..b7393b61cf
--- /dev/null
+++ b/arch/mips/mm/c-octeon.c
@@ -0,0 +1,355 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2005-2007 Cavium Networks
+ */
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/bitops.h>
+#include <linux/cpu.h>
+#include <linux/io.h>
+
+#include <asm/bcache.h>
+#include <asm/bootinfo.h>
+#include <asm/cacheops.h>
+#include <asm/cpu-features.h>
+#include <asm/cpu-type.h>
+#include <asm/page.h>
+#include <asm/r4kcache.h>
+#include <asm/traps.h>
+#include <asm/mmu_context.h>
+
+#include <asm/octeon/octeon.h>
+
+unsigned long long cache_err_dcache[NR_CPUS];
+EXPORT_SYMBOL_GPL(cache_err_dcache);
+
+/*
+ * Octeon automatically flushes the dcache on tlb changes, so
+ * from Linux's viewpoint it acts much like a physically
+ * tagged cache. No flushing is needed
+ *
+ */
+static void octeon_flush_data_cache_page(unsigned long addr)
+{
+    /* Nothing to do */
+}
+
+static inline void octeon_local_flush_icache(void)
+{
+	asm volatile ("synci 0($0)");
+}
+
+/*
+ * Flush local I-cache for the specified range.
+ */
+static void local_octeon_flush_icache_range(unsigned long start,
+					    unsigned long end)
+{
+	octeon_local_flush_icache();
+}
+
+/**
+ * octeon_flush_icache_all_cores -  Flush caches as necessary for all cores
+ * affected by a vma. If no vma is supplied, all cores are flushed.
+ *
+ * @vma:    VMA to flush or NULL to flush all icaches.
+ */
+static void octeon_flush_icache_all_cores(struct vm_area_struct *vma)
+{
+	extern void octeon_send_ipi_single(int cpu, unsigned int action);
+#ifdef CONFIG_SMP
+	int cpu;
+	cpumask_t mask;
+#endif
+
+	mb();
+	octeon_local_flush_icache();
+#ifdef CONFIG_SMP
+	preempt_disable();
+	cpu = smp_processor_id();
+
+	/*
+	 * If we have a vma structure, we only need to worry about
+	 * cores it has been used on
+	 */
+	if (vma)
+		mask = *mm_cpumask(vma->vm_mm);
+	else
+		mask = *cpu_online_mask;
+	cpumask_clear_cpu(cpu, &mask);
+#ifdef CONFIG_CAVIUM_OCTEON_SOC
+	for_each_cpu(cpu, &mask)
+		octeon_send_ipi_single(cpu, SMP_ICACHE_FLUSH);
+#else
+	smp_call_function_many(&mask, (smp_call_func_t)octeon_local_flush_icache,
+			       NULL, 1);
+#endif
+
+	preempt_enable();
+#endif
+}
+
+
+/*
+ * Called to flush the icache on all cores
+ */
+static void octeon_flush_icache_all(void)
+{
+	octeon_flush_icache_all_cores(NULL);
+}
+
+
+/**
+ * octeon_flush_cache_mm - flush all memory associated with a memory context.
+ *
+ * @mm:	    Memory context to flush
+ */
+static void octeon_flush_cache_mm(struct mm_struct *mm)
+{
+	/*
+	 * According to the R4K version of this file, CPUs without
+	 * dcache aliases don't need to do anything here
+	 */
+}
+
+
+/*
+ * Flush a range of kernel addresses out of the icache
+ *
+ */
+static void octeon_flush_icache_range(unsigned long start, unsigned long end)
+{
+	octeon_flush_icache_all_cores(NULL);
+}
+
+
+/**
+ * octeon_flush_cache_range - Flush a range out of a vma
+ *
+ * @vma:    VMA to flush
+ * @start:  beginning address for flush
+ * @end:    ending address for flush
+ */
+static void octeon_flush_cache_range(struct vm_area_struct *vma,
+				     unsigned long start, unsigned long end)
+{
+	if (vma->vm_flags & VM_EXEC)
+		octeon_flush_icache_all_cores(vma);
+}
+
+
+/**
+ * octeon_flush_cache_page - Flush a specific page of a vma
+ *
+ * @vma:    VMA to flush page for
+ * @page:   Page to flush
+ * @pfn:    Page frame number
+ */
+static void octeon_flush_cache_page(struct vm_area_struct *vma,
+				    unsigned long page, unsigned long pfn)
+{
+	if (vma->vm_flags & VM_EXEC)
+		octeon_flush_icache_all_cores(vma);
+}
+
+static void octeon_flush_kernel_vmap_range(unsigned long vaddr, int size)
+{
+	BUG();
+}
+
+/*
+ * Probe Octeon's caches
+ *
+ */
+static void probe_octeon(void)
+{
+	unsigned long icache_size;
+	unsigned long dcache_size;
+	unsigned int config1;
+	struct cpuinfo_mips *c = &current_cpu_data;
+	int cputype = current_cpu_type();
+
+	config1 = read_c0_config1();
+	switch (cputype) {
+	case CPU_CAVIUM_OCTEON:
+	case CPU_CAVIUM_OCTEON_PLUS:
+		c->icache.linesz = 2 << ((config1 >> 19) & 7);
+		c->icache.sets = 64 << ((config1 >> 22) & 7);
+		c->icache.ways = 1 + ((config1 >> 16) & 7);
+		c->icache.flags |= MIPS_CACHE_VTAG;
+		icache_size =
+			c->icache.sets * c->icache.ways * c->icache.linesz;
+		c->icache.waybit = ffs(icache_size / c->icache.ways) - 1;
+		c->dcache.linesz = 128;
+		if (cputype == CPU_CAVIUM_OCTEON_PLUS)
+			c->dcache.sets = 2; /* CN5XXX has two Dcache sets */
+		else
+			c->dcache.sets = 1; /* CN3XXX has one Dcache set */
+		c->dcache.ways = 64;
+		dcache_size =
+			c->dcache.sets * c->dcache.ways * c->dcache.linesz;
+		c->dcache.waybit = ffs(dcache_size / c->dcache.ways) - 1;
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+
+	case CPU_CAVIUM_OCTEON2:
+		c->icache.linesz = 2 << ((config1 >> 19) & 7);
+		c->icache.sets = 8;
+		c->icache.ways = 37;
+		c->icache.flags |= MIPS_CACHE_VTAG;
+		icache_size = c->icache.sets * c->icache.ways * c->icache.linesz;
+
+		c->dcache.linesz = 128;
+		c->dcache.ways = 32;
+		c->dcache.sets = 8;
+		dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz;
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+
+	case CPU_CAVIUM_OCTEON3:
+		c->icache.linesz = 128;
+		c->icache.sets = 16;
+		c->icache.ways = 39;
+		c->icache.flags |= MIPS_CACHE_VTAG;
+		icache_size = c->icache.sets * c->icache.ways * c->icache.linesz;
+
+		c->dcache.linesz = 128;
+		c->dcache.ways = 32;
+		c->dcache.sets = 8;
+		dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz;
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+
+	default:
+		panic("Unsupported Cavium Networks CPU type");
+		break;
+	}
+
+	/* compute a couple of other cache variables */
+	c->icache.waysize = icache_size / c->icache.ways;
+	c->dcache.waysize = dcache_size / c->dcache.ways;
+
+	c->icache.sets = icache_size / (c->icache.linesz * c->icache.ways);
+	c->dcache.sets = dcache_size / (c->dcache.linesz * c->dcache.ways);
+
+	if (smp_processor_id() == 0) {
+		pr_info("Primary instruction cache %ldkB, %s, %d way, "
+			"%d sets, linesize %d bytes.\n",
+			icache_size >> 10,
+			cpu_has_vtag_icache ?
+				"virtually tagged" : "physically tagged",
+			c->icache.ways, c->icache.sets, c->icache.linesz);
+
+		pr_info("Primary data cache %ldkB, %d-way, %d sets, "
+			"linesize %d bytes.\n",
+			dcache_size >> 10, c->dcache.ways,
+			c->dcache.sets, c->dcache.linesz);
+	}
+}
+
+static void  octeon_cache_error_setup(void)
+{
+	extern char except_vec2_octeon;
+	set_handler(0x100, &except_vec2_octeon, 0x80);
+}
+
+/*
+ * Setup the Octeon cache flush routines
+ *
+ */
+void octeon_cache_init(void)
+{
+	probe_octeon();
+
+	shm_align_mask = PAGE_SIZE - 1;
+
+	flush_cache_all			= octeon_flush_icache_all;
+	__flush_cache_all		= octeon_flush_icache_all;
+	flush_cache_mm			= octeon_flush_cache_mm;
+	flush_cache_page		= octeon_flush_cache_page;
+	flush_cache_range		= octeon_flush_cache_range;
+	flush_icache_all		= octeon_flush_icache_all;
+	flush_data_cache_page		= octeon_flush_data_cache_page;
+	flush_icache_range		= octeon_flush_icache_range;
+	local_flush_icache_range	= local_octeon_flush_icache_range;
+	__flush_icache_user_range	= octeon_flush_icache_range;
+	__local_flush_icache_user_range	= local_octeon_flush_icache_range;
+
+	__flush_kernel_vmap_range	= octeon_flush_kernel_vmap_range;
+
+	build_clear_page();
+	build_copy_page();
+
+	board_cache_error_setup = octeon_cache_error_setup;
+}
+
+/*
+ * Handle a cache error exception
+ */
+static RAW_NOTIFIER_HEAD(co_cache_error_chain);
+
+int register_co_cache_error_notifier(struct notifier_block *nb)
+{
+	return raw_notifier_chain_register(&co_cache_error_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_co_cache_error_notifier);
+
+int unregister_co_cache_error_notifier(struct notifier_block *nb)
+{
+	return raw_notifier_chain_unregister(&co_cache_error_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_co_cache_error_notifier);
+
+static void co_cache_error_call_notifiers(unsigned long val)
+{
+	int rv = raw_notifier_call_chain(&co_cache_error_chain, val, NULL);
+	if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) {
+		u64 dcache_err;
+		unsigned long coreid = cvmx_get_core_num();
+		u64 icache_err = read_octeon_c0_icacheerr();
+
+		if (val) {
+			dcache_err = cache_err_dcache[coreid];
+			cache_err_dcache[coreid] = 0;
+		} else {
+			dcache_err = read_octeon_c0_dcacheerr();
+		}
+
+		pr_err("Core%lu: Cache error exception:\n", coreid);
+		pr_err("cp0_errorepc == %lx\n", read_c0_errorepc());
+		if (icache_err & 1) {
+			pr_err("CacheErr (Icache) == %llx\n",
+			       (unsigned long long)icache_err);
+			write_octeon_c0_icacheerr(0);
+		}
+		if (dcache_err & 1) {
+			pr_err("CacheErr (Dcache) == %llx\n",
+			       (unsigned long long)dcache_err);
+		}
+	}
+}
+
+/*
+ * Called when the exception is recoverable
+ */
+
+asmlinkage void cache_parity_error_octeon_recoverable(void)
+{
+	co_cache_error_call_notifiers(0);
+}
+
+/*
+ * Called when the exception is not recoverable
+ */
+
+asmlinkage void cache_parity_error_octeon_non_recoverable(void)
+{
+	co_cache_error_call_notifiers(1);
+	panic("Can't handle cache error: nested exception");
+}
diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c
new file mode 100644
index 0000000000..5869df848f
--- /dev/null
+++ b/arch/mips/mm/c-r3k.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * r2300.c: R2000 and R3000 specific mmu/cache code.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
+ *
+ * with a lot of changes to make this thing work for R3000s
+ * Tx39XX R4k style caches added. HK
+ * Copyright (C) 1998, 1999, 2000 Harald Koerfgen
+ * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov
+ * Copyright (C) 2001, 2004, 2007  Maciej W. Rozycki
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/mmu_context.h>
+#include <asm/isadep.h>
+#include <asm/io.h>
+#include <asm/bootinfo.h>
+#include <asm/cpu.h>
+
+static unsigned long icache_size, dcache_size;		/* Size in bytes */
+static unsigned long icache_lsize, dcache_lsize;	/* Size in bytes */
+
+unsigned long r3k_cache_size(unsigned long ca_flags)
+{
+	unsigned long flags, status, dummy, size;
+	volatile unsigned long *p;
+
+	p = (volatile unsigned long *) KSEG0;
+
+	flags = read_c0_status();
+
+	/* isolate cache space */
+	write_c0_status((ca_flags|flags)&~ST0_IEC);
+
+	*p = 0xa5a55a5a;
+	dummy = *p;
+	status = read_c0_status();
+
+	if (dummy != 0xa5a55a5a || (status & ST0_CM)) {
+		size = 0;
+	} else {
+		for (size = 128; size <= 0x40000; size <<= 1)
+			*(p + size) = 0;
+		*p = -1;
+		for (size = 128;
+		     (size <= 0x40000) && (*(p + size) == 0);
+		     size <<= 1)
+			;
+		if (size > 0x40000)
+			size = 0;
+	}
+
+	write_c0_status(flags);
+
+	return size * sizeof(*p);
+}
+
+unsigned long r3k_cache_lsize(unsigned long ca_flags)
+{
+	unsigned long flags, status, lsize, i;
+	volatile unsigned long *p;
+
+	p = (volatile unsigned long *) KSEG0;
+
+	flags = read_c0_status();
+
+	/* isolate cache space */
+	write_c0_status((ca_flags|flags)&~ST0_IEC);
+
+	for (i = 0; i < 128; i++)
+		*(p + i) = 0;
+	*(volatile unsigned char *)p = 0;
+	for (lsize = 1; lsize < 128; lsize <<= 1) {
+		*(p + lsize);
+		status = read_c0_status();
+		if (!(status & ST0_CM))
+			break;
+	}
+	for (i = 0; i < 128; i += lsize)
+		*(volatile unsigned char *)(p + i) = 0;
+
+	write_c0_status(flags);
+
+	return lsize * sizeof(*p);
+}
+
+static void r3k_probe_cache(void)
+{
+	dcache_size = r3k_cache_size(ST0_ISC);
+	if (dcache_size)
+		dcache_lsize = r3k_cache_lsize(ST0_ISC);
+
+	icache_size = r3k_cache_size(ST0_ISC|ST0_SWC);
+	if (icache_size)
+		icache_lsize = r3k_cache_lsize(ST0_ISC|ST0_SWC);
+}
+
+static void r3k_flush_icache_range(unsigned long start, unsigned long end)
+{
+	unsigned long size, i, flags;
+	volatile unsigned char *p;
+
+	size = end - start;
+	if (size > icache_size || KSEGX(start) != KSEG0) {
+		start = KSEG0;
+		size = icache_size;
+	}
+	p = (char *)start;
+
+	flags = read_c0_status();
+
+	/* isolate cache space */
+	write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC);
+
+	for (i = 0; i < size; i += 0x080) {
+		asm(	"sb\t$0, 0x000(%0)\n\t"
+			"sb\t$0, 0x004(%0)\n\t"
+			"sb\t$0, 0x008(%0)\n\t"
+			"sb\t$0, 0x00c(%0)\n\t"
+			"sb\t$0, 0x010(%0)\n\t"
+			"sb\t$0, 0x014(%0)\n\t"
+			"sb\t$0, 0x018(%0)\n\t"
+			"sb\t$0, 0x01c(%0)\n\t"
+			"sb\t$0, 0x020(%0)\n\t"
+			"sb\t$0, 0x024(%0)\n\t"
+			"sb\t$0, 0x028(%0)\n\t"
+			"sb\t$0, 0x02c(%0)\n\t"
+			"sb\t$0, 0x030(%0)\n\t"
+			"sb\t$0, 0x034(%0)\n\t"
+			"sb\t$0, 0x038(%0)\n\t"
+			"sb\t$0, 0x03c(%0)\n\t"
+			"sb\t$0, 0x040(%0)\n\t"
+			"sb\t$0, 0x044(%0)\n\t"
+			"sb\t$0, 0x048(%0)\n\t"
+			"sb\t$0, 0x04c(%0)\n\t"
+			"sb\t$0, 0x050(%0)\n\t"
+			"sb\t$0, 0x054(%0)\n\t"
+			"sb\t$0, 0x058(%0)\n\t"
+			"sb\t$0, 0x05c(%0)\n\t"
+			"sb\t$0, 0x060(%0)\n\t"
+			"sb\t$0, 0x064(%0)\n\t"
+			"sb\t$0, 0x068(%0)\n\t"
+			"sb\t$0, 0x06c(%0)\n\t"
+			"sb\t$0, 0x070(%0)\n\t"
+			"sb\t$0, 0x074(%0)\n\t"
+			"sb\t$0, 0x078(%0)\n\t"
+			"sb\t$0, 0x07c(%0)\n\t"
+			: : "r" (p) );
+		p += 0x080;
+	}
+
+	write_c0_status(flags);
+}
+
+static void r3k_flush_dcache_range(unsigned long start, unsigned long end)
+{
+	unsigned long size, i, flags;
+	volatile unsigned char *p;
+
+	size = end - start;
+	if (size > dcache_size || KSEGX(start) != KSEG0) {
+		start = KSEG0;
+		size = dcache_size;
+	}
+	p = (char *)start;
+
+	flags = read_c0_status();
+
+	/* isolate cache space */
+	write_c0_status((ST0_ISC|flags)&~ST0_IEC);
+
+	for (i = 0; i < size; i += 0x080) {
+		asm(	"sb\t$0, 0x000(%0)\n\t"
+			"sb\t$0, 0x004(%0)\n\t"
+			"sb\t$0, 0x008(%0)\n\t"
+			"sb\t$0, 0x00c(%0)\n\t"
+			"sb\t$0, 0x010(%0)\n\t"
+			"sb\t$0, 0x014(%0)\n\t"
+			"sb\t$0, 0x018(%0)\n\t"
+			"sb\t$0, 0x01c(%0)\n\t"
+			"sb\t$0, 0x020(%0)\n\t"
+			"sb\t$0, 0x024(%0)\n\t"
+			"sb\t$0, 0x028(%0)\n\t"
+			"sb\t$0, 0x02c(%0)\n\t"
+			"sb\t$0, 0x030(%0)\n\t"
+			"sb\t$0, 0x034(%0)\n\t"
+			"sb\t$0, 0x038(%0)\n\t"
+			"sb\t$0, 0x03c(%0)\n\t"
+			"sb\t$0, 0x040(%0)\n\t"
+			"sb\t$0, 0x044(%0)\n\t"
+			"sb\t$0, 0x048(%0)\n\t"
+			"sb\t$0, 0x04c(%0)\n\t"
+			"sb\t$0, 0x050(%0)\n\t"
+			"sb\t$0, 0x054(%0)\n\t"
+			"sb\t$0, 0x058(%0)\n\t"
+			"sb\t$0, 0x05c(%0)\n\t"
+			"sb\t$0, 0x060(%0)\n\t"
+			"sb\t$0, 0x064(%0)\n\t"
+			"sb\t$0, 0x068(%0)\n\t"
+			"sb\t$0, 0x06c(%0)\n\t"
+			"sb\t$0, 0x070(%0)\n\t"
+			"sb\t$0, 0x074(%0)\n\t"
+			"sb\t$0, 0x078(%0)\n\t"
+			"sb\t$0, 0x07c(%0)\n\t"
+			: : "r" (p) );
+		p += 0x080;
+	}
+
+	write_c0_status(flags);
+}
+
+static inline void r3k_flush_cache_all(void)
+{
+}
+
+static inline void r3k___flush_cache_all(void)
+{
+	r3k_flush_dcache_range(KSEG0, KSEG0 + dcache_size);
+	r3k_flush_icache_range(KSEG0, KSEG0 + icache_size);
+}
+
+static void r3k_flush_cache_mm(struct mm_struct *mm)
+{
+}
+
+static void r3k_flush_cache_range(struct vm_area_struct *vma,
+				  unsigned long start, unsigned long end)
+{
+}
+
+static void r3k_flush_cache_page(struct vm_area_struct *vma,
+				 unsigned long addr, unsigned long pfn)
+{
+	unsigned long kaddr = KSEG0ADDR(pfn << PAGE_SHIFT);
+	int exec = vma->vm_flags & VM_EXEC;
+	struct mm_struct *mm = vma->vm_mm;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	pr_debug("cpage[%08llx,%08lx]\n",
+		 cpu_context(smp_processor_id(), mm), addr);
+
+	/* No ASID => no such page in the cache.  */
+	if (cpu_context(smp_processor_id(), mm) == 0)
+		return;
+
+	pmdp = pmd_off(mm, addr);
+	ptep = pte_offset_kernel(pmdp, addr);
+
+	/* Invalid => no such page in the cache.  */
+	if (!(pte_val(*ptep) & _PAGE_PRESENT))
+		return;
+
+	r3k_flush_dcache_range(kaddr, kaddr + PAGE_SIZE);
+	if (exec)
+		r3k_flush_icache_range(kaddr, kaddr + PAGE_SIZE);
+}
+
+static void r3k_flush_data_cache_page(unsigned long addr)
+{
+}
+
+static void r3k_flush_kernel_vmap_range(unsigned long vaddr, int size)
+{
+	BUG();
+}
+
+static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size)
+{
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	iob();
+	r3k_flush_dcache_range(start, start + size);
+}
+
+void r3k_cache_init(void)
+{
+	extern void build_clear_page(void);
+	extern void build_copy_page(void);
+
+	r3k_probe_cache();
+
+	flush_cache_all = r3k_flush_cache_all;
+	__flush_cache_all = r3k___flush_cache_all;
+	flush_cache_mm = r3k_flush_cache_mm;
+	flush_cache_range = r3k_flush_cache_range;
+	flush_cache_page = r3k_flush_cache_page;
+	flush_icache_range = r3k_flush_icache_range;
+	local_flush_icache_range = r3k_flush_icache_range;
+	__flush_icache_user_range = r3k_flush_icache_range;
+	__local_flush_icache_user_range = r3k_flush_icache_range;
+
+	__flush_kernel_vmap_range = r3k_flush_kernel_vmap_range;
+
+	flush_data_cache_page = r3k_flush_data_cache_page;
+
+	_dma_cache_wback_inv = r3k_dma_cache_wback_inv;
+	_dma_cache_wback = r3k_dma_cache_wback_inv;
+	_dma_cache_inv = r3k_dma_cache_wback_inv;
+
+	pr_info("Primary instruction cache %ldkB, linesize %ld bytes.\n",
+		icache_size >> 10, icache_lsize);
+	pr_info("Primary data cache %ldkB, linesize %ld bytes.\n",
+		dcache_size >> 10, dcache_lsize);
+
+	build_clear_page();
+	build_copy_page();
+}
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
new file mode 100644
index 0000000000..187d1c1636
--- /dev/null
+++ b/arch/mips/mm/c-r4k.c
@@ -0,0 +1,1835 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Ralf Baechle (ralf@gnu.org)
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ */
+#include <linux/cpu_pm.h>
+#include <linux/hardirq.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/linkage.h>
+#include <linux/preempt.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/bitops.h>
+#include <linux/dma-map-ops.h> /* for dma_default_coherent */
+
+#include <asm/bcache.h>
+#include <asm/bootinfo.h>
+#include <asm/cache.h>
+#include <asm/cacheops.h>
+#include <asm/cpu.h>
+#include <asm/cpu-features.h>
+#include <asm/cpu-type.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/r4kcache.h>
+#include <asm/sections.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h> /* for run_uncached() */
+#include <asm/traps.h>
+#include <asm/mips-cps.h>
+
+/*
+ * Bits describing what cache ops an SMP callback function may perform.
+ *
+ * R4K_HIT   -	Virtual user or kernel address based cache operations. The
+ *		active_mm must be checked before using user addresses, falling
+ *		back to kmap.
+ * R4K_INDEX -	Index based cache operations.
+ */
+
+#define R4K_HIT		BIT(0)
+#define R4K_INDEX	BIT(1)
+
+/**
+ * r4k_op_needs_ipi() - Decide if a cache op needs to be done on every core.
+ * @type:	Type of cache operations (R4K_HIT or R4K_INDEX).
+ *
+ * Decides whether a cache op needs to be performed on every core in the system.
+ * This may change depending on the @type of cache operation, as well as the set
+ * of online CPUs, so preemption should be disabled by the caller to prevent CPU
+ * hotplug from changing the result.
+ *
+ * Returns:	1 if the cache operation @type should be done on every core in
+ *		the system.
+ *		0 if the cache operation @type is globalized and only needs to
+ *		be performed on a simple CPU.
+ */
+static inline bool r4k_op_needs_ipi(unsigned int type)
+{
+	/* The MIPS Coherence Manager (CM) globalizes address-based cache ops */
+	if (type == R4K_HIT && mips_cm_present())
+		return false;
+
+	/*
+	 * Hardware doesn't globalize the required cache ops, so SMP calls may
+	 * be needed, but only if there are foreign CPUs (non-siblings with
+	 * separate caches).
+	 */
+	/* cpu_foreign_map[] undeclared when !CONFIG_SMP */
+#ifdef CONFIG_SMP
+	return !cpumask_empty(&cpu_foreign_map[0]);
+#else
+	return false;
+#endif
+}
+
+/*
+ * Special Variant of smp_call_function for use by cache functions:
+ *
+ *  o No return value
+ *  o collapses to normal function call on UP kernels
+ *  o collapses to normal function call on systems with a single shared
+ *    primary cache.
+ *  o doesn't disable interrupts on the local CPU
+ */
+static inline void r4k_on_each_cpu(unsigned int type,
+				   void (*func)(void *info), void *info)
+{
+	preempt_disable();
+	if (r4k_op_needs_ipi(type))
+		smp_call_function_many(&cpu_foreign_map[smp_processor_id()],
+				       func, info, 1);
+	func(info);
+	preempt_enable();
+}
+
+/*
+ * Must die.
+ */
+static unsigned long icache_size __read_mostly;
+static unsigned long dcache_size __read_mostly;
+static unsigned long vcache_size __read_mostly;
+static unsigned long scache_size __read_mostly;
+
+#define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
+#define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)
+
+#define R4600_HIT_CACHEOP_WAR_IMPL					\
+do {									\
+	if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&		\
+	    cpu_is_r4600_v2_x())					\
+		*(volatile unsigned long *)CKSEG1;			\
+	if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP))					\
+		__asm__ __volatile__("nop;nop;nop;nop");		\
+} while (0)
+
+static void (*r4k_blast_dcache_page)(unsigned long addr);
+
+static inline void r4k_blast_dcache_page_dc32(unsigned long addr)
+{
+	R4600_HIT_CACHEOP_WAR_IMPL;
+	blast_dcache32_page(addr);
+}
+
+static inline void r4k_blast_dcache_page_dc64(unsigned long addr)
+{
+	blast_dcache64_page(addr);
+}
+
+static inline void r4k_blast_dcache_page_dc128(unsigned long addr)
+{
+	blast_dcache128_page(addr);
+}
+
+static void r4k_blast_dcache_page_setup(void)
+{
+	unsigned long  dc_lsize = cpu_dcache_line_size();
+
+	switch (dc_lsize) {
+	case 0:
+		r4k_blast_dcache_page = (void *)cache_noop;
+		break;
+	case 16:
+		r4k_blast_dcache_page = blast_dcache16_page;
+		break;
+	case 32:
+		r4k_blast_dcache_page = r4k_blast_dcache_page_dc32;
+		break;
+	case 64:
+		r4k_blast_dcache_page = r4k_blast_dcache_page_dc64;
+		break;
+	case 128:
+		r4k_blast_dcache_page = r4k_blast_dcache_page_dc128;
+		break;
+	default:
+		break;
+	}
+}
+
+#ifndef CONFIG_EVA
+#define r4k_blast_dcache_user_page  r4k_blast_dcache_page
+#else
+
+static void (*r4k_blast_dcache_user_page)(unsigned long addr);
+
+static void r4k_blast_dcache_user_page_setup(void)
+{
+	unsigned long  dc_lsize = cpu_dcache_line_size();
+
+	if (dc_lsize == 0)
+		r4k_blast_dcache_user_page = (void *)cache_noop;
+	else if (dc_lsize == 16)
+		r4k_blast_dcache_user_page = blast_dcache16_user_page;
+	else if (dc_lsize == 32)
+		r4k_blast_dcache_user_page = blast_dcache32_user_page;
+	else if (dc_lsize == 64)
+		r4k_blast_dcache_user_page = blast_dcache64_user_page;
+}
+
+#endif
+
+void (* r4k_blast_dcache)(void);
+EXPORT_SYMBOL(r4k_blast_dcache);
+
+static void r4k_blast_dcache_setup(void)
+{
+	unsigned long dc_lsize = cpu_dcache_line_size();
+
+	if (dc_lsize == 0)
+		r4k_blast_dcache = (void *)cache_noop;
+	else if (dc_lsize == 16)
+		r4k_blast_dcache = blast_dcache16;
+	else if (dc_lsize == 32)
+		r4k_blast_dcache = blast_dcache32;
+	else if (dc_lsize == 64)
+		r4k_blast_dcache = blast_dcache64;
+	else if (dc_lsize == 128)
+		r4k_blast_dcache = blast_dcache128;
+}
+
+/* force code alignment (used for CONFIG_WAR_TX49XX_ICACHE_INDEX_INV) */
+#define JUMP_TO_ALIGN(order) \
+	__asm__ __volatile__( \
+		"b\t1f\n\t" \
+		".align\t" #order "\n\t" \
+		"1:\n\t" \
+		)
+#define CACHE32_UNROLL32_ALIGN	JUMP_TO_ALIGN(10) /* 32 * 32 = 1024 */
+#define CACHE32_UNROLL32_ALIGN2 JUMP_TO_ALIGN(11)
+
+static inline void blast_r4600_v1_icache32(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	blast_icache32();
+	local_irq_restore(flags);
+}
+
+static inline void tx49_blast_icache32(void)
+{
+	unsigned long start = INDEX_BASE;
+	unsigned long end = start + current_cpu_data.icache.waysize;
+	unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit;
+	unsigned long ws_end = current_cpu_data.icache.ways <<
+			       current_cpu_data.icache.waybit;
+	unsigned long ws, addr;
+
+	CACHE32_UNROLL32_ALIGN2;
+	/* I'm in even chunk.  blast odd chunks */
+	for (ws = 0; ws < ws_end; ws += ws_inc)
+		for (addr = start + 0x400; addr < end; addr += 0x400 * 2)
+			cache_unroll(32, kernel_cache, Index_Invalidate_I,
+				     addr | ws, 32);
+	CACHE32_UNROLL32_ALIGN;
+	/* I'm in odd chunk.  blast even chunks */
+	for (ws = 0; ws < ws_end; ws += ws_inc)
+		for (addr = start; addr < end; addr += 0x400 * 2)
+			cache_unroll(32, kernel_cache, Index_Invalidate_I,
+				     addr | ws, 32);
+}
+
+static void (* r4k_blast_icache_page)(unsigned long addr);
+
+static void r4k_blast_icache_page_setup(void)
+{
+	unsigned long ic_lsize = cpu_icache_line_size();
+
+	if (ic_lsize == 0)
+		r4k_blast_icache_page = (void *)cache_noop;
+	else if (ic_lsize == 16)
+		r4k_blast_icache_page = blast_icache16_page;
+	else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2EF)
+		r4k_blast_icache_page = loongson2_blast_icache32_page;
+	else if (ic_lsize == 32)
+		r4k_blast_icache_page = blast_icache32_page;
+	else if (ic_lsize == 64)
+		r4k_blast_icache_page = blast_icache64_page;
+	else if (ic_lsize == 128)
+		r4k_blast_icache_page = blast_icache128_page;
+}
+
+#ifndef CONFIG_EVA
+#define r4k_blast_icache_user_page  r4k_blast_icache_page
+#else
+
+static void (*r4k_blast_icache_user_page)(unsigned long addr);
+
+static void r4k_blast_icache_user_page_setup(void)
+{
+	unsigned long ic_lsize = cpu_icache_line_size();
+
+	if (ic_lsize == 0)
+		r4k_blast_icache_user_page = (void *)cache_noop;
+	else if (ic_lsize == 16)
+		r4k_blast_icache_user_page = blast_icache16_user_page;
+	else if (ic_lsize == 32)
+		r4k_blast_icache_user_page = blast_icache32_user_page;
+	else if (ic_lsize == 64)
+		r4k_blast_icache_user_page = blast_icache64_user_page;
+}
+
+#endif
+
+void (* r4k_blast_icache)(void);
+EXPORT_SYMBOL(r4k_blast_icache);
+
+static void r4k_blast_icache_setup(void)
+{
+	unsigned long ic_lsize = cpu_icache_line_size();
+
+	if (ic_lsize == 0)
+		r4k_blast_icache = (void *)cache_noop;
+	else if (ic_lsize == 16)
+		r4k_blast_icache = blast_icache16;
+	else if (ic_lsize == 32) {
+		if (IS_ENABLED(CONFIG_WAR_R4600_V1_INDEX_ICACHEOP) &&
+		    cpu_is_r4600_v1_x())
+			r4k_blast_icache = blast_r4600_v1_icache32;
+		else if (IS_ENABLED(CONFIG_WAR_TX49XX_ICACHE_INDEX_INV))
+			r4k_blast_icache = tx49_blast_icache32;
+		else if (current_cpu_type() == CPU_LOONGSON2EF)
+			r4k_blast_icache = loongson2_blast_icache32;
+		else
+			r4k_blast_icache = blast_icache32;
+	} else if (ic_lsize == 64)
+		r4k_blast_icache = blast_icache64;
+	else if (ic_lsize == 128)
+		r4k_blast_icache = blast_icache128;
+}
+
+static void (* r4k_blast_scache_page)(unsigned long addr);
+
+static void r4k_blast_scache_page_setup(void)
+{
+	unsigned long sc_lsize = cpu_scache_line_size();
+
+	if (scache_size == 0)
+		r4k_blast_scache_page = (void *)cache_noop;
+	else if (sc_lsize == 16)
+		r4k_blast_scache_page = blast_scache16_page;
+	else if (sc_lsize == 32)
+		r4k_blast_scache_page = blast_scache32_page;
+	else if (sc_lsize == 64)
+		r4k_blast_scache_page = blast_scache64_page;
+	else if (sc_lsize == 128)
+		r4k_blast_scache_page = blast_scache128_page;
+}
+
+static void (* r4k_blast_scache)(void);
+
+static void r4k_blast_scache_setup(void)
+{
+	unsigned long sc_lsize = cpu_scache_line_size();
+
+	if (scache_size == 0)
+		r4k_blast_scache = (void *)cache_noop;
+	else if (sc_lsize == 16)
+		r4k_blast_scache = blast_scache16;
+	else if (sc_lsize == 32)
+		r4k_blast_scache = blast_scache32;
+	else if (sc_lsize == 64)
+		r4k_blast_scache = blast_scache64;
+	else if (sc_lsize == 128)
+		r4k_blast_scache = blast_scache128;
+}
+
+static void (*r4k_blast_scache_node)(long node);
+
+static void r4k_blast_scache_node_setup(void)
+{
+	unsigned long sc_lsize = cpu_scache_line_size();
+
+	if (current_cpu_type() != CPU_LOONGSON64)
+		r4k_blast_scache_node = (void *)cache_noop;
+	else if (sc_lsize == 16)
+		r4k_blast_scache_node = blast_scache16_node;
+	else if (sc_lsize == 32)
+		r4k_blast_scache_node = blast_scache32_node;
+	else if (sc_lsize == 64)
+		r4k_blast_scache_node = blast_scache64_node;
+	else if (sc_lsize == 128)
+		r4k_blast_scache_node = blast_scache128_node;
+}
+
+static inline void local_r4k___flush_cache_all(void * args)
+{
+	switch (current_cpu_type()) {
+	case CPU_LOONGSON2EF:
+	case CPU_R4000SC:
+	case CPU_R4000MC:
+	case CPU_R4400SC:
+	case CPU_R4400MC:
+	case CPU_R10000:
+	case CPU_R12000:
+	case CPU_R14000:
+	case CPU_R16000:
+		/*
+		 * These caches are inclusive caches, that is, if something
+		 * is not cached in the S-cache, we know it also won't be
+		 * in one of the primary caches.
+		 */
+		r4k_blast_scache();
+		break;
+
+	case CPU_LOONGSON64:
+		/* Use get_ebase_cpunum() for both NUMA=y/n */
+		r4k_blast_scache_node(get_ebase_cpunum() >> 2);
+		break;
+
+	case CPU_BMIPS5000:
+		r4k_blast_scache();
+		__sync();
+		break;
+
+	default:
+		r4k_blast_dcache();
+		r4k_blast_icache();
+		break;
+	}
+}
+
+static void r4k___flush_cache_all(void)
+{
+	r4k_on_each_cpu(R4K_INDEX, local_r4k___flush_cache_all, NULL);
+}
+
+/**
+ * has_valid_asid() - Determine if an mm already has an ASID.
+ * @mm:		Memory map.
+ * @type:	R4K_HIT or R4K_INDEX, type of cache op.
+ *
+ * Determines whether @mm already has an ASID on any of the CPUs which cache ops
+ * of type @type within an r4k_on_each_cpu() call will affect. If
+ * r4k_on_each_cpu() does an SMP call to a single VPE in each core, then the
+ * scope of the operation is confined to sibling CPUs, otherwise all online CPUs
+ * will need to be checked.
+ *
+ * Must be called in non-preemptive context.
+ *
+ * Returns:	1 if the CPUs affected by @type cache ops have an ASID for @mm.
+ *		0 otherwise.
+ */
+static inline int has_valid_asid(const struct mm_struct *mm, unsigned int type)
+{
+	unsigned int i;
+	const cpumask_t *mask = cpu_present_mask;
+
+	if (cpu_has_mmid)
+		return cpu_context(0, mm) != 0;
+
+	/* cpu_sibling_map[] undeclared when !CONFIG_SMP */
+#ifdef CONFIG_SMP
+	/*
+	 * If r4k_on_each_cpu does SMP calls, it does them to a single VPE in
+	 * each foreign core, so we only need to worry about siblings.
+	 * Otherwise we need to worry about all present CPUs.
+	 */
+	if (r4k_op_needs_ipi(type))
+		mask = &cpu_sibling_map[smp_processor_id()];
+#endif
+	for_each_cpu(i, mask)
+		if (cpu_context(i, mm))
+			return 1;
+	return 0;
+}
+
+static void r4k__flush_cache_vmap(void)
+{
+	r4k_blast_dcache();
+}
+
+static void r4k__flush_cache_vunmap(void)
+{
+	r4k_blast_dcache();
+}
+
+/*
+ * Note: flush_tlb_range() assumes flush_cache_range() sufficiently flushes
+ * whole caches when vma is executable.
+ */
+static inline void local_r4k_flush_cache_range(void * args)
+{
+	struct vm_area_struct *vma = args;
+	int exec = vma->vm_flags & VM_EXEC;
+
+	if (!has_valid_asid(vma->vm_mm, R4K_INDEX))
+		return;
+
+	/*
+	 * If dcache can alias, we must blast it since mapping is changing.
+	 * If executable, we must ensure any dirty lines are written back far
+	 * enough to be visible to icache.
+	 */
+	if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc))
+		r4k_blast_dcache();
+	/* If executable, blast stale lines from icache */
+	if (exec)
+		r4k_blast_icache();
+}
+
+static void r4k_flush_cache_range(struct vm_area_struct *vma,
+	unsigned long start, unsigned long end)
+{
+	int exec = vma->vm_flags & VM_EXEC;
+
+	if (cpu_has_dc_aliases || exec)
+		r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_range, vma);
+}
+
+static inline void local_r4k_flush_cache_mm(void * args)
+{
+	struct mm_struct *mm = args;
+
+	if (!has_valid_asid(mm, R4K_INDEX))
+		return;
+
+	/*
+	 * Kludge alert.  For obscure reasons R4000SC and R4400SC go nuts if we
+	 * only flush the primary caches but R1x000 behave sane ...
+	 * R4000SC and R4400SC indexed S-cache ops also invalidate primary
+	 * caches, so we can bail out early.
+	 */
+	if (current_cpu_type() == CPU_R4000SC ||
+	    current_cpu_type() == CPU_R4000MC ||
+	    current_cpu_type() == CPU_R4400SC ||
+	    current_cpu_type() == CPU_R4400MC) {
+		r4k_blast_scache();
+		return;
+	}
+
+	r4k_blast_dcache();
+}
+
+static void r4k_flush_cache_mm(struct mm_struct *mm)
+{
+	if (!cpu_has_dc_aliases)
+		return;
+
+	r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_mm, mm);
+}
+
+struct flush_cache_page_args {
+	struct vm_area_struct *vma;
+	unsigned long addr;
+	unsigned long pfn;
+};
+
+static inline void local_r4k_flush_cache_page(void *args)
+{
+	struct flush_cache_page_args *fcp_args = args;
+	struct vm_area_struct *vma = fcp_args->vma;
+	unsigned long addr = fcp_args->addr;
+	struct page *page = pfn_to_page(fcp_args->pfn);
+	int exec = vma->vm_flags & VM_EXEC;
+	struct mm_struct *mm = vma->vm_mm;
+	int map_coherent = 0;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	void *vaddr;
+
+	/*
+	 * If owns no valid ASID yet, cannot possibly have gotten
+	 * this page into the cache.
+	 */
+	if (!has_valid_asid(mm, R4K_HIT))
+		return;
+
+	addr &= PAGE_MASK;
+	pmdp = pmd_off(mm, addr);
+	ptep = pte_offset_kernel(pmdp, addr);
+
+	/*
+	 * If the page isn't marked valid, the page cannot possibly be
+	 * in the cache.
+	 */
+	if (!(pte_present(*ptep)))
+		return;
+
+	if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID))
+		vaddr = NULL;
+	else {
+		struct folio *folio = page_folio(page);
+		/*
+		 * Use kmap_coherent or kmap_atomic to do flushes for
+		 * another ASID than the current one.
+		 */
+		map_coherent = (cpu_has_dc_aliases &&
+				folio_mapped(folio) &&
+				!folio_test_dcache_dirty(folio));
+		if (map_coherent)
+			vaddr = kmap_coherent(page, addr);
+		else
+			vaddr = kmap_atomic(page);
+		addr = (unsigned long)vaddr;
+	}
+
+	if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) {
+		vaddr ? r4k_blast_dcache_page(addr) :
+			r4k_blast_dcache_user_page(addr);
+		if (exec && !cpu_icache_snoops_remote_store)
+			r4k_blast_scache_page(addr);
+	}
+	if (exec) {
+		if (vaddr && cpu_has_vtag_icache && mm == current->active_mm) {
+			drop_mmu_context(mm);
+		} else
+			vaddr ? r4k_blast_icache_page(addr) :
+				r4k_blast_icache_user_page(addr);
+	}
+
+	if (vaddr) {
+		if (map_coherent)
+			kunmap_coherent();
+		else
+			kunmap_atomic(vaddr);
+	}
+}
+
+static void r4k_flush_cache_page(struct vm_area_struct *vma,
+	unsigned long addr, unsigned long pfn)
+{
+	struct flush_cache_page_args args;
+
+	args.vma = vma;
+	args.addr = addr;
+	args.pfn = pfn;
+
+	r4k_on_each_cpu(R4K_HIT, local_r4k_flush_cache_page, &args);
+}
+
+static inline void local_r4k_flush_data_cache_page(void * addr)
+{
+	r4k_blast_dcache_page((unsigned long) addr);
+}
+
+static void r4k_flush_data_cache_page(unsigned long addr)
+{
+	if (in_atomic())
+		local_r4k_flush_data_cache_page((void *)addr);
+	else
+		r4k_on_each_cpu(R4K_HIT, local_r4k_flush_data_cache_page,
+				(void *) addr);
+}
+
+struct flush_icache_range_args {
+	unsigned long start;
+	unsigned long end;
+	unsigned int type;
+	bool user;
+};
+
+static inline void __local_r4k_flush_icache_range(unsigned long start,
+						  unsigned long end,
+						  unsigned int type,
+						  bool user)
+{
+	if (!cpu_has_ic_fills_f_dc) {
+		if (type == R4K_INDEX ||
+		    (type & R4K_INDEX && end - start >= dcache_size)) {
+			r4k_blast_dcache();
+		} else {
+			R4600_HIT_CACHEOP_WAR_IMPL;
+			if (user)
+				protected_blast_dcache_range(start, end);
+			else
+				blast_dcache_range(start, end);
+		}
+	}
+
+	if (type == R4K_INDEX ||
+	    (type & R4K_INDEX && end - start > icache_size))
+		r4k_blast_icache();
+	else {
+		switch (boot_cpu_type()) {
+		case CPU_LOONGSON2EF:
+			protected_loongson2_blast_icache_range(start, end);
+			break;
+
+		default:
+			if (user)
+				protected_blast_icache_range(start, end);
+			else
+				blast_icache_range(start, end);
+			break;
+		}
+	}
+}
+
+static inline void local_r4k_flush_icache_range(unsigned long start,
+						unsigned long end)
+{
+	__local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX, false);
+}
+
+static inline void local_r4k_flush_icache_user_range(unsigned long start,
+						     unsigned long end)
+{
+	__local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX, true);
+}
+
+static inline void local_r4k_flush_icache_range_ipi(void *args)
+{
+	struct flush_icache_range_args *fir_args = args;
+	unsigned long start = fir_args->start;
+	unsigned long end = fir_args->end;
+	unsigned int type = fir_args->type;
+	bool user = fir_args->user;
+
+	__local_r4k_flush_icache_range(start, end, type, user);
+}
+
+static void __r4k_flush_icache_range(unsigned long start, unsigned long end,
+				     bool user)
+{
+	struct flush_icache_range_args args;
+	unsigned long size, cache_size;
+
+	args.start = start;
+	args.end = end;
+	args.type = R4K_HIT | R4K_INDEX;
+	args.user = user;
+
+	/*
+	 * Indexed cache ops require an SMP call.
+	 * Consider if that can or should be avoided.
+	 */
+	preempt_disable();
+	if (r4k_op_needs_ipi(R4K_INDEX) && !r4k_op_needs_ipi(R4K_HIT)) {
+		/*
+		 * If address-based cache ops don't require an SMP call, then
+		 * use them exclusively for small flushes.
+		 */
+		size = end - start;
+		cache_size = icache_size;
+		if (!cpu_has_ic_fills_f_dc) {
+			size *= 2;
+			cache_size += dcache_size;
+		}
+		if (size <= cache_size)
+			args.type &= ~R4K_INDEX;
+	}
+	r4k_on_each_cpu(args.type, local_r4k_flush_icache_range_ipi, &args);
+	preempt_enable();
+	instruction_hazard();
+}
+
+static void r4k_flush_icache_range(unsigned long start, unsigned long end)
+{
+	return __r4k_flush_icache_range(start, end, false);
+}
+
+static void r4k_flush_icache_user_range(unsigned long start, unsigned long end)
+{
+	return __r4k_flush_icache_range(start, end, true);
+}
+
+#ifdef CONFIG_DMA_NONCOHERENT
+
+static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
+{
+	/* Catch bad driver code */
+	if (WARN_ON(size == 0))
+		return;
+
+	preempt_disable();
+	if (cpu_has_inclusive_pcaches) {
+		if (size >= scache_size) {
+			if (current_cpu_type() != CPU_LOONGSON64)
+				r4k_blast_scache();
+			else
+				r4k_blast_scache_node(pa_to_nid(addr));
+		} else {
+			blast_scache_range(addr, addr + size);
+		}
+		preempt_enable();
+		__sync();
+		return;
+	}
+
+	/*
+	 * Either no secondary cache or the available caches don't have the
+	 * subset property so we have to flush the primary caches
+	 * explicitly.
+	 * If we would need IPI to perform an INDEX-type operation, then
+	 * we have to use the HIT-type alternative as IPI cannot be used
+	 * here due to interrupts possibly being disabled.
+	 */
+	if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
+		r4k_blast_dcache();
+	} else {
+		R4600_HIT_CACHEOP_WAR_IMPL;
+		blast_dcache_range(addr, addr + size);
+	}
+	preempt_enable();
+
+	bc_wback_inv(addr, size);
+	__sync();
+}
+
+static void prefetch_cache_inv(unsigned long addr, unsigned long size)
+{
+	unsigned int linesz = cpu_scache_line_size();
+	unsigned long addr0 = addr, addr1;
+
+	addr0 &= ~(linesz - 1);
+	addr1 = (addr0 + size - 1) & ~(linesz - 1);
+
+	protected_writeback_scache_line(addr0);
+	if (likely(addr1 != addr0))
+		protected_writeback_scache_line(addr1);
+	else
+		return;
+
+	addr0 += linesz;
+	if (likely(addr1 != addr0))
+		protected_writeback_scache_line(addr0);
+	else
+		return;
+
+	addr1 -= linesz;
+	if (likely(addr1 > addr0))
+		protected_writeback_scache_line(addr0);
+}
+
+static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
+{
+	/* Catch bad driver code */
+	if (WARN_ON(size == 0))
+		return;
+
+	preempt_disable();
+
+	if (current_cpu_type() == CPU_BMIPS5000)
+		prefetch_cache_inv(addr, size);
+
+	if (cpu_has_inclusive_pcaches) {
+		if (size >= scache_size) {
+			if (current_cpu_type() != CPU_LOONGSON64)
+				r4k_blast_scache();
+			else
+				r4k_blast_scache_node(pa_to_nid(addr));
+		} else {
+			/*
+			 * There is no clearly documented alignment requirement
+			 * for the cache instruction on MIPS processors and
+			 * some processors, among them the RM5200 and RM7000
+			 * QED processors will throw an address error for cache
+			 * hit ops with insufficient alignment.	 Solved by
+			 * aligning the address to cache line size.
+			 */
+			blast_inv_scache_range(addr, addr + size);
+		}
+		preempt_enable();
+		__sync();
+		return;
+	}
+
+	if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
+		r4k_blast_dcache();
+	} else {
+		R4600_HIT_CACHEOP_WAR_IMPL;
+		blast_inv_dcache_range(addr, addr + size);
+	}
+	preempt_enable();
+
+	bc_inv(addr, size);
+	__sync();
+}
+#endif /* CONFIG_DMA_NONCOHERENT */
+
+static void r4k_flush_icache_all(void)
+{
+	if (cpu_has_vtag_icache)
+		r4k_blast_icache();
+}
+
+struct flush_kernel_vmap_range_args {
+	unsigned long	vaddr;
+	int		size;
+};
+
+static inline void local_r4k_flush_kernel_vmap_range_index(void *args)
+{
+	/*
+	 * Aliases only affect the primary caches so don't bother with
+	 * S-caches or T-caches.
+	 */
+	r4k_blast_dcache();
+}
+
+static inline void local_r4k_flush_kernel_vmap_range(void *args)
+{
+	struct flush_kernel_vmap_range_args *vmra = args;
+	unsigned long vaddr = vmra->vaddr;
+	int size = vmra->size;
+
+	/*
+	 * Aliases only affect the primary caches so don't bother with
+	 * S-caches or T-caches.
+	 */
+	R4600_HIT_CACHEOP_WAR_IMPL;
+	blast_dcache_range(vaddr, vaddr + size);
+}
+
+static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size)
+{
+	struct flush_kernel_vmap_range_args args;
+
+	args.vaddr = (unsigned long) vaddr;
+	args.size = size;
+
+	if (size >= dcache_size)
+		r4k_on_each_cpu(R4K_INDEX,
+				local_r4k_flush_kernel_vmap_range_index, NULL);
+	else
+		r4k_on_each_cpu(R4K_HIT, local_r4k_flush_kernel_vmap_range,
+				&args);
+}
+
+static inline void rm7k_erratum31(void)
+{
+	const unsigned long ic_lsize = 32;
+	unsigned long addr;
+
+	/* RM7000 erratum #31. The icache is screwed at startup. */
+	write_c0_taglo(0);
+	write_c0_taghi(0);
+
+	for (addr = INDEX_BASE; addr <= INDEX_BASE + 4096; addr += ic_lsize) {
+		__asm__ __volatile__ (
+			".set push\n\t"
+			".set noreorder\n\t"
+			".set mips3\n\t"
+			"cache\t%1, 0(%0)\n\t"
+			"cache\t%1, 0x1000(%0)\n\t"
+			"cache\t%1, 0x2000(%0)\n\t"
+			"cache\t%1, 0x3000(%0)\n\t"
+			"cache\t%2, 0(%0)\n\t"
+			"cache\t%2, 0x1000(%0)\n\t"
+			"cache\t%2, 0x2000(%0)\n\t"
+			"cache\t%2, 0x3000(%0)\n\t"
+			"cache\t%1, 0(%0)\n\t"
+			"cache\t%1, 0x1000(%0)\n\t"
+			"cache\t%1, 0x2000(%0)\n\t"
+			"cache\t%1, 0x3000(%0)\n\t"
+			".set pop\n"
+			:
+			: "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill_I));
+	}
+}
+
+static inline int alias_74k_erratum(struct cpuinfo_mips *c)
+{
+	unsigned int imp = c->processor_id & PRID_IMP_MASK;
+	unsigned int rev = c->processor_id & PRID_REV_MASK;
+	int present = 0;
+
+	/*
+	 * Early versions of the 74K do not update the cache tags on a
+	 * vtag miss/ptag hit which can occur in the case of KSEG0/KUSEG
+	 * aliases.  In this case it is better to treat the cache as always
+	 * having aliases.  Also disable the synonym tag update feature
+	 * where available.  In this case no opportunistic tag update will
+	 * happen where a load causes a virtual address miss but a physical
+	 * address hit during a D-cache look-up.
+	 */
+	switch (imp) {
+	case PRID_IMP_74K:
+		if (rev <= PRID_REV_ENCODE_332(2, 4, 0))
+			present = 1;
+		if (rev == PRID_REV_ENCODE_332(2, 4, 0))
+			write_c0_config6(read_c0_config6() | MTI_CONF6_SYND);
+		break;
+	case PRID_IMP_1074K:
+		if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) {
+			present = 1;
+			write_c0_config6(read_c0_config6() | MTI_CONF6_SYND);
+		}
+		break;
+	default:
+		BUG();
+	}
+
+	return present;
+}
+
+static void b5k_instruction_hazard(void)
+{
+	__sync();
+	__sync();
+	__asm__ __volatile__(
+	"       nop; nop; nop; nop; nop; nop; nop; nop\n"
+	"       nop; nop; nop; nop; nop; nop; nop; nop\n"
+	"       nop; nop; nop; nop; nop; nop; nop; nop\n"
+	"       nop; nop; nop; nop; nop; nop; nop; nop\n"
+	: : : "memory");
+}
+
+static char *way_string[] = { NULL, "direct mapped", "2-way",
+	"3-way", "4-way", "5-way", "6-way", "7-way", "8-way",
+	"9-way", "10-way", "11-way", "12-way",
+	"13-way", "14-way", "15-way", "16-way",
+};
+
+static void probe_pcache(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned int config = read_c0_config();
+	unsigned int prid = read_c0_prid();
+	int has_74k_erratum = 0;
+	unsigned long config1;
+	unsigned int lsize;
+
+	switch (current_cpu_type()) {
+	case CPU_R4600:			/* QED style two way caches? */
+	case CPU_R4700:
+	case CPU_R5000:
+	case CPU_NEVADA:
+		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 2;
+		c->icache.waybit = __ffs(icache_size/2);
+
+		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 2;
+		c->dcache.waybit= __ffs(dcache_size/2);
+
+		c->options |= MIPS_CPU_CACHE_CDEX_P;
+		break;
+
+	case CPU_R5500:
+		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 2;
+		c->icache.waybit= 0;
+
+		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 2;
+		c->dcache.waybit = 0;
+
+		c->options |= MIPS_CPU_CACHE_CDEX_P | MIPS_CPU_PREFETCH;
+		break;
+
+	case CPU_TX49XX:
+		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 4;
+		c->icache.waybit= 0;
+
+		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 4;
+		c->dcache.waybit = 0;
+
+		c->options |= MIPS_CPU_CACHE_CDEX_P;
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+
+	case CPU_R4000PC:
+	case CPU_R4000SC:
+	case CPU_R4000MC:
+	case CPU_R4400PC:
+	case CPU_R4400SC:
+	case CPU_R4400MC:
+	case CPU_R4300:
+		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 1;
+		c->icache.waybit = 0;	/* doesn't matter */
+
+		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 1;
+		c->dcache.waybit = 0;	/* does not matter */
+
+		c->options |= MIPS_CPU_CACHE_CDEX_P;
+		break;
+
+	case CPU_R10000:
+	case CPU_R12000:
+	case CPU_R14000:
+	case CPU_R16000:
+		icache_size = 1 << (12 + ((config & R10K_CONF_IC) >> 29));
+		c->icache.linesz = 64;
+		c->icache.ways = 2;
+		c->icache.waybit = 0;
+
+		dcache_size = 1 << (12 + ((config & R10K_CONF_DC) >> 26));
+		c->dcache.linesz = 32;
+		c->dcache.ways = 2;
+		c->dcache.waybit = 0;
+
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+
+	case CPU_RM7000:
+		rm7k_erratum31();
+
+		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 4;
+		c->icache.waybit = __ffs(icache_size / c->icache.ways);
+
+		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 4;
+		c->dcache.waybit = __ffs(dcache_size / c->dcache.ways);
+
+		c->options |= MIPS_CPU_CACHE_CDEX_P;
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+
+	case CPU_LOONGSON2EF:
+		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		if (prid & 0x3)
+			c->icache.ways = 4;
+		else
+			c->icache.ways = 2;
+		c->icache.waybit = 0;
+
+		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		if (prid & 0x3)
+			c->dcache.ways = 4;
+		else
+			c->dcache.ways = 2;
+		c->dcache.waybit = 0;
+		break;
+
+	case CPU_LOONGSON64:
+		config1 = read_c0_config1();
+		lsize = (config1 >> 19) & 7;
+		if (lsize)
+			c->icache.linesz = 2 << lsize;
+		else
+			c->icache.linesz = 0;
+		c->icache.sets = 64 << ((config1 >> 22) & 7);
+		c->icache.ways = 1 + ((config1 >> 16) & 7);
+		icache_size = c->icache.sets *
+					  c->icache.ways *
+					  c->icache.linesz;
+		c->icache.waybit = 0;
+
+		lsize = (config1 >> 10) & 7;
+		if (lsize)
+			c->dcache.linesz = 2 << lsize;
+		else
+			c->dcache.linesz = 0;
+		c->dcache.sets = 64 << ((config1 >> 13) & 7);
+		c->dcache.ways = 1 + ((config1 >> 7) & 7);
+		dcache_size = c->dcache.sets *
+					  c->dcache.ways *
+					  c->dcache.linesz;
+		c->dcache.waybit = 0;
+		if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >=
+				(PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) ||
+				(c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R)
+			c->options |= MIPS_CPU_PREFETCH;
+		break;
+
+	case CPU_CAVIUM_OCTEON3:
+		/* For now lie about the number of ways. */
+		c->icache.linesz = 128;
+		c->icache.sets = 16;
+		c->icache.ways = 8;
+		c->icache.flags |= MIPS_CACHE_VTAG;
+		icache_size = c->icache.sets * c->icache.ways * c->icache.linesz;
+
+		c->dcache.linesz = 128;
+		c->dcache.ways = 8;
+		c->dcache.sets = 8;
+		dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz;
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+
+	default:
+		if (!(config & MIPS_CONF_M))
+			panic("Don't know how to probe P-caches on this cpu.");
+
+		/*
+		 * So we seem to be a MIPS32 or MIPS64 CPU
+		 * So let's probe the I-cache ...
+		 */
+		config1 = read_c0_config1();
+
+		lsize = (config1 >> 19) & 7;
+
+		/* IL == 7 is reserved */
+		if (lsize == 7)
+			panic("Invalid icache line size");
+
+		c->icache.linesz = lsize ? 2 << lsize : 0;
+
+		c->icache.sets = 32 << (((config1 >> 22) + 1) & 7);
+		c->icache.ways = 1 + ((config1 >> 16) & 7);
+
+		icache_size = c->icache.sets *
+			      c->icache.ways *
+			      c->icache.linesz;
+		c->icache.waybit = __ffs(icache_size/c->icache.ways);
+
+		if (config & MIPS_CONF_VI)
+			c->icache.flags |= MIPS_CACHE_VTAG;
+
+		/*
+		 * Now probe the MIPS32 / MIPS64 data cache.
+		 */
+		c->dcache.flags = 0;
+
+		lsize = (config1 >> 10) & 7;
+
+		/* DL == 7 is reserved */
+		if (lsize == 7)
+			panic("Invalid dcache line size");
+
+		c->dcache.linesz = lsize ? 2 << lsize : 0;
+
+		c->dcache.sets = 32 << (((config1 >> 13) + 1) & 7);
+		c->dcache.ways = 1 + ((config1 >> 7) & 7);
+
+		dcache_size = c->dcache.sets *
+			      c->dcache.ways *
+			      c->dcache.linesz;
+		c->dcache.waybit = __ffs(dcache_size/c->dcache.ways);
+
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+	}
+
+	/*
+	 * Processor configuration sanity check for the R4000SC erratum
+	 * #5.	With page sizes larger than 32kB there is no possibility
+	 * to get a VCE exception anymore so we don't care about this
+	 * misconfiguration.  The case is rather theoretical anyway;
+	 * presumably no vendor is shipping his hardware in the "bad"
+	 * configuration.
+	 */
+	if ((prid & PRID_IMP_MASK) == PRID_IMP_R4000 &&
+	    (prid & PRID_REV_MASK) < PRID_REV_R4400 &&
+	    !(config & CONF_SC) && c->icache.linesz != 16 &&
+	    PAGE_SIZE <= 0x8000)
+		panic("Improper R4000SC processor configuration detected");
+
+	/* compute a couple of other cache variables */
+	c->icache.waysize = icache_size / c->icache.ways;
+	c->dcache.waysize = dcache_size / c->dcache.ways;
+
+	c->icache.sets = c->icache.linesz ?
+		icache_size / (c->icache.linesz * c->icache.ways) : 0;
+	c->dcache.sets = c->dcache.linesz ?
+		dcache_size / (c->dcache.linesz * c->dcache.ways) : 0;
+
+	/*
+	 * R1x000 P-caches are odd in a positive way.  They're 32kB 2-way
+	 * virtually indexed so normally would suffer from aliases.  So
+	 * normally they'd suffer from aliases but magic in the hardware deals
+	 * with that for us so we don't need to take care ourselves.
+	 */
+	switch (current_cpu_type()) {
+	case CPU_20KC:
+	case CPU_25KF:
+	case CPU_I6400:
+	case CPU_I6500:
+	case CPU_SB1:
+	case CPU_SB1A:
+		c->dcache.flags |= MIPS_CACHE_PINDEX;
+		break;
+
+	case CPU_R10000:
+	case CPU_R12000:
+	case CPU_R14000:
+	case CPU_R16000:
+		break;
+
+	case CPU_74K:
+	case CPU_1074K:
+		has_74k_erratum = alias_74k_erratum(c);
+		fallthrough;
+	case CPU_M14KC:
+	case CPU_M14KEC:
+	case CPU_24K:
+	case CPU_34K:
+	case CPU_1004K:
+	case CPU_INTERAPTIV:
+	case CPU_P5600:
+	case CPU_PROAPTIV:
+	case CPU_M5150:
+	case CPU_QEMU_GENERIC:
+	case CPU_P6600:
+	case CPU_M6250:
+		if (!(read_c0_config7() & MIPS_CONF7_IAR) &&
+		    (c->icache.waysize > PAGE_SIZE))
+			c->icache.flags |= MIPS_CACHE_ALIASES;
+		if (!has_74k_erratum && (read_c0_config7() & MIPS_CONF7_AR)) {
+			/*
+			 * Effectively physically indexed dcache,
+			 * thus no virtual aliases.
+			*/
+			c->dcache.flags |= MIPS_CACHE_PINDEX;
+			break;
+		}
+		fallthrough;
+	default:
+		if (has_74k_erratum || c->dcache.waysize > PAGE_SIZE)
+			c->dcache.flags |= MIPS_CACHE_ALIASES;
+	}
+
+	/* Physically indexed caches don't suffer from virtual aliasing */
+	if (c->dcache.flags & MIPS_CACHE_PINDEX)
+		c->dcache.flags &= ~MIPS_CACHE_ALIASES;
+
+	/*
+	 * In systems with CM the icache fills from L2 or closer caches, and
+	 * thus sees remote stores without needing to write them back any
+	 * further than that.
+	 */
+	if (mips_cm_present())
+		c->icache.flags |= MIPS_IC_SNOOPS_REMOTE;
+
+	switch (current_cpu_type()) {
+	case CPU_20KC:
+		/*
+		 * Some older 20Kc chips doesn't have the 'VI' bit in
+		 * the config register.
+		 */
+		c->icache.flags |= MIPS_CACHE_VTAG;
+		break;
+
+	case CPU_ALCHEMY:
+	case CPU_I6400:
+	case CPU_I6500:
+		c->icache.flags |= MIPS_CACHE_IC_F_DC;
+		break;
+
+	case CPU_BMIPS5000:
+		c->icache.flags |= MIPS_CACHE_IC_F_DC;
+		/* Cache aliases are handled in hardware; allow HIGHMEM */
+		c->dcache.flags &= ~MIPS_CACHE_ALIASES;
+		break;
+
+	case CPU_LOONGSON2EF:
+		/*
+		 * LOONGSON2 has 4 way icache, but when using indexed cache op,
+		 * one op will act on all 4 ways
+		 */
+		c->icache.ways = 1;
+	}
+
+	pr_info("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n",
+		icache_size >> 10,
+		c->icache.flags & MIPS_CACHE_VTAG ? "VIVT" : "VIPT",
+		way_string[c->icache.ways], c->icache.linesz);
+
+	pr_info("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n",
+		dcache_size >> 10, way_string[c->dcache.ways],
+		(c->dcache.flags & MIPS_CACHE_PINDEX) ? "PIPT" : "VIPT",
+		(c->dcache.flags & MIPS_CACHE_ALIASES) ?
+			"cache aliases" : "no aliases",
+		c->dcache.linesz);
+}
+
+static void probe_vcache(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned int config2, lsize;
+
+	if (current_cpu_type() != CPU_LOONGSON64)
+		return;
+
+	config2 = read_c0_config2();
+	if ((lsize = ((config2 >> 20) & 15)))
+		c->vcache.linesz = 2 << lsize;
+	else
+		c->vcache.linesz = lsize;
+
+	c->vcache.sets = 64 << ((config2 >> 24) & 15);
+	c->vcache.ways = 1 + ((config2 >> 16) & 15);
+
+	vcache_size = c->vcache.sets * c->vcache.ways * c->vcache.linesz;
+
+	c->vcache.waybit = 0;
+	c->vcache.waysize = vcache_size / c->vcache.ways;
+
+	pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n",
+		vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz);
+}
+
+/*
+ * If you even _breathe_ on this function, look at the gcc output and make sure
+ * it does not pop things on and off the stack for the cache sizing loop that
+ * executes in KSEG1 space or else you will crash and burn badly.  You have
+ * been warned.
+ */
+static int probe_scache(void)
+{
+	unsigned long flags, addr, begin, end, pow2;
+	unsigned int config = read_c0_config();
+	struct cpuinfo_mips *c = &current_cpu_data;
+
+	if (config & CONF_SC)
+		return 0;
+
+	begin = (unsigned long) &_stext;
+	begin &= ~((4 * 1024 * 1024) - 1);
+	end = begin + (4 * 1024 * 1024);
+
+	/*
+	 * This is such a bitch, you'd think they would make it easy to do
+	 * this.  Away you daemons of stupidity!
+	 */
+	local_irq_save(flags);
+
+	/* Fill each size-multiple cache line with a valid tag. */
+	pow2 = (64 * 1024);
+	for (addr = begin; addr < end; addr = (begin + pow2)) {
+		unsigned long *p = (unsigned long *) addr;
+		__asm__ __volatile__("nop" : : "r" (*p)); /* whee... */
+		pow2 <<= 1;
+	}
+
+	/* Load first line with zero (therefore invalid) tag. */
+	write_c0_taglo(0);
+	write_c0_taghi(0);
+	__asm__ __volatile__("nop; nop; nop; nop;"); /* avoid the hazard */
+	cache_op(Index_Store_Tag_I, begin);
+	cache_op(Index_Store_Tag_D, begin);
+	cache_op(Index_Store_Tag_SD, begin);
+
+	/* Now search for the wrap around point. */
+	pow2 = (128 * 1024);
+	for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) {
+		cache_op(Index_Load_Tag_SD, addr);
+		__asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */
+		if (!read_c0_taglo())
+			break;
+		pow2 <<= 1;
+	}
+	local_irq_restore(flags);
+	addr -= begin;
+
+	scache_size = addr;
+	c->scache.linesz = 16 << ((config & R4K_CONF_SB) >> 22);
+	c->scache.ways = 1;
+	c->scache.waybit = 0;		/* does not matter */
+
+	return 1;
+}
+
+static void loongson2_sc_init(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+
+	scache_size = 512*1024;
+	c->scache.linesz = 32;
+	c->scache.ways = 4;
+	c->scache.waybit = 0;
+	c->scache.waysize = scache_size / (c->scache.ways);
+	c->scache.sets = scache_size / (c->scache.linesz * c->scache.ways);
+	pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n",
+	       scache_size >> 10, way_string[c->scache.ways], c->scache.linesz);
+
+	c->options |= MIPS_CPU_INCLUSIVE_CACHES;
+}
+
+static void loongson3_sc_init(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned int config2, lsize;
+
+	config2 = read_c0_config2();
+	lsize = (config2 >> 4) & 15;
+	if (lsize)
+		c->scache.linesz = 2 << lsize;
+	else
+		c->scache.linesz = 0;
+	c->scache.sets = 64 << ((config2 >> 8) & 15);
+	c->scache.ways = 1 + (config2 & 15);
+
+	/* Loongson-3 has 4-Scache banks, while Loongson-2K have only 2 banks */
+	if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R)
+		c->scache.sets *= 2;
+	else
+		c->scache.sets *= 4;
+
+	scache_size = c->scache.sets * c->scache.ways * c->scache.linesz;
+
+	c->scache.waybit = 0;
+	c->scache.waysize = scache_size / c->scache.ways;
+	pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n",
+	       scache_size >> 10, way_string[c->scache.ways], c->scache.linesz);
+	if (scache_size)
+		c->options |= MIPS_CPU_INCLUSIVE_CACHES;
+	return;
+}
+
+extern int r5k_sc_init(void);
+extern int rm7k_sc_init(void);
+extern int mips_sc_init(void);
+
+static void setup_scache(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned int config = read_c0_config();
+	int sc_present = 0;
+
+	/*
+	 * Do the probing thing on R4000SC and R4400SC processors.  Other
+	 * processors don't have a S-cache that would be relevant to the
+	 * Linux memory management.
+	 */
+	switch (current_cpu_type()) {
+	case CPU_R4000SC:
+	case CPU_R4000MC:
+	case CPU_R4400SC:
+	case CPU_R4400MC:
+		sc_present = run_uncached(probe_scache);
+		if (sc_present)
+			c->options |= MIPS_CPU_CACHE_CDEX_S;
+		break;
+
+	case CPU_R10000:
+	case CPU_R12000:
+	case CPU_R14000:
+	case CPU_R16000:
+		scache_size = 0x80000 << ((config & R10K_CONF_SS) >> 16);
+		c->scache.linesz = 64 << ((config >> 13) & 1);
+		c->scache.ways = 2;
+		c->scache.waybit= 0;
+		sc_present = 1;
+		break;
+
+	case CPU_R5000:
+	case CPU_NEVADA:
+#ifdef CONFIG_R5000_CPU_SCACHE
+		r5k_sc_init();
+#endif
+		return;
+
+	case CPU_RM7000:
+#ifdef CONFIG_RM7000_CPU_SCACHE
+		rm7k_sc_init();
+#endif
+		return;
+
+	case CPU_LOONGSON2EF:
+		loongson2_sc_init();
+		return;
+
+	case CPU_LOONGSON64:
+		loongson3_sc_init();
+		return;
+
+	case CPU_CAVIUM_OCTEON3:
+		/* don't need to worry about L2, fully coherent */
+		return;
+
+	default:
+		if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+				    MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+				    MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
+				    MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+#ifdef CONFIG_MIPS_CPU_SCACHE
+			if (mips_sc_init ()) {
+				scache_size = c->scache.ways * c->scache.sets * c->scache.linesz;
+				printk("MIPS secondary cache %ldkB, %s, linesize %d bytes.\n",
+				       scache_size >> 10,
+				       way_string[c->scache.ways], c->scache.linesz);
+
+				if (current_cpu_type() == CPU_BMIPS5000)
+					c->options |= MIPS_CPU_INCLUSIVE_CACHES;
+			}
+
+#else
+			if (!(c->scache.flags & MIPS_CACHE_NOT_PRESENT))
+				panic("Dunno how to handle MIPS32 / MIPS64 second level cache");
+#endif
+			return;
+		}
+		sc_present = 0;
+	}
+
+	if (!sc_present)
+		return;
+
+	/* compute a couple of other cache variables */
+	c->scache.waysize = scache_size / c->scache.ways;
+
+	c->scache.sets = scache_size / (c->scache.linesz * c->scache.ways);
+
+	printk("Unified secondary cache %ldkB %s, linesize %d bytes.\n",
+	       scache_size >> 10, way_string[c->scache.ways], c->scache.linesz);
+
+	c->options |= MIPS_CPU_INCLUSIVE_CACHES;
+}
+
+void au1x00_fixup_config_od(void)
+{
+	/*
+	 * c0_config.od (bit 19) was write only (and read as 0)
+	 * on the early revisions of Alchemy SOCs.  It disables the bus
+	 * transaction overlapping and needs to be set to fix various errata.
+	 */
+	switch (read_c0_prid()) {
+	case 0x00030100: /* Au1000 DA */
+	case 0x00030201: /* Au1000 HA */
+	case 0x00030202: /* Au1000 HB */
+	case 0x01030200: /* Au1500 AB */
+	/*
+	 * Au1100 errata actually keeps silence about this bit, so we set it
+	 * just in case for those revisions that require it to be set according
+	 * to the (now gone) cpu table.
+	 */
+	case 0x02030200: /* Au1100 AB */
+	case 0x02030201: /* Au1100 BA */
+	case 0x02030202: /* Au1100 BC */
+		set_c0_config(1 << 19);
+		break;
+	}
+}
+
+/* CP0 hazard avoidance. */
+#define NXP_BARRIER()							\
+	 __asm__ __volatile__(						\
+	".set noreorder\n\t"						\
+	"nop; nop; nop; nop; nop; nop;\n\t"				\
+	".set reorder\n\t")
+
+static void nxp_pr4450_fixup_config(void)
+{
+	unsigned long config0;
+
+	config0 = read_c0_config();
+
+	/* clear all three cache coherency fields */
+	config0 &= ~(0x7 | (7 << 25) | (7 << 28));
+	config0 |= (((_page_cachable_default >> _CACHE_SHIFT) <<  0) |
+		    ((_page_cachable_default >> _CACHE_SHIFT) << 25) |
+		    ((_page_cachable_default >> _CACHE_SHIFT) << 28));
+	write_c0_config(config0);
+	NXP_BARRIER();
+}
+
+static int cca = -1;
+
+static int __init cca_setup(char *str)
+{
+	get_option(&str, &cca);
+
+	return 0;
+}
+
+early_param("cca", cca_setup);
+
+static void coherency_setup(void)
+{
+	if (cca < 0 || cca > 7)
+		cca = read_c0_config() & CONF_CM_CMASK;
+	_page_cachable_default = cca << _CACHE_SHIFT;
+
+	pr_debug("Using cache attribute %d\n", cca);
+	change_c0_config(CONF_CM_CMASK, cca);
+
+	/*
+	 * c0_status.cu=0 specifies that updates by the sc instruction use
+	 * the coherency mode specified by the TLB; 1 means cachable
+	 * coherent update on write will be used.  Not all processors have
+	 * this bit and; some wire it to zero, others like Toshiba had the
+	 * silly idea of putting something else there ...
+	 */
+	switch (current_cpu_type()) {
+	case CPU_R4000PC:
+	case CPU_R4000SC:
+	case CPU_R4000MC:
+	case CPU_R4400PC:
+	case CPU_R4400SC:
+	case CPU_R4400MC:
+		clear_c0_config(CONF_CU);
+		break;
+	/*
+	 * We need to catch the early Alchemy SOCs with
+	 * the write-only co_config.od bit and set it back to one on:
+	 * Au1000 rev DA, HA, HB;  Au1100 AB, BA, BC, Au1500 AB
+	 */
+	case CPU_ALCHEMY:
+		au1x00_fixup_config_od();
+		break;
+
+	case PRID_IMP_PR4450:
+		nxp_pr4450_fixup_config();
+		break;
+	}
+}
+
+static void r4k_cache_error_setup(void)
+{
+	extern char __weak except_vec2_generic;
+	extern char __weak except_vec2_sb1;
+
+	switch (current_cpu_type()) {
+	case CPU_SB1:
+	case CPU_SB1A:
+		set_uncached_handler(0x100, &except_vec2_sb1, 0x80);
+		break;
+
+	default:
+		set_uncached_handler(0x100, &except_vec2_generic, 0x80);
+		break;
+	}
+}
+
+void r4k_cache_init(void)
+{
+	extern void build_clear_page(void);
+	extern void build_copy_page(void);
+	struct cpuinfo_mips *c = &current_cpu_data;
+
+	probe_pcache();
+	probe_vcache();
+	setup_scache();
+
+	r4k_blast_dcache_page_setup();
+	r4k_blast_dcache_setup();
+	r4k_blast_icache_page_setup();
+	r4k_blast_icache_setup();
+	r4k_blast_scache_page_setup();
+	r4k_blast_scache_setup();
+	r4k_blast_scache_node_setup();
+#ifdef CONFIG_EVA
+	r4k_blast_dcache_user_page_setup();
+	r4k_blast_icache_user_page_setup();
+#endif
+
+	/*
+	 * Some MIPS32 and MIPS64 processors have physically indexed caches.
+	 * This code supports virtually indexed processors and will be
+	 * unnecessarily inefficient on physically indexed processors.
+	 */
+	if (c->dcache.linesz && cpu_has_dc_aliases)
+		shm_align_mask = max_t( unsigned long,
+					c->dcache.sets * c->dcache.linesz - 1,
+					PAGE_SIZE - 1);
+	else
+		shm_align_mask = PAGE_SIZE-1;
+
+	__flush_cache_vmap	= r4k__flush_cache_vmap;
+	__flush_cache_vunmap	= r4k__flush_cache_vunmap;
+
+	flush_cache_all		= cache_noop;
+	__flush_cache_all	= r4k___flush_cache_all;
+	flush_cache_mm		= r4k_flush_cache_mm;
+	flush_cache_page	= r4k_flush_cache_page;
+	flush_cache_range	= r4k_flush_cache_range;
+
+	__flush_kernel_vmap_range = r4k_flush_kernel_vmap_range;
+
+	flush_icache_all	= r4k_flush_icache_all;
+	flush_data_cache_page	= r4k_flush_data_cache_page;
+	flush_icache_range	= r4k_flush_icache_range;
+	local_flush_icache_range	= local_r4k_flush_icache_range;
+	__flush_icache_user_range	= r4k_flush_icache_user_range;
+	__local_flush_icache_user_range	= local_r4k_flush_icache_user_range;
+
+#ifdef CONFIG_DMA_NONCOHERENT
+	_dma_cache_wback_inv	= r4k_dma_cache_wback_inv;
+	_dma_cache_wback	= r4k_dma_cache_wback_inv;
+	_dma_cache_inv		= r4k_dma_cache_inv;
+#endif /* CONFIG_DMA_NONCOHERENT */
+
+	build_clear_page();
+	build_copy_page();
+
+	/*
+	 * We want to run CMP kernels on core with and without coherent
+	 * caches. Therefore, do not use CONFIG_MIPS_CMP to decide whether
+	 * or not to flush caches.
+	 */
+	local_r4k___flush_cache_all(NULL);
+
+	coherency_setup();
+	board_cache_error_setup = r4k_cache_error_setup;
+
+	/*
+	 * Per-CPU overrides
+	 */
+	switch (current_cpu_type()) {
+	case CPU_BMIPS4350:
+	case CPU_BMIPS4380:
+		/* No IPI is needed because all CPUs share the same D$ */
+		flush_data_cache_page = r4k_blast_dcache_page;
+		break;
+	case CPU_BMIPS5000:
+		/* We lose our superpowers if L2 is disabled */
+		if (c->scache.flags & MIPS_CACHE_NOT_PRESENT)
+			break;
+
+		/* I$ fills from D$ just by emptying the write buffers */
+		flush_cache_page = (void *)b5k_instruction_hazard;
+		flush_cache_range = (void *)b5k_instruction_hazard;
+		flush_data_cache_page = (void *)b5k_instruction_hazard;
+		flush_icache_range = (void *)b5k_instruction_hazard;
+		local_flush_icache_range = (void *)b5k_instruction_hazard;
+
+
+		/* Optimization: an L2 flush implicitly flushes the L1 */
+		current_cpu_data.options |= MIPS_CPU_INCLUSIVE_CACHES;
+		break;
+	case CPU_LOONGSON64:
+		/* Loongson-3 maintains cache coherency by hardware */
+		__flush_cache_all	= cache_noop;
+		__flush_cache_vmap	= cache_noop;
+		__flush_cache_vunmap	= cache_noop;
+		__flush_kernel_vmap_range = (void *)cache_noop;
+		flush_cache_mm		= (void *)cache_noop;
+		flush_cache_page	= (void *)cache_noop;
+		flush_cache_range	= (void *)cache_noop;
+		flush_icache_all	= (void *)cache_noop;
+		flush_data_cache_page	= (void *)cache_noop;
+		break;
+	}
+}
+
+static int r4k_cache_pm_notifier(struct notifier_block *self, unsigned long cmd,
+			       void *v)
+{
+	switch (cmd) {
+	case CPU_PM_ENTER_FAILED:
+	case CPU_PM_EXIT:
+		coherency_setup();
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block r4k_cache_pm_notifier_block = {
+	.notifier_call = r4k_cache_pm_notifier,
+};
+
+int __init r4k_cache_init_pm(void)
+{
+	return cpu_pm_register_notifier(&r4k_cache_pm_notifier_block);
+}
+arch_initcall(r4k_cache_init_pm);
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
new file mode 100644
index 0000000000..7f830634db
--- /dev/null
+++ b/arch/mips/mm/cache.c
@@ -0,0 +1,226 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 2007 MIPS Technologies, Inc.
+ */
+#include <linux/fs.h>
+#include <linux/fcntl.h>
+#include <linux/kernel.h>
+#include <linux/linkage.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+
+#include <asm/bcache.h>
+#include <asm/cacheflush.h>
+#include <asm/processor.h>
+#include <asm/cpu.h>
+#include <asm/cpu-features.h>
+#include <asm/setup.h>
+#include <asm/pgtable.h>
+
+/* Cache operations. */
+void (*flush_cache_all)(void);
+void (*__flush_cache_all)(void);
+EXPORT_SYMBOL_GPL(__flush_cache_all);
+void (*flush_cache_mm)(struct mm_struct *mm);
+void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start,
+	unsigned long end);
+void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page,
+	unsigned long pfn);
+void (*flush_icache_range)(unsigned long start, unsigned long end);
+EXPORT_SYMBOL_GPL(flush_icache_range);
+void (*local_flush_icache_range)(unsigned long start, unsigned long end);
+EXPORT_SYMBOL_GPL(local_flush_icache_range);
+void (*__flush_icache_user_range)(unsigned long start, unsigned long end);
+void (*__local_flush_icache_user_range)(unsigned long start, unsigned long end);
+EXPORT_SYMBOL_GPL(__local_flush_icache_user_range);
+
+void (*__flush_cache_vmap)(void);
+void (*__flush_cache_vunmap)(void);
+
+void (*__flush_kernel_vmap_range)(unsigned long vaddr, int size);
+EXPORT_SYMBOL_GPL(__flush_kernel_vmap_range);
+
+/* MIPS specific cache operations */
+void (*flush_data_cache_page)(unsigned long addr);
+void (*flush_icache_all)(void);
+
+EXPORT_SYMBOL(flush_data_cache_page);
+EXPORT_SYMBOL(flush_icache_all);
+
+/*
+ * Dummy cache handling routine
+ */
+
+void cache_noop(void) {}
+
+#ifdef CONFIG_BOARD_SCACHE
+
+static struct bcache_ops no_sc_ops = {
+	.bc_enable = (void *)cache_noop,
+	.bc_disable = (void *)cache_noop,
+	.bc_wback_inv = (void *)cache_noop,
+	.bc_inv = (void *)cache_noop
+};
+
+struct bcache_ops *bcops = &no_sc_ops;
+#endif
+
+#ifdef CONFIG_DMA_NONCOHERENT
+
+/* DMA cache operations. */
+void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size);
+void (*_dma_cache_wback)(unsigned long start, unsigned long size);
+void (*_dma_cache_inv)(unsigned long start, unsigned long size);
+
+#endif /* CONFIG_DMA_NONCOHERENT */
+
+/*
+ * We could optimize the case where the cache argument is not BCACHE but
+ * that seems very atypical use ...
+ */
+SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
+	unsigned int, cache)
+{
+	if (bytes == 0)
+		return 0;
+	if (!access_ok((void __user *) addr, bytes))
+		return -EFAULT;
+
+	__flush_icache_user_range(addr, addr + bytes);
+
+	return 0;
+}
+
+void __flush_dcache_pages(struct page *page, unsigned int nr)
+{
+	struct folio *folio = page_folio(page);
+	struct address_space *mapping = folio_flush_mapping(folio);
+	unsigned long addr;
+	unsigned int i;
+
+	if (mapping && !mapping_mapped(mapping)) {
+		folio_set_dcache_dirty(folio);
+		return;
+	}
+
+	/*
+	 * We could delay the flush for the !page_mapping case too.  But that
+	 * case is for exec env/arg pages and those are %99 certainly going to
+	 * get faulted into the tlb (and thus flushed) anyways.
+	 */
+	for (i = 0; i < nr; i++) {
+		addr = (unsigned long)kmap_local_page(nth_page(page, i));
+		flush_data_cache_page(addr);
+		kunmap_local((void *)addr);
+	}
+}
+EXPORT_SYMBOL(__flush_dcache_pages);
+
+void __flush_anon_page(struct page *page, unsigned long vmaddr)
+{
+	unsigned long addr = (unsigned long) page_address(page);
+	struct folio *folio = page_folio(page);
+
+	if (pages_do_alias(addr, vmaddr)) {
+		if (folio_mapped(folio) && !folio_test_dcache_dirty(folio)) {
+			void *kaddr;
+
+			kaddr = kmap_coherent(page, vmaddr);
+			flush_data_cache_page((unsigned long)kaddr);
+			kunmap_coherent();
+		} else
+			flush_data_cache_page(addr);
+	}
+}
+
+EXPORT_SYMBOL(__flush_anon_page);
+
+void __update_cache(unsigned long address, pte_t pte)
+{
+	struct folio *folio;
+	unsigned long pfn, addr;
+	int exec = !pte_no_exec(pte) && !cpu_has_ic_fills_f_dc;
+	unsigned int i;
+
+	pfn = pte_pfn(pte);
+	if (unlikely(!pfn_valid(pfn)))
+		return;
+
+	folio = page_folio(pfn_to_page(pfn));
+	address &= PAGE_MASK;
+	address -= offset_in_folio(folio, pfn << PAGE_SHIFT);
+
+	if (folio_test_dcache_dirty(folio)) {
+		for (i = 0; i < folio_nr_pages(folio); i++) {
+			addr = (unsigned long)kmap_local_folio(folio, i);
+
+			if (exec || pages_do_alias(addr, address))
+				flush_data_cache_page(addr);
+			kunmap_local((void *)addr);
+			address += PAGE_SIZE;
+		}
+		folio_clear_dcache_dirty(folio);
+	}
+}
+
+unsigned long _page_cachable_default;
+EXPORT_SYMBOL(_page_cachable_default);
+
+#define PM(p)	__pgprot(_page_cachable_default | (p))
+
+static pgprot_t protection_map[16] __ro_after_init;
+DECLARE_VM_GET_PAGE_PROT
+
+static inline void setup_protection_map(void)
+{
+	protection_map[0]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
+	protection_map[1]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+	protection_map[2]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
+	protection_map[3]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+	protection_map[4]  = PM(_PAGE_PRESENT);
+	protection_map[5]  = PM(_PAGE_PRESENT);
+	protection_map[6]  = PM(_PAGE_PRESENT);
+	protection_map[7]  = PM(_PAGE_PRESENT);
+
+	protection_map[8]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
+	protection_map[9]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+	protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
+				_PAGE_NO_READ);
+	protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
+	protection_map[12] = PM(_PAGE_PRESENT);
+	protection_map[13] = PM(_PAGE_PRESENT);
+	protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE);
+	protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE);
+}
+
+#undef PM
+
+void cpu_cache_init(void)
+{
+	if (cpu_has_3k_cache) {
+		extern void __weak r3k_cache_init(void);
+
+		r3k_cache_init();
+	}
+	if (cpu_has_4k_cache) {
+		extern void __weak r4k_cache_init(void);
+
+		r4k_cache_init();
+	}
+
+	if (cpu_has_octeon_cache) {
+		extern void __weak octeon_cache_init(void);
+
+		octeon_cache_init();
+	}
+
+	setup_protection_map();
+}
diff --git a/arch/mips/mm/cerr-sb1.c b/arch/mips/mm/cerr-sb1.c
new file mode 100644
index 0000000000..a3c02df19f
--- /dev/null
+++ b/arch/mips/mm/cerr-sb1.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001,2002,2003 Broadcom Corporation
+ */
+#include <linux/sched.h>
+#include <asm/mipsregs.h>
+#include <asm/sibyte/sb1250.h>
+#include <asm/sibyte/sb1250_regs.h>
+
+#if !defined(CONFIG_SIBYTE_BUS_WATCHER) || defined(CONFIG_SIBYTE_BW_TRACE)
+#include <asm/io.h>
+#include <asm/sibyte/sb1250_scd.h>
+#endif
+
+/*
+ * We'd like to dump the L2_ECC_TAG register on errors, but errata make
+ * that unsafe... So for now we don't.	(BCM1250/BCM112x erratum SOC-48.)
+ */
+#undef DUMP_L2_ECC_TAG_ON_ERROR
+
+/* SB1 definitions */
+
+/* XXX should come from config1 XXX */
+#define SB1_CACHE_INDEX_MASK   0x1fe0
+
+#define CP0_ERRCTL_RECOVERABLE (1 << 31)
+#define CP0_ERRCTL_DCACHE      (1 << 30)
+#define CP0_ERRCTL_ICACHE      (1 << 29)
+#define CP0_ERRCTL_MULTIBUS    (1 << 23)
+#define CP0_ERRCTL_MC_TLB      (1 << 15)
+#define CP0_ERRCTL_MC_TIMEOUT  (1 << 14)
+
+#define CP0_CERRI_TAG_PARITY   (1 << 29)
+#define CP0_CERRI_DATA_PARITY  (1 << 28)
+#define CP0_CERRI_EXTERNAL     (1 << 26)
+
+#define CP0_CERRI_IDX_VALID(c) (!((c) & CP0_CERRI_EXTERNAL))
+#define CP0_CERRI_DATA	       (CP0_CERRI_DATA_PARITY)
+
+#define CP0_CERRD_MULTIPLE     (1 << 31)
+#define CP0_CERRD_TAG_STATE    (1 << 30)
+#define CP0_CERRD_TAG_ADDRESS  (1 << 29)
+#define CP0_CERRD_DATA_SBE     (1 << 28)
+#define CP0_CERRD_DATA_DBE     (1 << 27)
+#define CP0_CERRD_EXTERNAL     (1 << 26)
+#define CP0_CERRD_LOAD	       (1 << 25)
+#define CP0_CERRD_STORE	       (1 << 24)
+#define CP0_CERRD_FILLWB       (1 << 23)
+#define CP0_CERRD_COHERENCY    (1 << 22)
+#define CP0_CERRD_DUPTAG       (1 << 21)
+
+#define CP0_CERRD_DPA_VALID(c) (!((c) & CP0_CERRD_EXTERNAL))
+#define CP0_CERRD_IDX_VALID(c) \
+   (((c) & (CP0_CERRD_LOAD | CP0_CERRD_STORE)) ? (!((c) & CP0_CERRD_EXTERNAL)) : 0)
+#define CP0_CERRD_CAUSES \
+   (CP0_CERRD_LOAD | CP0_CERRD_STORE | CP0_CERRD_FILLWB | CP0_CERRD_COHERENCY | CP0_CERRD_DUPTAG)
+#define CP0_CERRD_TYPES \
+   (CP0_CERRD_TAG_STATE | CP0_CERRD_TAG_ADDRESS | CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE | CP0_CERRD_EXTERNAL)
+#define CP0_CERRD_DATA	       (CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE)
+
+static uint32_t extract_ic(unsigned short addr, int data);
+static uint32_t extract_dc(unsigned short addr, int data);
+
+static inline void breakout_errctl(unsigned int val)
+{
+	if (val & CP0_ERRCTL_RECOVERABLE)
+		printk(" recoverable");
+	if (val & CP0_ERRCTL_DCACHE)
+		printk(" dcache");
+	if (val & CP0_ERRCTL_ICACHE)
+		printk(" icache");
+	if (val & CP0_ERRCTL_MULTIBUS)
+		printk(" multiple-buserr");
+	printk("\n");
+}
+
+static inline void breakout_cerri(unsigned int val)
+{
+	if (val & CP0_CERRI_TAG_PARITY)
+		printk(" tag-parity");
+	if (val & CP0_CERRI_DATA_PARITY)
+		printk(" data-parity");
+	if (val & CP0_CERRI_EXTERNAL)
+		printk(" external");
+	printk("\n");
+}
+
+static inline void breakout_cerrd(unsigned int val)
+{
+	switch (val & CP0_CERRD_CAUSES) {
+	case CP0_CERRD_LOAD:
+		printk(" load,");
+		break;
+	case CP0_CERRD_STORE:
+		printk(" store,");
+		break;
+	case CP0_CERRD_FILLWB:
+		printk(" fill/wb,");
+		break;
+	case CP0_CERRD_COHERENCY:
+		printk(" coherency,");
+		break;
+	case CP0_CERRD_DUPTAG:
+		printk(" duptags,");
+		break;
+	default:
+		printk(" NO CAUSE,");
+		break;
+	}
+	if (!(val & CP0_CERRD_TYPES))
+		printk(" NO TYPE");
+	else {
+		if (val & CP0_CERRD_MULTIPLE)
+			printk(" multi-err");
+		if (val & CP0_CERRD_TAG_STATE)
+			printk(" tag-state");
+		if (val & CP0_CERRD_TAG_ADDRESS)
+			printk(" tag-address");
+		if (val & CP0_CERRD_DATA_SBE)
+			printk(" data-SBE");
+		if (val & CP0_CERRD_DATA_DBE)
+			printk(" data-DBE");
+		if (val & CP0_CERRD_EXTERNAL)
+			printk(" external");
+	}
+	printk("\n");
+}
+
+#ifndef CONFIG_SIBYTE_BUS_WATCHER
+
+static void check_bus_watcher(void)
+{
+	uint32_t status, l2_err, memio_err;
+#ifdef DUMP_L2_ECC_TAG_ON_ERROR
+	uint64_t l2_tag;
+#endif
+
+	/* Destructive read, clears register and interrupt */
+	status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS));
+	/* Bit 31 is always on, but there's no #define for that */
+	if (status & ~(1UL << 31)) {
+		l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS));
+#ifdef DUMP_L2_ECC_TAG_ON_ERROR
+		l2_tag = in64(IOADDR(A_L2_ECC_TAG));
+#endif
+		memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS));
+		printk("Bus watcher error counters: %08x %08x\n", l2_err, memio_err);
+		printk("\nLast recorded signature:\n");
+		printk("Request %02x from %d, answered by %d with Dcode %d\n",
+		       (unsigned int)(G_SCD_BERR_TID(status) & 0x3f),
+		       (int)(G_SCD_BERR_TID(status) >> 6),
+		       (int)G_SCD_BERR_RID(status),
+		       (int)G_SCD_BERR_DCODE(status));
+#ifdef DUMP_L2_ECC_TAG_ON_ERROR
+		printk("Last L2 tag w/ bad ECC: %016llx\n", l2_tag);
+#endif
+	} else {
+		printk("Bus watcher indicates no error\n");
+	}
+}
+#else
+extern void check_bus_watcher(void);
+#endif
+
+asmlinkage void sb1_cache_error(void)
+{
+	uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res;
+	unsigned long long cerr_dpa;
+
+#ifdef CONFIG_SIBYTE_BW_TRACE
+	/* Freeze the trace buffer now */
+	csr_out32(M_SCD_TRACE_CFG_FREEZE, IOADDR(A_SCD_TRACE_CFG));
+	printk("Trace buffer frozen\n");
+#endif
+
+	printk("Cache error exception on CPU %x:\n",
+	       (read_c0_prid() >> 25) & 0x7);
+
+	__asm__ __volatile__ (
+	"	.set	push\n\t"
+	"	.set	mips64\n\t"
+	"	.set	noat\n\t"
+	"	mfc0	%0, $26\n\t"
+	"	mfc0	%1, $27\n\t"
+	"	mfc0	%2, $27, 1\n\t"
+	"	dmfc0	$1, $27, 3\n\t"
+	"	dsrl32	%3, $1, 0 \n\t"
+	"	sll	%4, $1, 0 \n\t"
+	"	mfc0	%5, $30\n\t"
+	"	.set	pop"
+	: "=r" (errctl), "=r" (cerr_i), "=r" (cerr_d),
+	  "=r" (dpahi), "=r" (dpalo), "=r" (eepc));
+
+	cerr_dpa = (((uint64_t)dpahi) << 32) | dpalo;
+	printk(" c0_errorepc ==	  %08x\n", eepc);
+	printk(" c0_errctl   ==	  %08x", errctl);
+	breakout_errctl(errctl);
+	if (errctl & CP0_ERRCTL_ICACHE) {
+		printk(" c0_cerr_i   ==	  %08x", cerr_i);
+		breakout_cerri(cerr_i);
+		if (CP0_CERRI_IDX_VALID(cerr_i)) {
+			/* Check index of EPC, allowing for delay slot */
+			if (((eepc & SB1_CACHE_INDEX_MASK) != (cerr_i & SB1_CACHE_INDEX_MASK)) &&
+			    ((eepc & SB1_CACHE_INDEX_MASK) != ((cerr_i & SB1_CACHE_INDEX_MASK) - 4)))
+				printk(" cerr_i idx doesn't match eepc\n");
+			else {
+				res = extract_ic(cerr_i & SB1_CACHE_INDEX_MASK,
+						 (cerr_i & CP0_CERRI_DATA) != 0);
+				if (!(res & cerr_i))
+					printk("...didn't see indicated icache problem\n");
+			}
+		}
+	}
+	if (errctl & CP0_ERRCTL_DCACHE) {
+		printk(" c0_cerr_d   ==	  %08x", cerr_d);
+		breakout_cerrd(cerr_d);
+		if (CP0_CERRD_DPA_VALID(cerr_d)) {
+			printk(" c0_cerr_dpa == %010llx\n", cerr_dpa);
+			if (!CP0_CERRD_IDX_VALID(cerr_d)) {
+				res = extract_dc(cerr_dpa & SB1_CACHE_INDEX_MASK,
+						 (cerr_d & CP0_CERRD_DATA) != 0);
+				if (!(res & cerr_d))
+					printk("...didn't see indicated dcache problem\n");
+			} else {
+				if ((cerr_dpa & SB1_CACHE_INDEX_MASK) != (cerr_d & SB1_CACHE_INDEX_MASK))
+					printk(" cerr_d idx doesn't match cerr_dpa\n");
+				else {
+					res = extract_dc(cerr_d & SB1_CACHE_INDEX_MASK,
+							 (cerr_d & CP0_CERRD_DATA) != 0);
+					if (!(res & cerr_d))
+						printk("...didn't see indicated problem\n");
+				}
+			}
+		}
+	}
+
+	check_bus_watcher();
+
+	/*
+	 * Calling panic() when a fatal cache error occurs scrambles the
+	 * state of the system (and the cache), making it difficult to
+	 * investigate after the fact.	However, if you just stall the CPU,
+	 * the other CPU may keep on running, which is typically very
+	 * undesirable.
+	 */
+#ifdef CONFIG_SB1_CERR_STALL
+	while (1)
+		;
+#else
+	panic("unhandled cache error");
+#endif
+}
+
+
+/* Parity lookup table. */
+static const uint8_t parity[256] = {
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
+};
+
+/* Masks to select bits for Hamming parity, mask_72_64[i] for bit[i] */
+static const uint64_t mask_72_64[8] = {
+	0x0738C808099264FFULL,
+	0x38C808099264FF07ULL,
+	0xC808099264FF0738ULL,
+	0x08099264FF0738C8ULL,
+	0x099264FF0738C808ULL,
+	0x9264FF0738C80809ULL,
+	0x64FF0738C8080992ULL,
+	0xFF0738C808099264ULL
+};
+
+/* Calculate the parity on a range of bits */
+static char range_parity(uint64_t dword, int max, int min)
+{
+	char parity = 0;
+	int i;
+	dword >>= min;
+	for (i=max-min; i>=0; i--) {
+		if (dword & 0x1)
+			parity = !parity;
+		dword >>= 1;
+	}
+	return parity;
+}
+
+/* Calculate the 4-bit even byte-parity for an instruction */
+static unsigned char inst_parity(uint32_t word)
+{
+	int i, j;
+	char parity = 0;
+	for (j=0; j<4; j++) {
+		char byte_parity = 0;
+		for (i=0; i<8; i++) {
+			if (word & 0x80000000)
+				byte_parity = !byte_parity;
+			word <<= 1;
+		}
+		parity <<= 1;
+		parity |= byte_parity;
+	}
+	return parity;
+}
+
+static uint32_t extract_ic(unsigned short addr, int data)
+{
+	unsigned short way;
+	int valid;
+	uint32_t taghi, taglolo, taglohi;
+	unsigned long long taglo, va;
+	uint64_t tlo_tmp;
+	uint8_t lru;
+	int res = 0;
+
+	printk("Icache index 0x%04x  ", addr);
+	for (way = 0; way < 4; way++) {
+		/* Index-load-tag-I */
+		__asm__ __volatile__ (
+		"	.set	push		\n\t"
+		"	.set	noreorder	\n\t"
+		"	.set	mips64		\n\t"
+		"	.set	noat		\n\t"
+		"	cache	4, 0(%3)	\n\t"
+		"	mfc0	%0, $29		\n\t"
+		"	dmfc0	$1, $28		\n\t"
+		"	dsrl32	%1, $1, 0	\n\t"
+		"	sll	%2, $1, 0	\n\t"
+		"	.set	pop"
+		: "=r" (taghi), "=r" (taglohi), "=r" (taglolo)
+		: "r" ((way << 13) | addr));
+
+		taglo = ((unsigned long long)taglohi << 32) | taglolo;
+		if (way == 0) {
+			lru = (taghi >> 14) & 0xff;
+			printk("[Bank %d Set 0x%02x]  LRU > %d %d %d %d > MRU\n",
+				    ((addr >> 5) & 0x3), /* bank */
+				    ((addr >> 7) & 0x3f), /* index */
+				    (lru & 0x3),
+				    ((lru >> 2) & 0x3),
+				    ((lru >> 4) & 0x3),
+				    ((lru >> 6) & 0x3));
+		}
+		va = (taglo & 0xC0000FFFFFFFE000ULL) | addr;
+		if ((taglo & (1 << 31)) && (((taglo >> 62) & 0x3) == 3))
+			va |= 0x3FFFF00000000000ULL;
+		valid = ((taghi >> 29) & 1);
+		if (valid) {
+			tlo_tmp = taglo & 0xfff3ff;
+			if (((taglo >> 10) & 1) ^ range_parity(tlo_tmp, 23, 0)) {
+				printk("   ** bad parity in VTag0/G/ASID\n");
+				res |= CP0_CERRI_TAG_PARITY;
+			}
+			if (((taglo >> 11) & 1) ^ range_parity(taglo, 63, 24)) {
+				printk("   ** bad parity in R/VTag1\n");
+				res |= CP0_CERRI_TAG_PARITY;
+			}
+		}
+		if (valid ^ ((taghi >> 27) & 1)) {
+			printk("   ** bad parity for valid bit\n");
+			res |= CP0_CERRI_TAG_PARITY;
+		}
+		printk(" %d  [VA %016llx]  [Vld? %d]  raw tags: %08X-%016llX\n",
+			    way, va, valid, taghi, taglo);
+
+		if (data) {
+			uint32_t datahi, insta, instb;
+			uint8_t predecode;
+			int offset;
+
+			/* (hit all banks and ways) */
+			for (offset = 0; offset < 4; offset++) {
+				/* Index-load-data-I */
+				__asm__ __volatile__ (
+				"	.set	push\n\t"
+				"	.set	noreorder\n\t"
+				"	.set	mips64\n\t"
+				"	.set	noat\n\t"
+				"	cache	6, 0(%3)  \n\t"
+				"	mfc0	%0, $29, 1\n\t"
+				"	dmfc0  $1, $28, 1\n\t"
+				"	dsrl32 %1, $1, 0 \n\t"
+				"	sll    %2, $1, 0 \n\t"
+				"	.set	pop	    \n"
+				: "=r" (datahi), "=r" (insta), "=r" (instb)
+				: "r" ((way << 13) | addr | (offset << 3)));
+				predecode = (datahi >> 8) & 0xff;
+				if (((datahi >> 16) & 1) != (uint32_t)range_parity(predecode, 7, 0)) {
+					printk("   ** bad parity in predecode\n");
+					res |= CP0_CERRI_DATA_PARITY;
+				}
+				/* XXXKW should/could check predecode bits themselves */
+				if (((datahi >> 4) & 0xf) ^ inst_parity(insta)) {
+					printk("   ** bad parity in instruction a\n");
+					res |= CP0_CERRI_DATA_PARITY;
+				}
+				if ((datahi & 0xf) ^ inst_parity(instb)) {
+					printk("   ** bad parity in instruction b\n");
+					res |= CP0_CERRI_DATA_PARITY;
+				}
+				printk("  %05X-%08X%08X", datahi, insta, instb);
+			}
+			printk("\n");
+		}
+	}
+	return res;
+}
+
+/* Compute the ECC for a data doubleword */
+static uint8_t dc_ecc(uint64_t dword)
+{
+	uint64_t t;
+	uint32_t w;
+	uint8_t	 p;
+	int	 i;
+
+	p = 0;
+	for (i = 7; i >= 0; i--)
+	{
+		p <<= 1;
+		t = dword & mask_72_64[i];
+		w = (uint32_t)(t >> 32);
+		p ^= (parity[w>>24] ^ parity[(w>>16) & 0xFF]
+		      ^ parity[(w>>8) & 0xFF] ^ parity[w & 0xFF]);
+		w = (uint32_t)(t & 0xFFFFFFFF);
+		p ^= (parity[w>>24] ^ parity[(w>>16) & 0xFF]
+		      ^ parity[(w>>8) & 0xFF] ^ parity[w & 0xFF]);
+	}
+	return p;
+}
+
+struct dc_state {
+	unsigned char val;
+	char *name;
+};
+
+static struct dc_state dc_states[] = {
+	{ 0x00, "INVALID" },
+	{ 0x0f, "COH-SHD" },
+	{ 0x13, "NCO-E-C" },
+	{ 0x19, "NCO-E-D" },
+	{ 0x16, "COH-E-C" },
+	{ 0x1c, "COH-E-D" },
+	{ 0xff, "*ERROR*" }
+};
+
+#define DC_TAG_VALID(state) \
+    (((state) == 0x0) || ((state) == 0xf) || ((state) == 0x13) || \
+     ((state) == 0x19) || ((state) == 0x16) || ((state) == 0x1c))
+
+static char *dc_state_str(unsigned char state)
+{
+	struct dc_state *dsc = dc_states;
+	while (dsc->val != 0xff) {
+		if (dsc->val == state)
+			break;
+		dsc++;
+	}
+	return dsc->name;
+}
+
+static uint32_t extract_dc(unsigned short addr, int data)
+{
+	int valid, way;
+	unsigned char state;
+	uint32_t taghi, taglolo, taglohi;
+	unsigned long long taglo, pa;
+	uint8_t ecc, lru;
+	int res = 0;
+
+	printk("Dcache index 0x%04x  ", addr);
+	for (way = 0; way < 4; way++) {
+		__asm__ __volatile__ (
+		"	.set	push\n\t"
+		"	.set	noreorder\n\t"
+		"	.set	mips64\n\t"
+		"	.set	noat\n\t"
+		"	cache	5, 0(%3)\n\t"	/* Index-load-tag-D */
+		"	mfc0	%0, $29, 2\n\t"
+		"	dmfc0	$1, $28, 2\n\t"
+		"	dsrl32	%1, $1, 0\n\t"
+		"	sll	%2, $1, 0\n\t"
+		"	.set	pop"
+		: "=r" (taghi), "=r" (taglohi), "=r" (taglolo)
+		: "r" ((way << 13) | addr));
+
+		taglo = ((unsigned long long)taglohi << 32) | taglolo;
+		pa = (taglo & 0xFFFFFFE000ULL) | addr;
+		if (way == 0) {
+			lru = (taghi >> 14) & 0xff;
+			printk("[Bank %d Set 0x%02x]  LRU > %d %d %d %d > MRU\n",
+				    ((addr >> 11) & 0x2) | ((addr >> 5) & 1), /* bank */
+				    ((addr >> 6) & 0x3f), /* index */
+				    (lru & 0x3),
+				    ((lru >> 2) & 0x3),
+				    ((lru >> 4) & 0x3),
+				    ((lru >> 6) & 0x3));
+		}
+		state = (taghi >> 25) & 0x1f;
+		valid = DC_TAG_VALID(state);
+		printk(" %d  [PA %010llx]  [state %s (%02x)]  raw tags: %08X-%016llX\n",
+			    way, pa, dc_state_str(state), state, taghi, taglo);
+		if (valid) {
+			if (((taglo >> 11) & 1) ^ range_parity(taglo, 39, 26)) {
+				printk("   ** bad parity in PTag1\n");
+				res |= CP0_CERRD_TAG_ADDRESS;
+			}
+			if (((taglo >> 10) & 1) ^ range_parity(taglo, 25, 13)) {
+				printk("   ** bad parity in PTag0\n");
+				res |= CP0_CERRD_TAG_ADDRESS;
+			}
+		} else {
+			res |= CP0_CERRD_TAG_STATE;
+		}
+
+		if (data) {
+			uint32_t datalohi, datalolo, datahi;
+			unsigned long long datalo;
+			int offset;
+			char bad_ecc = 0;
+
+			for (offset = 0; offset < 4; offset++) {
+				/* Index-load-data-D */
+				__asm__ __volatile__ (
+				"	.set	push\n\t"
+				"	.set	noreorder\n\t"
+				"	.set	mips64\n\t"
+				"	.set	noat\n\t"
+				"	cache	7, 0(%3)\n\t" /* Index-load-data-D */
+				"	mfc0	%0, $29, 3\n\t"
+				"	dmfc0	$1, $28, 3\n\t"
+				"	dsrl32	%1, $1, 0 \n\t"
+				"	sll	%2, $1, 0 \n\t"
+				"	.set	pop"
+				: "=r" (datahi), "=r" (datalohi), "=r" (datalolo)
+				: "r" ((way << 13) | addr | (offset << 3)));
+				datalo = ((unsigned long long)datalohi << 32) | datalolo;
+				ecc = dc_ecc(datalo);
+				if (ecc != datahi) {
+					int bits;
+					bad_ecc |= 1 << (3-offset);
+					ecc ^= datahi;
+					bits = hweight8(ecc);
+					res |= (bits == 1) ? CP0_CERRD_DATA_SBE : CP0_CERRD_DATA_DBE;
+				}
+				printk("  %02X-%016llX", datahi, datalo);
+			}
+			printk("\n");
+			if (bad_ecc)
+				printk("  dwords w/ bad ECC: %d %d %d %d\n",
+				       !!(bad_ecc & 8), !!(bad_ecc & 4),
+				       !!(bad_ecc & 2), !!(bad_ecc & 1));
+		}
+	}
+	return res;
+}
diff --git a/arch/mips/mm/cex-gen.S b/arch/mips/mm/cex-gen.S
new file mode 100644
index 0000000000..45dff5cd4b
--- /dev/null
+++ b/arch/mips/mm/cex-gen.S
@@ -0,0 +1,42 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995 - 1999 Ralf Baechle
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ *
+ * Cache error handler
+ */
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+#include <asm/stackframe.h>
+
+/*
+ * Game over.  Go to the button.  Press gently.	 Swear where allowed by
+ * legislation.
+ */
+	LEAF(except_vec2_generic)
+	.set	noreorder
+	.set	noat
+	.set	mips0
+	/*
+	 * This is a very bad place to be.  Our cache error
+	 * detection has triggered.  If we have write-back data
+	 * in the cache, we may not be able to recover.	 As a
+	 * first-order desperate measure, turn off KSEG0 cacheing.
+	 */
+	mfc0	k0,CP0_CONFIG
+	li	k1,~CONF_CM_CMASK
+	and	k0,k0,k1
+	ori	k0,k0,CONF_CM_UNCACHED
+	mtc0	k0,CP0_CONFIG
+	/* Give it a few cycles to sink in... */
+	nop
+	nop
+	nop
+
+	j	cache_parity_error
+	nop
+	END(except_vec2_generic)
diff --git a/arch/mips/mm/cex-oct.S b/arch/mips/mm/cex-oct.S
new file mode 100644
index 0000000000..9029092aa7
--- /dev/null
+++ b/arch/mips/mm/cex-oct.S
@@ -0,0 +1,70 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Cavium Networks
+ * Cache error handler
+ */
+
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+#include <asm/stackframe.h>
+
+/*
+ * Handle cache error. Indicate to the second level handler whether
+ * the exception is recoverable.
+ */
+	LEAF(except_vec2_octeon)
+
+	.set	push
+	.set	mips64r2
+	.set	noreorder
+	.set	noat
+
+
+	/* due to an errata we need to read the COP0 CacheErr (Dcache)
+	 * before any cache/DRAM access	 */
+
+	rdhwr	k0, $0	      /* get core_id */
+	PTR_LA	k1, cache_err_dcache
+	sll	k0, k0, 3
+	PTR_ADDU k1, k0, k1    /* k1 = &cache_err_dcache[core_id] */
+
+	dmfc0	k0, CP0_CACHEERR, 1
+	sd	k0, (k1)
+	dmtc0	$0, CP0_CACHEERR, 1
+
+	/* check whether this is a nested exception */
+	mfc0	k1, CP0_STATUS
+	andi	k1, k1, ST0_EXL
+	beqz	k1, 1f
+	 nop
+	j	cache_parity_error_octeon_non_recoverable
+	 nop
+
+	/* exception is recoverable */
+1:	j	handle_cache_err
+	 nop
+
+	.set	pop
+	END(except_vec2_octeon)
+
+ /* We need to jump to handle_cache_err so that the previous handler
+  * can fit within 0x80 bytes. We also move from 0xFFFFFFFFAXXXXXXX
+  * space (uncached) to the 0xFFFFFFFF8XXXXXXX space (cached).	*/
+	LEAF(handle_cache_err)
+	.set	push
+	.set	noreorder
+	.set	noat
+
+	SAVE_ALL
+	KMODE
+	jal	cache_parity_error_octeon_recoverable
+	nop
+	j	ret_from_exception
+	nop
+
+	.set pop
+	END(handle_cache_err)
diff --git a/arch/mips/mm/cex-sb1.S b/arch/mips/mm/cex-sb1.S
new file mode 100644
index 0000000000..85c6e6a40b
--- /dev/null
+++ b/arch/mips/mm/cex-sb1.S
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001,2002,2003 Broadcom Corporation
+ */
+
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+#include <asm/stackframe.h>
+#include <asm/cacheops.h>
+#include <asm/sibyte/board.h>
+
+#define C0_ERRCTL     $26	      /* CP0: Error info */
+#define C0_CERR_I     $27	      /* CP0: Icache error */
+#define C0_CERR_D     $27,1	      /* CP0: Dcache error */
+
+	/*
+	 * Based on SiByte sample software cache-err/cerr.S
+	 * CVS revision 1.8.  Only the 'unrecoverable' case
+	 * is changed.
+	 */
+
+	.set	mips64
+	.set	noreorder
+	.set	noat
+
+	/*
+	 * sb1_cerr_vec: code to be copied to the Cache Error
+	 * Exception vector.  The code must be pushed out to memory
+	 * (either by copying to Kseg0 and Kseg1 both, or by flushing
+	 * the L1 and L2) since it is fetched as 0xa0000100.
+	 *
+	 * NOTE: Be sure this handler is at most 28 instructions long
+	 * since the final 16 bytes of the exception vector memory
+	 * (0x170-0x17f) are used to preserve k0, k1, and ra.
+	 */
+
+LEAF(except_vec2_sb1)
+	/*
+	 * If this error is recoverable, we need to exit the handler
+	 * without having dirtied any registers.  To do this,
+	 * save/restore k0 and k1 from low memory (Useg is direct
+	 * mapped while ERL=1). Note that we can't save to a
+	 * CPU-specific location without ruining a register in the
+	 * process.  This means we are vulnerable to data corruption
+	 * whenever the handler is reentered by a second CPU.
+	 */
+	sd	k0,0x170($0)
+	sd	k1,0x178($0)
+
+#ifdef CONFIG_SB1_CEX_ALWAYS_FATAL
+	j	handle_vec2_sb1
+	 nop
+#else
+	/*
+	 * M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell
+	 * if we can fast-path out of here for a h/w-recovered error.
+	 */
+	mfc0	k1,C0_ERRCTL
+	bgtz	k1,attempt_recovery
+	 sll	k0,k1,1
+
+recovered_dcache:
+	/*
+	 * Unlock CacheErr-D (which in turn unlocks CacheErr-DPA).
+	 * Ought to log the occurrence of this recovered dcache error.
+	 */
+	b	recovered
+	 mtc0	$0,C0_CERR_D
+
+attempt_recovery:
+	/*
+	 * k0 has C0_ERRCTL << 1, which puts 'DC' at bit 31.  Any
+	 * Dcache errors we can recover from will take more extensive
+	 * processing.	For now, they are considered "unrecoverable".
+	 * Note that 'DC' becoming set (outside of ERL mode) will
+	 * cause 'IC' to clear; so if there's an Icache error, we'll
+	 * only find out about it if we recover from this error and
+	 * continue executing.
+	 */
+	bltz	k0,unrecoverable
+	 sll	k0,1
+
+	/*
+	 * k0 has C0_ERRCTL << 2, which puts 'IC' at bit 31.  If an
+	 * Icache error isn't indicated, I'm not sure why we got here.
+	 * Consider that case "unrecoverable" for now.
+	 */
+	bgez	k0,unrecoverable
+
+attempt_icache_recovery:
+	/*
+	 * External icache errors are due to uncorrectable ECC errors
+	 * in the L2 cache or Memory Controller and cannot be
+	 * recovered here.
+	 */
+	 mfc0	k0,C0_CERR_I		/* delay slot */
+	li	k1,1 << 26		/* ICACHE_EXTERNAL */
+	and	k1,k0
+	bnez	k1,unrecoverable
+	 andi	k0,0x1fe0
+
+	/*
+	 * Since the error is internal, the 'IDX' field from
+	 * CacheErr-I is valid and we can just invalidate all blocks
+	 * in that set.
+	 */
+	cache	Index_Invalidate_I,(0<<13)(k0)
+	cache	Index_Invalidate_I,(1<<13)(k0)
+	cache	Index_Invalidate_I,(2<<13)(k0)
+	cache	Index_Invalidate_I,(3<<13)(k0)
+
+	/* Ought to log this recovered icache error */
+
+recovered:
+	/* Restore the saved registers */
+	ld	k0,0x170($0)
+	ld	k1,0x178($0)
+	eret
+
+unrecoverable:
+	/* Unrecoverable Icache or Dcache error; log it and/or fail */
+	j	handle_vec2_sb1
+	 nop
+#endif
+
+END(except_vec2_sb1)
+
+	LEAF(handle_vec2_sb1)
+	mfc0	k0,CP0_CONFIG
+	li	k1,~CONF_CM_CMASK
+	and	k0,k0,k1
+	ori	k0,k0,CONF_CM_UNCACHED
+	mtc0	k0,CP0_CONFIG
+
+	SSNOP
+	SSNOP
+	SSNOP
+	SSNOP
+	bnezl	$0, 1f
+1:
+	mfc0	k0, CP0_STATUS
+	sll	k0, k0, 3			# check CU0 (kernel?)
+	bltz	k0, 2f
+	 nop
+
+	/* Get a valid Kseg0 stack pointer.  Any task's stack pointer
+	 * will do, although if we ever want to resume execution we
+	 * better not have corrupted any state. */
+	get_saved_sp
+	move	sp, k1
+
+2:
+	j	sb1_cache_error
+	 nop
+
+	END(handle_vec2_sb1)
diff --git a/arch/mips/mm/context.c b/arch/mips/mm/context.c
new file mode 100644
index 0000000000..966f40066f
--- /dev/null
+++ b/arch/mips/mm/context.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/atomic.h>
+#include <linux/mmu_context.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+
+static DEFINE_RAW_SPINLOCK(cpu_mmid_lock);
+
+static atomic64_t mmid_version;
+static unsigned int num_mmids;
+static unsigned long *mmid_map;
+
+static DEFINE_PER_CPU(u64, reserved_mmids);
+static cpumask_t tlb_flush_pending;
+
+static bool asid_versions_eq(int cpu, u64 a, u64 b)
+{
+	return ((a ^ b) & asid_version_mask(cpu)) == 0;
+}
+
+void get_new_mmu_context(struct mm_struct *mm)
+{
+	unsigned int cpu;
+	u64 asid;
+
+	/*
+	 * This function is specific to ASIDs, and should not be called when
+	 * MMIDs are in use.
+	 */
+	if (WARN_ON(IS_ENABLED(CONFIG_DEBUG_VM) && cpu_has_mmid))
+		return;
+
+	cpu = smp_processor_id();
+	asid = asid_cache(cpu);
+
+	if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) {
+		if (cpu_has_vtag_icache)
+			flush_icache_all();
+		local_flush_tlb_all();	/* start new asid cycle */
+	}
+
+	set_cpu_context(cpu, mm, asid);
+	asid_cache(cpu) = asid;
+}
+EXPORT_SYMBOL_GPL(get_new_mmu_context);
+
+void check_mmu_context(struct mm_struct *mm)
+{
+	unsigned int cpu = smp_processor_id();
+
+	/*
+	 * This function is specific to ASIDs, and should not be called when
+	 * MMIDs are in use.
+	 */
+	if (WARN_ON(IS_ENABLED(CONFIG_DEBUG_VM) && cpu_has_mmid))
+		return;
+
+	/* Check if our ASID is of an older version and thus invalid */
+	if (!asid_versions_eq(cpu, cpu_context(cpu, mm), asid_cache(cpu)))
+		get_new_mmu_context(mm);
+}
+EXPORT_SYMBOL_GPL(check_mmu_context);
+
+static void flush_context(void)
+{
+	u64 mmid;
+	int cpu;
+
+	/* Update the list of reserved MMIDs and the MMID bitmap */
+	bitmap_zero(mmid_map, num_mmids);
+
+	/* Reserve an MMID for kmap/wired entries */
+	__set_bit(MMID_KERNEL_WIRED, mmid_map);
+
+	for_each_possible_cpu(cpu) {
+		mmid = xchg_relaxed(&cpu_data[cpu].asid_cache, 0);
+
+		/*
+		 * If this CPU has already been through a
+		 * rollover, but hasn't run another task in
+		 * the meantime, we must preserve its reserved
+		 * MMID, as this is the only trace we have of
+		 * the process it is still running.
+		 */
+		if (mmid == 0)
+			mmid = per_cpu(reserved_mmids, cpu);
+
+		__set_bit(mmid & cpu_asid_mask(&cpu_data[cpu]), mmid_map);
+		per_cpu(reserved_mmids, cpu) = mmid;
+	}
+
+	/*
+	 * Queue a TLB invalidation for each CPU to perform on next
+	 * context-switch
+	 */
+	cpumask_setall(&tlb_flush_pending);
+}
+
+static bool check_update_reserved_mmid(u64 mmid, u64 newmmid)
+{
+	bool hit;
+	int cpu;
+
+	/*
+	 * Iterate over the set of reserved MMIDs looking for a match.
+	 * If we find one, then we can update our mm to use newmmid
+	 * (i.e. the same MMID in the current generation) but we can't
+	 * exit the loop early, since we need to ensure that all copies
+	 * of the old MMID are updated to reflect the mm. Failure to do
+	 * so could result in us missing the reserved MMID in a future
+	 * generation.
+	 */
+	hit = false;
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(reserved_mmids, cpu) == mmid) {
+			hit = true;
+			per_cpu(reserved_mmids, cpu) = newmmid;
+		}
+	}
+
+	return hit;
+}
+
+static u64 get_new_mmid(struct mm_struct *mm)
+{
+	static u32 cur_idx = MMID_KERNEL_WIRED + 1;
+	u64 mmid, version, mmid_mask;
+
+	mmid = cpu_context(0, mm);
+	version = atomic64_read(&mmid_version);
+	mmid_mask = cpu_asid_mask(&boot_cpu_data);
+
+	if (!asid_versions_eq(0, mmid, 0)) {
+		u64 newmmid = version | (mmid & mmid_mask);
+
+		/*
+		 * If our current MMID was active during a rollover, we
+		 * can continue to use it and this was just a false alarm.
+		 */
+		if (check_update_reserved_mmid(mmid, newmmid)) {
+			mmid = newmmid;
+			goto set_context;
+		}
+
+		/*
+		 * We had a valid MMID in a previous life, so try to re-use
+		 * it if possible.
+		 */
+		if (!__test_and_set_bit(mmid & mmid_mask, mmid_map)) {
+			mmid = newmmid;
+			goto set_context;
+		}
+	}
+
+	/* Allocate a free MMID */
+	mmid = find_next_zero_bit(mmid_map, num_mmids, cur_idx);
+	if (mmid != num_mmids)
+		goto reserve_mmid;
+
+	/* We're out of MMIDs, so increment the global version */
+	version = atomic64_add_return_relaxed(asid_first_version(0),
+					      &mmid_version);
+
+	/* Note currently active MMIDs & mark TLBs as requiring flushes */
+	flush_context();
+
+	/* We have more MMIDs than CPUs, so this will always succeed */
+	mmid = find_first_zero_bit(mmid_map, num_mmids);
+
+reserve_mmid:
+	__set_bit(mmid, mmid_map);
+	cur_idx = mmid;
+	mmid |= version;
+set_context:
+	set_cpu_context(0, mm, mmid);
+	return mmid;
+}
+
+void check_switch_mmu_context(struct mm_struct *mm)
+{
+	unsigned int cpu = smp_processor_id();
+	u64 ctx, old_active_mmid;
+	unsigned long flags;
+
+	if (!cpu_has_mmid) {
+		check_mmu_context(mm);
+		write_c0_entryhi(cpu_asid(cpu, mm));
+		goto setup_pgd;
+	}
+
+	/*
+	 * MMID switch fast-path, to avoid acquiring cpu_mmid_lock when it's
+	 * unnecessary.
+	 *
+	 * The memory ordering here is subtle. If our active_mmids is non-zero
+	 * and the MMID matches the current version, then we update the CPU's
+	 * asid_cache with a relaxed cmpxchg. Racing with a concurrent rollover
+	 * means that either:
+	 *
+	 * - We get a zero back from the cmpxchg and end up waiting on
+	 *   cpu_mmid_lock in check_mmu_context(). Taking the lock synchronises
+	 *   with the rollover and so we are forced to see the updated
+	 *   generation.
+	 *
+	 * - We get a valid MMID back from the cmpxchg, which means the
+	 *   relaxed xchg in flush_context will treat us as reserved
+	 *   because atomic RmWs are totally ordered for a given location.
+	 */
+	ctx = cpu_context(cpu, mm);
+	old_active_mmid = READ_ONCE(cpu_data[cpu].asid_cache);
+	if (!old_active_mmid ||
+	    !asid_versions_eq(cpu, ctx, atomic64_read(&mmid_version)) ||
+	    !cmpxchg_relaxed(&cpu_data[cpu].asid_cache, old_active_mmid, ctx)) {
+		raw_spin_lock_irqsave(&cpu_mmid_lock, flags);
+
+		ctx = cpu_context(cpu, mm);
+		if (!asid_versions_eq(cpu, ctx, atomic64_read(&mmid_version)))
+			ctx = get_new_mmid(mm);
+
+		WRITE_ONCE(cpu_data[cpu].asid_cache, ctx);
+		raw_spin_unlock_irqrestore(&cpu_mmid_lock, flags);
+	}
+
+	/*
+	 * Invalidate the local TLB if needed. Note that we must only clear our
+	 * bit in tlb_flush_pending after this is complete, so that the
+	 * cpu_has_shared_ftlb_entries case below isn't misled.
+	 */
+	if (cpumask_test_cpu(cpu, &tlb_flush_pending)) {
+		if (cpu_has_vtag_icache)
+			flush_icache_all();
+		local_flush_tlb_all();
+		cpumask_clear_cpu(cpu, &tlb_flush_pending);
+	}
+
+	write_c0_memorymapid(ctx & cpu_asid_mask(&boot_cpu_data));
+
+	/*
+	 * If this CPU shares FTLB entries with its siblings and one or more of
+	 * those siblings hasn't yet invalidated its TLB following a version
+	 * increase then we need to invalidate any TLB entries for our MMID
+	 * that we might otherwise pick up from a sibling.
+	 *
+	 * We ifdef on CONFIG_SMP because cpu_sibling_map isn't defined in
+	 * CONFIG_SMP=n kernels.
+	 */
+#ifdef CONFIG_SMP
+	if (cpu_has_shared_ftlb_entries &&
+	    cpumask_intersects(&tlb_flush_pending, &cpu_sibling_map[cpu])) {
+		/* Ensure we operate on the new MMID */
+		mtc0_tlbw_hazard();
+
+		/*
+		 * Invalidate all TLB entries associated with the new
+		 * MMID, and wait for the invalidation to complete.
+		 */
+		ginvt_mmid();
+		sync_ginv();
+	}
+#endif
+
+setup_pgd:
+	TLBMISS_HANDLER_SETUP_PGD(mm->pgd);
+}
+EXPORT_SYMBOL_GPL(check_switch_mmu_context);
+
+static int mmid_init(void)
+{
+	if (!cpu_has_mmid)
+		return 0;
+
+	/*
+	 * Expect allocation after rollover to fail if we don't have at least
+	 * one more MMID than CPUs.
+	 */
+	num_mmids = asid_first_version(0);
+	WARN_ON(num_mmids <= num_possible_cpus());
+
+	atomic64_set(&mmid_version, asid_first_version(0));
+	mmid_map = bitmap_zalloc(num_mmids, GFP_KERNEL);
+	if (!mmid_map)
+		panic("Failed to allocate bitmap for %u MMIDs\n", num_mmids);
+
+	/* Reserve an MMID for kmap/wired entries */
+	__set_bit(MMID_KERNEL_WIRED, mmid_map);
+
+	pr_info("MMID allocator initialised with %u entries\n", num_mmids);
+	return 0;
+}
+early_initcall(mmid_init);
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
new file mode 100644
index 0000000000..3c4fc97b9f
--- /dev/null
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2000  Ani Joshi <ajoshi@unixbox.com>
+ * Copyright (C) 2000, 2001, 06	 Ralf Baechle <ralf@linux-mips.org>
+ * swiped from i386, and cloned for MIPS by Geert, polished by Ralf.
+ */
+#include <linux/dma-direct.h>
+#include <linux/dma-map-ops.h>
+#include <linux/highmem.h>
+
+#include <asm/cache.h>
+#include <asm/cpu-type.h>
+#include <asm/io.h>
+
+/*
+ * The affected CPUs below in 'cpu_needs_post_dma_flush()' can speculatively
+ * fill random cachelines with stale data at any time, requiring an extra
+ * flush post-DMA.
+ *
+ * Warning on the terminology - Linux calls an uncached area coherent;  MIPS
+ * terminology calls memory areas with hardware maintained coherency coherent.
+ *
+ * Note that the R14000 and R16000 should also be checked for in this condition.
+ * However this function is only called on non-I/O-coherent systems and only the
+ * R10000 and R12000 are used in such systems, the SGI IP28 Indigo² rsp.
+ * SGI IP32 aka O2.
+ */
+static inline bool cpu_needs_post_dma_flush(void)
+{
+	switch (boot_cpu_type()) {
+	case CPU_R10000:
+	case CPU_R12000:
+	case CPU_BMIPS5000:
+	case CPU_LOONGSON2EF:
+	case CPU_XBURST:
+		return true;
+	default:
+		/*
+		 * Presence of MAARs suggests that the CPU supports
+		 * speculatively prefetching data, and therefore requires
+		 * the post-DMA flush/invalidate.
+		 */
+		return cpu_has_maar;
+	}
+}
+
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
+	dma_cache_wback_inv((unsigned long)page_address(page), size);
+}
+
+void *arch_dma_set_uncached(void *addr, size_t size)
+{
+	return (void *)(__pa(addr) + UNCAC_BASE);
+}
+
+static inline void dma_sync_virt_for_device(void *addr, size_t size,
+		enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_TO_DEVICE:
+		dma_cache_wback((unsigned long)addr, size);
+		break;
+	case DMA_FROM_DEVICE:
+		dma_cache_inv((unsigned long)addr, size);
+		break;
+	case DMA_BIDIRECTIONAL:
+		dma_cache_wback_inv((unsigned long)addr, size);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static inline void dma_sync_virt_for_cpu(void *addr, size_t size,
+		enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_TO_DEVICE:
+		break;
+	case DMA_FROM_DEVICE:
+	case DMA_BIDIRECTIONAL:
+		dma_cache_inv((unsigned long)addr, size);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * A single sg entry may refer to multiple physically contiguous pages.  But
+ * we still need to process highmem pages individually.  If highmem is not
+ * configured then the bulk of this loop gets optimized out.
+ */
+static inline void dma_sync_phys(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir, bool for_device)
+{
+	struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
+	unsigned long offset = paddr & ~PAGE_MASK;
+	size_t left = size;
+
+	do {
+		size_t len = left;
+		void *addr;
+
+		if (PageHighMem(page)) {
+			if (offset + len > PAGE_SIZE)
+				len = PAGE_SIZE - offset;
+		}
+
+		addr = kmap_atomic(page);
+		if (for_device)
+			dma_sync_virt_for_device(addr + offset, len, dir);
+		else
+			dma_sync_virt_for_cpu(addr + offset, len, dir);
+		kunmap_atomic(addr);
+
+		offset = 0;
+		page++;
+		left -= len;
+	} while (left);
+}
+
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
+{
+	dma_sync_phys(paddr, size, dir, true);
+}
+
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
+{
+	if (cpu_needs_post_dma_flush())
+		dma_sync_phys(paddr, size, dir, false);
+}
+#endif
+
+#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+		const struct iommu_ops *iommu, bool coherent)
+{
+	dev->dma_coherent = coherent;
+}
+#endif
diff --git a/arch/mips/mm/extable.c b/arch/mips/mm/extable.c
new file mode 100644
index 0000000000..81bc8a34a8
--- /dev/null
+++ b/arch/mips/mm/extable.c
@@ -0,0 +1,25 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1997, 99, 2001 - 2004 Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/extable.h>
+#include <linux/spinlock.h>
+#include <asm/branch.h>
+#include <linux/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_exception_tables(exception_epc(regs));
+	if (fixup) {
+		regs->cp0_epc = fixup->nextinsn;
+
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
new file mode 100644
index 0000000000..d7878208bd
--- /dev/null
+++ b/arch/mips/mm/fault.c
@@ -0,0 +1,332 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995 - 2000 by Ralf Baechle
+ */
+#include <linux/context_tracking.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/ratelimit.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kprobes.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+#include <asm/branch.h>
+#include <asm/mmu_context.h>
+#include <asm/ptrace.h>
+#include <asm/highmem.h>		/* For VMALLOC_END */
+#include <linux/kdebug.h>
+
+int show_unhandled_signals = 1;
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
+static void __do_page_fault(struct pt_regs *regs, unsigned long write,
+	unsigned long address)
+{
+	struct vm_area_struct * vma = NULL;
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+	const int field = sizeof(unsigned long) * 2;
+	int si_code;
+	vm_fault_t fault;
+	unsigned int flags = FAULT_FLAG_DEFAULT;
+
+	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
+
+#if 0
+	printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
+	       current->comm, current->pid, field, address, write,
+	       field, regs->cp0_epc);
+#endif
+
+#ifdef CONFIG_KPROBES
+	/*
+	 * This is to notify the fault handler of the kprobes.
+	 */
+	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
+		       current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
+		return;
+#endif
+
+	si_code = SEGV_MAPERR;
+
+	/*
+	 * We fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 */
+#ifdef CONFIG_64BIT
+# define VMALLOC_FAULT_TARGET no_context
+#else
+# define VMALLOC_FAULT_TARGET vmalloc_fault
+#endif
+
+	if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
+		goto VMALLOC_FAULT_TARGET;
+#ifdef MODULE_START
+	if (unlikely(address >= MODULE_START && address < MODULE_END))
+		goto VMALLOC_FAULT_TARGET;
+#endif
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (faulthandler_disabled() || !mm)
+		goto bad_area_nosemaphore;
+
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+retry:
+	vma = lock_mm_and_find_vma(mm, address, regs);
+	if (!vma)
+		goto bad_area_nosemaphore;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+	si_code = SEGV_ACCERR;
+
+	if (write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
+	} else {
+		if (cpu_has_rixi) {
+			if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
+#if 0
+				pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] XI violation\n",
+					  raw_smp_processor_id(),
+					  current->comm, current->pid,
+					  field, address, write,
+					  field, regs->cp0_epc);
+#endif
+				goto bad_area;
+			}
+			if (!(vma->vm_flags & VM_READ) &&
+			    exception_epc(regs) != address) {
+#if 0
+				pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] RI violation\n",
+					  raw_smp_processor_id(),
+					  current->comm, current->pid,
+					  field, address, write,
+					  field, regs->cp0_epc);
+#endif
+				goto bad_area;
+			}
+		} else {
+			if (unlikely(!vma_is_accessible(vma)))
+				goto bad_area;
+		}
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(vma, address, flags, regs);
+
+	if (fault_signal_pending(fault, regs)) {
+		if (!user_mode(regs))
+			goto no_context;
+		return;
+	}
+
+	/* The fault is fully completed (including releasing mmap lock) */
+	if (fault & VM_FAULT_COMPLETED)
+		return;
+
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		if (fault & VM_FAULT_OOM)
+			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
+		else if (fault & VM_FAULT_SIGBUS)
+			goto do_sigbus;
+		BUG();
+	}
+
+	if (fault & VM_FAULT_RETRY) {
+		flags |= FAULT_FLAG_TRIED;
+
+		/*
+		 * No need to mmap_read_unlock(mm) as we would
+		 * have already released it in __lock_page_or_retry
+		 * in mm/filemap.c.
+		 */
+
+		goto retry;
+	}
+
+	mmap_read_unlock(mm);
+	return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+	mmap_read_unlock(mm);
+
+bad_area_nosemaphore:
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs)) {
+		tsk->thread.cp0_badvaddr = address;
+		tsk->thread.error_code = write;
+		if (show_unhandled_signals &&
+		    unhandled_signal(tsk, SIGSEGV) &&
+		    __ratelimit(&ratelimit_state)) {
+			pr_info("do_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx\n",
+				tsk->comm,
+				write ? "write access to" : "read access from",
+				field, address);
+			pr_info("epc = %0*lx in", field,
+				(unsigned long) regs->cp0_epc);
+			print_vma_addr(KERN_CONT " ", regs->cp0_epc);
+			pr_cont("\n");
+			pr_info("ra  = %0*lx in", field,
+				(unsigned long) regs->regs[31]);
+			print_vma_addr(KERN_CONT " ", regs->regs[31]);
+			pr_cont("\n");
+		}
+		current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
+		force_sig_fault(SIGSEGV, si_code, (void __user *)address);
+		return;
+	}
+
+no_context:
+	/* Are we prepared to handle this kernel fault?	 */
+	if (fixup_exception(regs)) {
+		current->thread.cp0_baduaddr = address;
+		return;
+	}
+
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice.
+	 */
+	bust_spinlocks(1);
+
+	printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
+	       "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
+	       raw_smp_processor_id(), field, address, field, regs->cp0_epc,
+	       field,  regs->regs[31]);
+	die("Oops", regs);
+
+out_of_memory:
+	/*
+	 * We ran out of memory, call the OOM killer, and return the userspace
+	 * (which will retry the fault, or kill us if we got oom-killed).
+	 */
+	mmap_read_unlock(mm);
+	if (!user_mode(regs))
+		goto no_context;
+	pagefault_out_of_memory();
+	return;
+
+do_sigbus:
+	mmap_read_unlock(mm);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (!user_mode(regs))
+		goto no_context;
+
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+#if 0
+	printk("do_page_fault() #3: sending SIGBUS to %s for "
+	       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
+	       tsk->comm,
+	       write ? "write access to" : "read access from",
+	       field, address,
+	       field, (unsigned long) regs->cp0_epc,
+	       field, (unsigned long) regs->regs[31]);
+#endif
+	current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
+	tsk->thread.cp0_badvaddr = address;
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
+
+	return;
+#ifndef CONFIG_64BIT
+vmalloc_fault:
+	{
+		/*
+		 * Synchronize this task's top level page-table
+		 * with the 'reference' page table.
+		 *
+		 * Do _not_ use "tsk" here. We might be inside
+		 * an interrupt in the middle of a task switch..
+		 */
+		int offset = pgd_index(address);
+		pgd_t *pgd, *pgd_k;
+		p4d_t *p4d, *p4d_k;
+		pud_t *pud, *pud_k;
+		pmd_t *pmd, *pmd_k;
+		pte_t *pte_k;
+
+		pgd = (pgd_t *) pgd_current[raw_smp_processor_id()] + offset;
+		pgd_k = init_mm.pgd + offset;
+
+		if (!pgd_present(*pgd_k))
+			goto no_context;
+		set_pgd(pgd, *pgd_k);
+
+		p4d = p4d_offset(pgd, address);
+		p4d_k = p4d_offset(pgd_k, address);
+		if (!p4d_present(*p4d_k))
+			goto no_context;
+
+		pud = pud_offset(p4d, address);
+		pud_k = pud_offset(p4d_k, address);
+		if (!pud_present(*pud_k))
+			goto no_context;
+
+		pmd = pmd_offset(pud, address);
+		pmd_k = pmd_offset(pud_k, address);
+		if (!pmd_present(*pmd_k))
+			goto no_context;
+		set_pmd(pmd, *pmd_k);
+
+		pte_k = pte_offset_kernel(pmd_k, address);
+		if (!pte_present(*pte_k))
+			goto no_context;
+		return;
+	}
+#endif
+}
+NOKPROBE_SYMBOL(__do_page_fault);
+
+asmlinkage void do_page_fault(struct pt_regs *regs,
+	unsigned long write, unsigned long address)
+{
+	enum ctx_state prev_state;
+
+	prev_state = exception_enter();
+	__do_page_fault(regs, write, address);
+	exception_exit(prev_state);
+}
+NOKPROBE_SYMBOL(do_page_fault);
diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c
new file mode 100644
index 0000000000..57e2f08f00
--- /dev/null
+++ b/arch/mips/mm/highmem.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/highmem.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+
+unsigned long highstart_pfn, highend_pfn;
+
+void kmap_flush_tlb(unsigned long addr)
+{
+	flush_tlb_one(addr);
+}
+EXPORT_SYMBOL(kmap_flush_tlb);
diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c
new file mode 100644
index 0000000000..7eaff5b078
--- /dev/null
+++ b/arch/mips/mm/hugetlbpage.c
@@ -0,0 +1,69 @@
+/*
+ * MIPS Huge TLB Page Support for Kernel.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
+ * Copyright 2005, Embedded Alley Solutions, Inc.
+ * Matt Porter <mporter@embeddedalley.com>
+ * Copyright (C) 2008, 2009 Cavium Networks, Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+		      unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
+	if (pud)
+		pte = (pte_t *)pmd_alloc(mm, pud, addr);
+
+	return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
+		       unsigned long sz)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_present(*pgd)) {
+		p4d = p4d_offset(pgd, addr);
+		if (p4d_present(*p4d)) {
+			pud = pud_offset(p4d, addr);
+			if (pud_present(*pud))
+				pmd = pmd_offset(pud, addr);
+		}
+	}
+	return (pte_t *) pmd;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _PAGE_HUGE) != 0;
+}
+
+int pud_huge(pud_t pud)
+{
+	return (pud_val(pud) & _PAGE_HUGE) != 0;
+}
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
new file mode 100644
index 0000000000..6e368a4658
--- /dev/null
+++ b/arch/mips/mm/init.c
@@ -0,0 +1,573 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994 - 2000 Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2000 MIPS Technologies, Inc.  All rights reserved.
+ */
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/pagemap.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
+#include <linux/proc_fs.h>
+#include <linux/pfn.h>
+#include <linux/hardirq.h>
+#include <linux/gfp.h>
+#include <linux/kcore.h>
+#include <linux/initrd.h>
+
+#include <asm/bootinfo.h>
+#include <asm/cachectl.h>
+#include <asm/cpu.h>
+#include <asm/dma.h>
+#include <asm/maar.h>
+#include <asm/mmu_context.h>
+#include <asm/sections.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/fixmap.h>
+
+/*
+ * We have up to 8 empty zeroed pages so we can map one of the right colour
+ * when needed.	 This is necessary only on R4000 / R4400 SC and MC versions
+ * where we have to avoid VCED / VECI exceptions for good performance at
+ * any price.  Since page is never written to after the initialization we
+ * don't have to care about aliases on other CPUs.
+ */
+unsigned long empty_zero_page, zero_page_mask;
+EXPORT_SYMBOL_GPL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
+
+/*
+ * Not static inline because used by IP27 special magic initialization code
+ */
+void setup_zero_pages(void)
+{
+	unsigned int order, i;
+	struct page *page;
+
+	if (cpu_has_vce)
+		order = 3;
+	else
+		order = 0;
+
+	empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!empty_zero_page)
+		panic("Oh boy, that early out of memory?");
+
+	page = virt_to_page((void *)empty_zero_page);
+	split_page(page, order);
+	for (i = 0; i < (1 << order); i++, page++)
+		mark_page_reserved(page);
+
+	zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
+}
+
+static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)
+{
+	enum fixed_addresses idx;
+	unsigned int old_mmid;
+	unsigned long vaddr, flags, entrylo;
+	unsigned long old_ctx;
+	pte_t pte;
+	int tlbidx;
+
+	BUG_ON(folio_test_dcache_dirty(page_folio(page)));
+
+	preempt_disable();
+	pagefault_disable();
+	idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
+	idx += in_interrupt() ? FIX_N_COLOURS : 0;
+	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
+	pte = mk_pte(page, prot);
+#if defined(CONFIG_XPA)
+	entrylo = pte_to_entrylo(pte.pte_high);
+#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+	entrylo = pte.pte_high;
+#else
+	entrylo = pte_to_entrylo(pte_val(pte));
+#endif
+
+	local_irq_save(flags);
+	old_ctx = read_c0_entryhi();
+	write_c0_entryhi(vaddr & (PAGE_MASK << 1));
+	write_c0_entrylo0(entrylo);
+	write_c0_entrylo1(entrylo);
+	if (cpu_has_mmid) {
+		old_mmid = read_c0_memorymapid();
+		write_c0_memorymapid(MMID_KERNEL_WIRED);
+	}
+#ifdef CONFIG_XPA
+	if (cpu_has_xpa) {
+		entrylo = (pte.pte_low & _PFNX_MASK);
+		writex_c0_entrylo0(entrylo);
+		writex_c0_entrylo1(entrylo);
+	}
+#endif
+	tlbidx = num_wired_entries();
+	write_c0_wired(tlbidx + 1);
+	write_c0_index(tlbidx);
+	mtc0_tlbw_hazard();
+	tlb_write_indexed();
+	tlbw_use_hazard();
+	write_c0_entryhi(old_ctx);
+	if (cpu_has_mmid)
+		write_c0_memorymapid(old_mmid);
+	local_irq_restore(flags);
+
+	return (void*) vaddr;
+}
+
+void *kmap_coherent(struct page *page, unsigned long addr)
+{
+	return __kmap_pgprot(page, addr, PAGE_KERNEL);
+}
+
+void *kmap_noncoherent(struct page *page, unsigned long addr)
+{
+	return __kmap_pgprot(page, addr, PAGE_KERNEL_NC);
+}
+
+void kunmap_coherent(void)
+{
+	unsigned int wired;
+	unsigned long flags, old_ctx;
+
+	local_irq_save(flags);
+	old_ctx = read_c0_entryhi();
+	wired = num_wired_entries() - 1;
+	write_c0_wired(wired);
+	write_c0_index(wired);
+	write_c0_entryhi(UNIQUE_ENTRYHI(wired));
+	write_c0_entrylo0(0);
+	write_c0_entrylo1(0);
+	mtc0_tlbw_hazard();
+	tlb_write_indexed();
+	tlbw_use_hazard();
+	write_c0_entryhi(old_ctx);
+	local_irq_restore(flags);
+	pagefault_enable();
+	preempt_enable();
+}
+
+void copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma)
+{
+	struct folio *src = page_folio(from);
+	void *vfrom, *vto;
+
+	vto = kmap_atomic(to);
+	if (cpu_has_dc_aliases &&
+	    folio_mapped(src) && !folio_test_dcache_dirty(src)) {
+		vfrom = kmap_coherent(from, vaddr);
+		copy_page(vto, vfrom);
+		kunmap_coherent();
+	} else {
+		vfrom = kmap_atomic(from);
+		copy_page(vto, vfrom);
+		kunmap_atomic(vfrom);
+	}
+	if ((!cpu_has_ic_fills_f_dc) ||
+	    pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
+		flush_data_cache_page((unsigned long)vto);
+	kunmap_atomic(vto);
+	/* Make sure this page is cleared on other CPU's too before using it */
+	smp_wmb();
+}
+
+void copy_to_user_page(struct vm_area_struct *vma,
+	struct page *page, unsigned long vaddr, void *dst, const void *src,
+	unsigned long len)
+{
+	struct folio *folio = page_folio(page);
+
+	if (cpu_has_dc_aliases &&
+	    folio_mapped(folio) && !folio_test_dcache_dirty(folio)) {
+		void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
+		memcpy(vto, src, len);
+		kunmap_coherent();
+	} else {
+		memcpy(dst, src, len);
+		if (cpu_has_dc_aliases)
+			folio_set_dcache_dirty(folio);
+	}
+	if (vma->vm_flags & VM_EXEC)
+		flush_cache_page(vma, vaddr, page_to_pfn(page));
+}
+
+void copy_from_user_page(struct vm_area_struct *vma,
+	struct page *page, unsigned long vaddr, void *dst, const void *src,
+	unsigned long len)
+{
+	struct folio *folio = page_folio(page);
+
+	if (cpu_has_dc_aliases &&
+	    folio_mapped(folio) && !folio_test_dcache_dirty(folio)) {
+		void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
+		memcpy(dst, vfrom, len);
+		kunmap_coherent();
+	} else {
+		memcpy(dst, src, len);
+		if (cpu_has_dc_aliases)
+			folio_set_dcache_dirty(folio);
+	}
+}
+EXPORT_SYMBOL_GPL(copy_from_user_page);
+
+void __init fixrange_init(unsigned long start, unsigned long end,
+	pgd_t *pgd_base)
+{
+#ifdef CONFIG_HIGHMEM
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	int i, j, k;
+	unsigned long vaddr;
+
+	vaddr = start;
+	i = pgd_index(vaddr);
+	j = pud_index(vaddr);
+	k = pmd_index(vaddr);
+	pgd = pgd_base + i;
+
+	for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
+		pud = (pud_t *)pgd;
+		for ( ; (j < PTRS_PER_PUD) && (vaddr < end); pud++, j++) {
+			pmd = (pmd_t *)pud;
+			for (; (k < PTRS_PER_PMD) && (vaddr < end); pmd++, k++) {
+				if (pmd_none(*pmd)) {
+					pte = (pte_t *) memblock_alloc_low(PAGE_SIZE,
+									   PAGE_SIZE);
+					if (!pte)
+						panic("%s: Failed to allocate %lu bytes align=%lx\n",
+						      __func__, PAGE_SIZE,
+						      PAGE_SIZE);
+
+					set_pmd(pmd, __pmd((unsigned long)pte));
+					BUG_ON(pte != pte_offset_kernel(pmd, 0));
+				}
+				vaddr += PMD_SIZE;
+			}
+			k = 0;
+		}
+		j = 0;
+	}
+#endif
+}
+
+struct maar_walk_info {
+	struct maar_config cfg[16];
+	unsigned int num_cfg;
+};
+
+static int maar_res_walk(unsigned long start_pfn, unsigned long nr_pages,
+			 void *data)
+{
+	struct maar_walk_info *wi = data;
+	struct maar_config *cfg = &wi->cfg[wi->num_cfg];
+	unsigned int maar_align;
+
+	/* MAAR registers hold physical addresses right shifted by 4 bits */
+	maar_align = BIT(MIPS_MAAR_ADDR_SHIFT + 4);
+
+	/* Fill in the MAAR config entry */
+	cfg->lower = ALIGN(PFN_PHYS(start_pfn), maar_align);
+	cfg->upper = ALIGN_DOWN(PFN_PHYS(start_pfn + nr_pages), maar_align) - 1;
+	cfg->attrs = MIPS_MAAR_S;
+
+	/* Ensure we don't overflow the cfg array */
+	if (!WARN_ON(wi->num_cfg >= ARRAY_SIZE(wi->cfg)))
+		wi->num_cfg++;
+
+	return 0;
+}
+
+
+unsigned __weak platform_maar_init(unsigned num_pairs)
+{
+	unsigned int num_configured;
+	struct maar_walk_info wi;
+
+	wi.num_cfg = 0;
+	walk_system_ram_range(0, max_pfn, &wi, maar_res_walk);
+
+	num_configured = maar_config(wi.cfg, wi.num_cfg, num_pairs);
+	if (num_configured < wi.num_cfg)
+		pr_warn("Not enough MAAR pairs (%u) for all memory regions (%u)\n",
+			num_pairs, wi.num_cfg);
+
+	return num_configured;
+}
+
+void maar_init(void)
+{
+	unsigned num_maars, used, i;
+	phys_addr_t lower, upper, attr;
+	static struct {
+		struct maar_config cfgs[3];
+		unsigned used;
+	} recorded = { { { 0 } }, 0 };
+
+	if (!cpu_has_maar)
+		return;
+
+	/* Detect the number of MAARs */
+	write_c0_maari(~0);
+	back_to_back_c0_hazard();
+	num_maars = read_c0_maari() + 1;
+
+	/* MAARs should be in pairs */
+	WARN_ON(num_maars % 2);
+
+	/* Set MAARs using values we recorded already */
+	if (recorded.used) {
+		used = maar_config(recorded.cfgs, recorded.used, num_maars / 2);
+		BUG_ON(used != recorded.used);
+	} else {
+		/* Configure the required MAARs */
+		used = platform_maar_init(num_maars / 2);
+	}
+
+	/* Disable any further MAARs */
+	for (i = (used * 2); i < num_maars; i++) {
+		write_c0_maari(i);
+		back_to_back_c0_hazard();
+		write_c0_maar(0);
+		back_to_back_c0_hazard();
+	}
+
+	if (recorded.used)
+		return;
+
+	pr_info("MAAR configuration:\n");
+	for (i = 0; i < num_maars; i += 2) {
+		write_c0_maari(i);
+		back_to_back_c0_hazard();
+		upper = read_c0_maar();
+#ifdef CONFIG_XPA
+		upper |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT;
+#endif
+
+		write_c0_maari(i + 1);
+		back_to_back_c0_hazard();
+		lower = read_c0_maar();
+#ifdef CONFIG_XPA
+		lower |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT;
+#endif
+
+		attr = lower & upper;
+		lower = (lower & MIPS_MAAR_ADDR) << 4;
+		upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff;
+
+		pr_info("  [%d]: ", i / 2);
+		if ((attr & MIPS_MAAR_V) != MIPS_MAAR_V) {
+			pr_cont("disabled\n");
+			continue;
+		}
+
+		pr_cont("%pa-%pa", &lower, &upper);
+
+		if (attr & MIPS_MAAR_S)
+			pr_cont(" speculate");
+
+		pr_cont("\n");
+
+		/* Record the setup for use on secondary CPUs */
+		if (used <= ARRAY_SIZE(recorded.cfgs)) {
+			recorded.cfgs[recorded.used].lower = lower;
+			recorded.cfgs[recorded.used].upper = upper;
+			recorded.cfgs[recorded.used].attrs = attr;
+			recorded.used++;
+		}
+	}
+}
+
+#ifndef CONFIG_NUMA
+void __init paging_init(void)
+{
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
+
+	pagetable_init();
+
+#ifdef CONFIG_ZONE_DMA
+	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
+#endif
+#ifdef CONFIG_ZONE_DMA32
+	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
+#endif
+	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+#ifdef CONFIG_HIGHMEM
+	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
+
+	if (cpu_has_dc_aliases && max_low_pfn != highend_pfn) {
+		printk(KERN_WARNING "This processor doesn't support highmem."
+		       " %ldk highmem ignored\n",
+		       (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10));
+		max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn;
+	}
+
+	max_mapnr = highend_pfn ? highend_pfn : max_low_pfn;
+#else
+	max_mapnr = max_low_pfn;
+#endif
+	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
+
+	free_area_init(max_zone_pfns);
+}
+
+#ifdef CONFIG_64BIT
+static struct kcore_list kcore_kseg0;
+#endif
+
+static inline void __init mem_init_free_highmem(void)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long tmp;
+
+	if (cpu_has_dc_aliases)
+		return;
+
+	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
+		struct page *page = pfn_to_page(tmp);
+
+		if (!memblock_is_memory(PFN_PHYS(tmp)))
+			SetPageReserved(page);
+		else
+			free_highmem_page(page);
+	}
+#endif
+}
+
+void __init mem_init(void)
+{
+	/*
+	 * When PFN_PTE_SHIFT is greater than PAGE_SHIFT we won't have enough PTE
+	 * bits to hold a full 32b physical address on MIPS32 systems.
+	 */
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT));
+
+	maar_init();
+	memblock_free_all();
+	setup_zero_pages();	/* Setup zeroed pages.  */
+	mem_init_free_highmem();
+
+#ifdef CONFIG_64BIT
+	if ((unsigned long) &_text > (unsigned long) CKSEG0)
+		/* The -4 is a hack so that user tools don't have to handle
+		   the overflow.  */
+		kclist_add(&kcore_kseg0, (void *) CKSEG0,
+				0x80000000 - 4, KCORE_TEXT);
+#endif
+}
+#endif /* !CONFIG_NUMA */
+
+void free_init_pages(const char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long pfn;
+
+	for (pfn = PFN_UP(begin); pfn < PFN_DOWN(end); pfn++) {
+		struct page *page = pfn_to_page(pfn);
+		void *addr = phys_to_virt(PFN_PHYS(pfn));
+
+		memset(addr, POISON_FREE_INITMEM, PAGE_SIZE);
+		free_reserved_page(page);
+	}
+	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+}
+
+void (*free_init_pages_eva)(void *begin, void *end) = NULL;
+
+void __weak __init prom_free_prom_memory(void)
+{
+	/* nothing to do */
+}
+
+void __ref free_initmem(void)
+{
+	prom_free_prom_memory();
+	/*
+	 * Let the platform define a specific function to free the
+	 * init section since EVA may have used any possible mapping
+	 * between virtual and physical addresses.
+	 */
+	if (free_init_pages_eva)
+		free_init_pages_eva((void *)&__init_begin, (void *)&__init_end);
+	else
+		free_initmem_default(POISON_FREE_INITMEM);
+}
+
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	return node_distance(cpu_to_node(from), cpu_to_node(to));
+}
+
+static int __init pcpu_cpu_to_node(int cpu)
+{
+	return cpu_to_node(cpu);
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long delta;
+	unsigned int cpu;
+	int rc;
+
+	/*
+	 * Always reserve area for module percpu variables.  That's
+	 * what the legacy allocator did.
+	 */
+	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
+				    pcpu_cpu_distance,
+				    pcpu_cpu_to_node);
+	if (rc < 0)
+		panic("Failed to initialize percpu areas.");
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu)
+		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+}
+#endif
+
+#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
+unsigned long pgd_current[NR_CPUS];
+#endif
+
+/*
+ * Align swapper_pg_dir in to 64K, allows its address to be loaded
+ * with a single LUI instruction in the TLB handlers.  If we used
+ * __aligned(64K), its size would get rounded up to the alignment
+ * size, and waste space.  So we place it in its own section and align
+ * it in the linker script.
+ */
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
+#ifndef __PAGETABLE_PUD_FOLDED
+pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
+#endif
+#ifndef __PAGETABLE_PMD_FOLDED
+pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
+EXPORT_SYMBOL_GPL(invalid_pmd_table);
+#endif
+pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
+EXPORT_SYMBOL(invalid_pte_table);
diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c
new file mode 100644
index 0000000000..b6dad2fd55
--- /dev/null
+++ b/arch/mips/mm/ioremap.c
@@ -0,0 +1,119 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ * (C) Copyright 2001, 2002 Ralf Baechle
+ */
+#include <linux/export.h>
+#include <asm/addrspace.h>
+#include <asm/byteorder.h>
+#include <linux/ioport.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm_types.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <ioremap.h>
+
+#define IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL))
+#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1)
+
+static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
+			       void *arg)
+{
+	unsigned long i;
+
+	for (i = 0; i < nr_pages; i++) {
+		if (pfn_valid(start_pfn + i) &&
+		    !PageReserved(pfn_to_page(start_pfn + i)))
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * ioremap_prot     -   map bus memory into CPU space
+ * @phys_addr:    bus address of the memory
+ * @size:      size of the resource to map
+ *
+ * ioremap_prot gives the caller control over cache coherency attributes (CCA)
+ */
+void __iomem *ioremap_prot(phys_addr_t phys_addr, unsigned long size,
+		unsigned long prot_val)
+{
+	unsigned long flags = prot_val & _CACHE_MASK;
+	unsigned long offset, pfn, last_pfn;
+	struct vm_struct *area;
+	phys_addr_t last_addr;
+	unsigned long vaddr;
+	void __iomem *cpu_addr;
+
+	cpu_addr = plat_ioremap(phys_addr, size, flags);
+	if (cpu_addr)
+		return cpu_addr;
+
+	phys_addr = fixup_bigphys_addr(phys_addr, size);
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	/*
+	 * Map uncached objects in the low 512mb of address space using KSEG1,
+	 * otherwise map using page tables.
+	 */
+	if (IS_LOW512(phys_addr) && IS_LOW512(last_addr) &&
+	    flags == _CACHE_UNCACHED)
+		return (void __iomem *) CKSEG1ADDR(phys_addr);
+
+	/*
+	 * Don't allow anybody to remap RAM that may be allocated by the page
+	 * allocator, since that could lead to races & data clobbering.
+	 */
+	pfn = PFN_DOWN(phys_addr);
+	last_pfn = PFN_DOWN(last_addr);
+	if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
+				  __ioremap_check_ram) == 1) {
+		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
+			  &phys_addr, &last_addr);
+		return NULL;
+	}
+
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr + 1) - phys_addr;
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_vm_area(size, VM_IOREMAP);
+	if (!area)
+		return NULL;
+	vaddr = (unsigned long)area->addr;
+
+	flags |= _PAGE_GLOBAL | _PAGE_PRESENT | __READABLE | __WRITEABLE;
+	if (ioremap_page_range(vaddr, vaddr + size, phys_addr,
+			__pgprot(flags))) {
+		free_vm_area(area);
+		return NULL;
+	}
+
+	return (void __iomem *)(vaddr + offset);
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+void iounmap(const volatile void __iomem *addr)
+{
+	if (!plat_iounmap(addr) && !IS_KSEG1(addr))
+		vunmap((void *)((unsigned long)addr & PAGE_MASK));
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/mips/mm/ioremap64.c b/arch/mips/mm/ioremap64.c
new file mode 100644
index 0000000000..15e7820d6a
--- /dev/null
+++ b/arch/mips/mm/ioremap64.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/io.h>
+#include <ioremap.h>
+
+void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
+		unsigned long prot_val)
+{
+	unsigned long flags = prot_val & _CACHE_MASK;
+	u64 base = (flags == _CACHE_UNCACHED ? IO_BASE : UNCAC_BASE);
+	void __iomem *addr;
+
+	addr = plat_ioremap(offset, size, flags);
+	if (!addr)
+		addr = (void __iomem *)(unsigned long)(base + offset);
+	return addr;
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+void iounmap(const volatile void __iomem *addr)
+{
+	plat_iounmap(addr);
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/mips/mm/maccess.c b/arch/mips/mm/maccess.c
new file mode 100644
index 0000000000..58173842c6
--- /dev/null
+++ b/arch/mips/mm/maccess.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
+{
+	/* highest bit set means kernel space */
+	return (unsigned long)unsafe_src >> (BITS_PER_LONG - 1);
+}
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
new file mode 100644
index 0000000000..00fe90c6db
--- /dev/null
+++ b/arch/mips/mm/mmap.c
@@ -0,0 +1,129 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Wind River Systems,
+ *   written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/compiler.h>
+#include <linux/elf-randomize.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/export.h>
+#include <linux/personality.h>
+#include <linux/random.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+
+unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
+EXPORT_SYMBOL(shm_align_mask);
+
+#define COLOUR_ALIGN(addr, pgoff)				\
+	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
+	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+
+enum mmap_allocation_direction {UP, DOWN};
+
+static unsigned long arch_get_unmapped_area_common(struct file *filp,
+	unsigned long addr0, unsigned long len, unsigned long pgoff,
+	unsigned long flags, enum mmap_allocation_direction dir)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long addr = addr0;
+	int do_color_align;
+	struct vm_unmapped_area_info info;
+
+	if (unlikely(len > TASK_SIZE))
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED) {
+		/* Even MAP_FIXED mappings must reside within TASK_SIZE */
+		if (TASK_SIZE - len < addr)
+			return -EINVAL;
+
+		/*
+		 * We do not accept a shared mapping if it would violate
+		 * cache aliasing constraints.
+		 */
+		if ((flags & MAP_SHARED) &&
+		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+			return -EINVAL;
+		return addr;
+	}
+
+	do_color_align = 0;
+	if (filp || (flags & MAP_SHARED))
+		do_color_align = 1;
+
+	/* requesting a specific address */
+	if (addr) {
+		if (do_color_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
+
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vm_start_gap(vma)))
+			return addr;
+	}
+
+	info.length = len;
+	info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+
+	if (dir == DOWN) {
+		info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+		info.low_limit = PAGE_SIZE;
+		info.high_limit = mm->mmap_base;
+		addr = vm_unmapped_area(&info);
+
+		if (!(addr & ~PAGE_MASK))
+			return addr;
+
+		/*
+		 * A failed mmap() very likely causes application failure,
+		 * so fall back to the bottom-up function here. This scenario
+		 * can happen with large stack limits and large mmap()
+		 * allocations.
+		 */
+	}
+
+	info.flags = 0;
+	info.low_limit = mm->mmap_base;
+	info.high_limit = TASK_SIZE;
+	return vm_unmapped_area(&info);
+}
+
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0,
+	unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	return arch_get_unmapped_area_common(filp,
+			addr0, len, pgoff, flags, UP);
+}
+
+/*
+ * There is no need to export this but sched.h declares the function as
+ * extern so making it static here results in an error.
+ */
+unsigned long arch_get_unmapped_area_topdown(struct file *filp,
+	unsigned long addr0, unsigned long len, unsigned long pgoff,
+	unsigned long flags)
+{
+	return arch_get_unmapped_area_common(filp,
+			addr0, len, pgoff, flags, DOWN);
+}
+
+bool __virt_addr_valid(const volatile void *kaddr)
+{
+	unsigned long vaddr = (unsigned long)kaddr;
+
+	if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE))
+		return false;
+
+	return pfn_valid(PFN_DOWN(virt_to_phys(kaddr)));
+}
+EXPORT_SYMBOL_GPL(__virt_addr_valid);
diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S
new file mode 100644
index 0000000000..42d0516ca1
--- /dev/null
+++ b/arch/mips/mm/page-funcs.S
@@ -0,0 +1,53 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Micro-assembler generated clear_page/copy_page functions.
+ *
+ * Copyright (C) 2012  MIPS Technologies, Inc.
+ * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/export.h>
+#include <asm/asm.h>
+#include <asm/regdef.h>
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+#define cpu_clear_page_function_name	clear_page_cpu
+#define cpu_copy_page_function_name	copy_page_cpu
+#else
+#define cpu_clear_page_function_name	clear_page
+#define cpu_copy_page_function_name	copy_page
+#endif
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x058 bytes
+ * R4600 v1.7:				0x05c bytes
+ * R4600 v2.0:				0x060 bytes
+ * With prefetching, 16 word strides	0x120 bytes
+ */
+EXPORT(__clear_page_start)
+LEAF(cpu_clear_page_function_name)
+EXPORT_SYMBOL(cpu_clear_page_function_name)
+1:	j	1b		/* Dummy, will be replaced. */
+	.space 288
+END(cpu_clear_page_function_name)
+EXPORT(__clear_page_end)
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x11c bytes
+ * R4600 v1.7:				0x080 bytes
+ * R4600 v2.0:				0x07c bytes
+ * With prefetching, 16 word strides	0x540 bytes
+ */
+EXPORT(__copy_page_start)
+LEAF(cpu_copy_page_function_name)
+EXPORT_SYMBOL(cpu_copy_page_function_name)
+1:	j	1b		/* Dummy, will be replaced. */
+	.space 1344
+END(cpu_copy_page_function_name)
+EXPORT(__copy_page_end)
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
new file mode 100644
index 0000000000..d3b4459d0f
--- /dev/null
+++ b/arch/mips/mm/page.c
@@ -0,0 +1,682 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 2007  Maciej W. Rozycki
+ * Copyright (C) 2008  Thiemo Seufer
+ * Copyright (C) 2012  MIPS Technologies, Inc.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+
+#include <asm/bugs.h>
+#include <asm/cacheops.h>
+#include <asm/cpu-type.h>
+#include <asm/inst.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/prefetch.h>
+#include <asm/bootinfo.h>
+#include <asm/mipsregs.h>
+#include <asm/mmu_context.h>
+#include <asm/cpu.h>
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+#include <asm/sibyte/sb1250.h>
+#include <asm/sibyte/sb1250_regs.h>
+#include <asm/sibyte/sb1250_dma.h>
+#endif
+
+#include <asm/uasm.h>
+
+/* Registers used in the assembled routines. */
+#define ZERO 0
+#define AT 2
+#define A0 4
+#define A1 5
+#define A2 6
+#define T0 8
+#define T1 9
+#define T2 10
+#define T3 11
+#define T9 25
+#define RA 31
+
+/* Handle labels (which must be positive integers). */
+enum label_id {
+	label_clear_nopref = 1,
+	label_clear_pref,
+	label_copy_nopref,
+	label_copy_pref_both,
+	label_copy_pref_store,
+};
+
+UASM_L_LA(_clear_nopref)
+UASM_L_LA(_clear_pref)
+UASM_L_LA(_copy_nopref)
+UASM_L_LA(_copy_pref_both)
+UASM_L_LA(_copy_pref_store)
+
+/* We need one branch and therefore one relocation per target label. */
+static struct uasm_label labels[5];
+static struct uasm_reloc relocs[5];
+
+#define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
+#define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)
+
+/*
+ * R6 has a limited offset of the pref instruction.
+ * Skip it if the offset is more than 9 bits.
+ */
+#define _uasm_i_pref(a, b, c, d)		\
+do {						\
+	if (cpu_has_mips_r6) {			\
+		if (c <= 0xff && c >= -0x100)	\
+			uasm_i_pref(a, b, c, d);\
+	} else {				\
+		uasm_i_pref(a, b, c, d);	\
+	}					\
+} while(0)
+
+static int pref_bias_clear_store;
+static int pref_bias_copy_load;
+static int pref_bias_copy_store;
+
+static u32 pref_src_mode;
+static u32 pref_dst_mode;
+
+static int clear_word_size;
+static int copy_word_size;
+
+static int half_clear_loop_size;
+static int half_copy_loop_size;
+
+static int cache_line_size;
+#define cache_line_mask() (cache_line_size - 1)
+
+static inline void
+pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
+{
+	if (cpu_has_64bit_gp_regs &&
+	    IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) &&
+	    r4k_daddiu_bug()) {
+		if (off > 0x7fff) {
+			uasm_i_lui(buf, T9, uasm_rel_hi(off));
+			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+		} else
+			uasm_i_addiu(buf, T9, ZERO, off);
+		uasm_i_daddu(buf, reg1, reg2, T9);
+	} else {
+		if (off > 0x7fff) {
+			uasm_i_lui(buf, T9, uasm_rel_hi(off));
+			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+			UASM_i_ADDU(buf, reg1, reg2, T9);
+		} else
+			UASM_i_ADDIU(buf, reg1, reg2, off);
+	}
+}
+
+static void set_prefetch_parameters(void)
+{
+	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
+		clear_word_size = 8;
+	else
+		clear_word_size = 4;
+
+	if (cpu_has_64bit_gp_regs)
+		copy_word_size = 8;
+	else
+		copy_word_size = 4;
+
+	/*
+	 * The pref's used here are using "streaming" hints, which cause the
+	 * copied data to be kicked out of the cache sooner.  A page copy often
+	 * ends up copying a lot more data than is commonly used, so this seems
+	 * to make sense in terms of reducing cache pollution, but I've no real
+	 * performance data to back this up.
+	 */
+	if (cpu_has_prefetch) {
+		/*
+		 * XXX: Most prefetch bias values in here are based on
+		 * guesswork.
+		 */
+		cache_line_size = cpu_dcache_line_size();
+		switch (current_cpu_type()) {
+		case CPU_R5500:
+		case CPU_TX49XX:
+			/* These processors only support the Pref_Load. */
+			pref_bias_copy_load = 256;
+			break;
+
+		case CPU_R10000:
+		case CPU_R12000:
+		case CPU_R14000:
+		case CPU_R16000:
+			/*
+			 * Those values have been experimentally tuned for an
+			 * Origin 200.
+			 */
+			pref_bias_clear_store = 512;
+			pref_bias_copy_load = 256;
+			pref_bias_copy_store = 256;
+			pref_src_mode = Pref_LoadStreamed;
+			pref_dst_mode = Pref_StoreStreamed;
+			break;
+
+		case CPU_SB1:
+		case CPU_SB1A:
+			pref_bias_clear_store = 128;
+			pref_bias_copy_load = 128;
+			pref_bias_copy_store = 128;
+			/*
+			 * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
+			 * hints are broken.
+			 */
+			if (current_cpu_type() == CPU_SB1 &&
+			    (current_cpu_data.processor_id & 0xff) < 0x02) {
+				pref_src_mode = Pref_Load;
+				pref_dst_mode = Pref_Store;
+			} else {
+				pref_src_mode = Pref_LoadStreamed;
+				pref_dst_mode = Pref_StoreStreamed;
+			}
+			break;
+
+		case CPU_LOONGSON64:
+			/* Loongson-3 only support the Pref_Load/Pref_Store. */
+			pref_bias_clear_store = 128;
+			pref_bias_copy_load = 128;
+			pref_bias_copy_store = 128;
+			pref_src_mode = Pref_Load;
+			pref_dst_mode = Pref_Store;
+			break;
+
+		default:
+			pref_bias_clear_store = 128;
+			pref_bias_copy_load = 256;
+			pref_bias_copy_store = 128;
+			pref_src_mode = Pref_LoadStreamed;
+			if (cpu_has_mips_r6)
+				/*
+				 * Bit 30 (Pref_PrepareForStore) has been
+				 * removed from MIPS R6. Use bit 5
+				 * (Pref_StoreStreamed).
+				 */
+				pref_dst_mode = Pref_StoreStreamed;
+			else
+				pref_dst_mode = Pref_PrepareForStore;
+			break;
+		}
+	} else {
+		if (cpu_has_cache_cdex_s)
+			cache_line_size = cpu_scache_line_size();
+		else if (cpu_has_cache_cdex_p)
+			cache_line_size = cpu_dcache_line_size();
+	}
+	/*
+	 * Too much unrolling will overflow the available space in
+	 * clear_space_array / copy_page_array.
+	 */
+	half_clear_loop_size = min(16 * clear_word_size,
+				   max(cache_line_size >> 1,
+				       4 * clear_word_size));
+	half_copy_loop_size = min(16 * copy_word_size,
+				  max(cache_line_size >> 1,
+				      4 * copy_word_size));
+}
+
+static void build_clear_store(u32 **buf, int off)
+{
+	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
+		uasm_i_sd(buf, ZERO, off, A0);
+	} else {
+		uasm_i_sw(buf, ZERO, off, A0);
+	}
+}
+
+static inline void build_clear_pref(u32 **buf, int off)
+{
+	if (off & cache_line_mask())
+		return;
+
+	if (pref_bias_clear_store) {
+		_uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
+			    A0);
+	} else if (cache_line_size == (half_clear_loop_size << 1)) {
+		if (cpu_has_cache_cdex_s) {
+			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+		} else if (cpu_has_cache_cdex_p) {
+			if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
+			    cpu_is_r4600_v1_x()) {
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+			}
+
+			if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
+			    cpu_is_r4600_v2_x())
+				uasm_i_lw(buf, ZERO, ZERO, AT);
+
+			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+		}
+	}
+}
+
+extern u32 __clear_page_start;
+extern u32 __clear_page_end;
+extern u32 __copy_page_start;
+extern u32 __copy_page_end;
+
+void build_clear_page(void)
+{
+	int off;
+	u32 *buf = &__clear_page_start;
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+	int i;
+	static atomic_t run_once = ATOMIC_INIT(0);
+
+	if (atomic_xchg(&run_once, 1)) {
+		return;
+	}
+
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	set_prefetch_parameters();
+
+	/*
+	 * This algorithm makes the following assumptions:
+	 *   - The prefetch bias is a multiple of 2 words.
+	 *   - The prefetch bias is less than one page.
+	 */
+	BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
+	BUG_ON(PAGE_SIZE < pref_bias_clear_store);
+
+	off = PAGE_SIZE - pref_bias_clear_store;
+	if (off > 0xffff || !pref_bias_clear_store)
+		pg_addiu(&buf, A2, A0, off);
+	else
+		uasm_i_ori(&buf, A2, A0, off);
+
+	if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
+		uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));
+
+	off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
+				* cache_line_size : 0;
+	while (off) {
+		build_clear_pref(&buf, -off);
+		off -= cache_line_size;
+	}
+	uasm_l_clear_pref(&l, buf);
+	do {
+		build_clear_pref(&buf, off);
+		build_clear_store(&buf, off);
+		off += clear_word_size;
+	} while (off < half_clear_loop_size);
+	pg_addiu(&buf, A0, A0, 2 * off);
+	off = -off;
+	do {
+		build_clear_pref(&buf, off);
+		if (off == -clear_word_size)
+			uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
+		build_clear_store(&buf, off);
+		off += clear_word_size;
+	} while (off < 0);
+
+	if (pref_bias_clear_store) {
+		pg_addiu(&buf, A2, A0, pref_bias_clear_store);
+		uasm_l_clear_nopref(&l, buf);
+		off = 0;
+		do {
+			build_clear_store(&buf, off);
+			off += clear_word_size;
+		} while (off < half_clear_loop_size);
+		pg_addiu(&buf, A0, A0, 2 * off);
+		off = -off;
+		do {
+			if (off == -clear_word_size)
+				uasm_il_bne(&buf, &r, A0, A2,
+					    label_clear_nopref);
+			build_clear_store(&buf, off);
+			off += clear_word_size;
+		} while (off < 0);
+	}
+
+	uasm_i_jr(&buf, RA);
+	uasm_i_nop(&buf);
+
+	BUG_ON(buf > &__clear_page_end);
+
+	uasm_resolve_relocs(relocs, labels);
+
+	pr_debug("Synthesized clear page handler (%u instructions).\n",
+		 (u32)(buf - &__clear_page_start));
+
+	pr_debug("\t.set push\n");
+	pr_debug("\t.set noreorder\n");
+	for (i = 0; i < (buf - &__clear_page_start); i++)
+		pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]);
+	pr_debug("\t.set pop\n");
+}
+
+static void build_copy_load(u32 **buf, int reg, int off)
+{
+	if (cpu_has_64bit_gp_regs) {
+		uasm_i_ld(buf, reg, off, A1);
+	} else {
+		uasm_i_lw(buf, reg, off, A1);
+	}
+}
+
+static void build_copy_store(u32 **buf, int reg, int off)
+{
+	if (cpu_has_64bit_gp_regs) {
+		uasm_i_sd(buf, reg, off, A0);
+	} else {
+		uasm_i_sw(buf, reg, off, A0);
+	}
+}
+
+static inline void build_copy_load_pref(u32 **buf, int off)
+{
+	if (off & cache_line_mask())
+		return;
+
+	if (pref_bias_copy_load)
+		_uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
+}
+
+static inline void build_copy_store_pref(u32 **buf, int off)
+{
+	if (off & cache_line_mask())
+		return;
+
+	if (pref_bias_copy_store) {
+		_uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
+			    A0);
+	} else if (cache_line_size == (half_copy_loop_size << 1)) {
+		if (cpu_has_cache_cdex_s) {
+			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+		} else if (cpu_has_cache_cdex_p) {
+			if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
+			    cpu_is_r4600_v1_x()) {
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+			}
+
+			if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
+			    cpu_is_r4600_v2_x())
+				uasm_i_lw(buf, ZERO, ZERO, AT);
+
+			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+		}
+	}
+}
+
+void build_copy_page(void)
+{
+	int off;
+	u32 *buf = &__copy_page_start;
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+	int i;
+	static atomic_t run_once = ATOMIC_INIT(0);
+
+	if (atomic_xchg(&run_once, 1)) {
+		return;
+	}
+
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	set_prefetch_parameters();
+
+	/*
+	 * This algorithm makes the following assumptions:
+	 *   - All prefetch biases are multiples of 8 words.
+	 *   - The prefetch biases are less than one page.
+	 *   - The store prefetch bias isn't greater than the load
+	 *     prefetch bias.
+	 */
+	BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
+	BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
+	BUG_ON(PAGE_SIZE < pref_bias_copy_load);
+	BUG_ON(pref_bias_copy_store > pref_bias_copy_load);
+
+	off = PAGE_SIZE - pref_bias_copy_load;
+	if (off > 0xffff || !pref_bias_copy_load)
+		pg_addiu(&buf, A2, A0, off);
+	else
+		uasm_i_ori(&buf, A2, A0, off);
+
+	if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
+		uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));
+
+	off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
+				cache_line_size : 0;
+	while (off) {
+		build_copy_load_pref(&buf, -off);
+		off -= cache_line_size;
+	}
+	off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) *
+				cache_line_size : 0;
+	while (off) {
+		build_copy_store_pref(&buf, -off);
+		off -= cache_line_size;
+	}
+	uasm_l_copy_pref_both(&l, buf);
+	do {
+		build_copy_load_pref(&buf, off);
+		build_copy_load(&buf, T0, off);
+		build_copy_load_pref(&buf, off + copy_word_size);
+		build_copy_load(&buf, T1, off + copy_word_size);
+		build_copy_load_pref(&buf, off + 2 * copy_word_size);
+		build_copy_load(&buf, T2, off + 2 * copy_word_size);
+		build_copy_load_pref(&buf, off + 3 * copy_word_size);
+		build_copy_load(&buf, T3, off + 3 * copy_word_size);
+		build_copy_store_pref(&buf, off);
+		build_copy_store(&buf, T0, off);
+		build_copy_store_pref(&buf, off + copy_word_size);
+		build_copy_store(&buf, T1, off + copy_word_size);
+		build_copy_store_pref(&buf, off + 2 * copy_word_size);
+		build_copy_store(&buf, T2, off + 2 * copy_word_size);
+		build_copy_store_pref(&buf, off + 3 * copy_word_size);
+		build_copy_store(&buf, T3, off + 3 * copy_word_size);
+		off += 4 * copy_word_size;
+	} while (off < half_copy_loop_size);
+	pg_addiu(&buf, A1, A1, 2 * off);
+	pg_addiu(&buf, A0, A0, 2 * off);
+	off = -off;
+	do {
+		build_copy_load_pref(&buf, off);
+		build_copy_load(&buf, T0, off);
+		build_copy_load_pref(&buf, off + copy_word_size);
+		build_copy_load(&buf, T1, off + copy_word_size);
+		build_copy_load_pref(&buf, off + 2 * copy_word_size);
+		build_copy_load(&buf, T2, off + 2 * copy_word_size);
+		build_copy_load_pref(&buf, off + 3 * copy_word_size);
+		build_copy_load(&buf, T3, off + 3 * copy_word_size);
+		build_copy_store_pref(&buf, off);
+		build_copy_store(&buf, T0, off);
+		build_copy_store_pref(&buf, off + copy_word_size);
+		build_copy_store(&buf, T1, off + copy_word_size);
+		build_copy_store_pref(&buf, off + 2 * copy_word_size);
+		build_copy_store(&buf, T2, off + 2 * copy_word_size);
+		build_copy_store_pref(&buf, off + 3 * copy_word_size);
+		if (off == -(4 * copy_word_size))
+			uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
+		build_copy_store(&buf, T3, off + 3 * copy_word_size);
+		off += 4 * copy_word_size;
+	} while (off < 0);
+
+	if (pref_bias_copy_load - pref_bias_copy_store) {
+		pg_addiu(&buf, A2, A0,
+			 pref_bias_copy_load - pref_bias_copy_store);
+		uasm_l_copy_pref_store(&l, buf);
+		off = 0;
+		do {
+			build_copy_load(&buf, T0, off);
+			build_copy_load(&buf, T1, off + copy_word_size);
+			build_copy_load(&buf, T2, off + 2 * copy_word_size);
+			build_copy_load(&buf, T3, off + 3 * copy_word_size);
+			build_copy_store_pref(&buf, off);
+			build_copy_store(&buf, T0, off);
+			build_copy_store_pref(&buf, off + copy_word_size);
+			build_copy_store(&buf, T1, off + copy_word_size);
+			build_copy_store_pref(&buf, off + 2 * copy_word_size);
+			build_copy_store(&buf, T2, off + 2 * copy_word_size);
+			build_copy_store_pref(&buf, off + 3 * copy_word_size);
+			build_copy_store(&buf, T3, off + 3 * copy_word_size);
+			off += 4 * copy_word_size;
+		} while (off < half_copy_loop_size);
+		pg_addiu(&buf, A1, A1, 2 * off);
+		pg_addiu(&buf, A0, A0, 2 * off);
+		off = -off;
+		do {
+			build_copy_load(&buf, T0, off);
+			build_copy_load(&buf, T1, off + copy_word_size);
+			build_copy_load(&buf, T2, off + 2 * copy_word_size);
+			build_copy_load(&buf, T3, off + 3 * copy_word_size);
+			build_copy_store_pref(&buf, off);
+			build_copy_store(&buf, T0, off);
+			build_copy_store_pref(&buf, off + copy_word_size);
+			build_copy_store(&buf, T1, off + copy_word_size);
+			build_copy_store_pref(&buf, off + 2 * copy_word_size);
+			build_copy_store(&buf, T2, off + 2 * copy_word_size);
+			build_copy_store_pref(&buf, off + 3 * copy_word_size);
+			if (off == -(4 * copy_word_size))
+				uasm_il_bne(&buf, &r, A2, A0,
+					    label_copy_pref_store);
+			build_copy_store(&buf, T3, off + 3 * copy_word_size);
+			off += 4 * copy_word_size;
+		} while (off < 0);
+	}
+
+	if (pref_bias_copy_store) {
+		pg_addiu(&buf, A2, A0, pref_bias_copy_store);
+		uasm_l_copy_nopref(&l, buf);
+		off = 0;
+		do {
+			build_copy_load(&buf, T0, off);
+			build_copy_load(&buf, T1, off + copy_word_size);
+			build_copy_load(&buf, T2, off + 2 * copy_word_size);
+			build_copy_load(&buf, T3, off + 3 * copy_word_size);
+			build_copy_store(&buf, T0, off);
+			build_copy_store(&buf, T1, off + copy_word_size);
+			build_copy_store(&buf, T2, off + 2 * copy_word_size);
+			build_copy_store(&buf, T3, off + 3 * copy_word_size);
+			off += 4 * copy_word_size;
+		} while (off < half_copy_loop_size);
+		pg_addiu(&buf, A1, A1, 2 * off);
+		pg_addiu(&buf, A0, A0, 2 * off);
+		off = -off;
+		do {
+			build_copy_load(&buf, T0, off);
+			build_copy_load(&buf, T1, off + copy_word_size);
+			build_copy_load(&buf, T2, off + 2 * copy_word_size);
+			build_copy_load(&buf, T3, off + 3 * copy_word_size);
+			build_copy_store(&buf, T0, off);
+			build_copy_store(&buf, T1, off + copy_word_size);
+			build_copy_store(&buf, T2, off + 2 * copy_word_size);
+			if (off == -(4 * copy_word_size))
+				uasm_il_bne(&buf, &r, A2, A0,
+					    label_copy_nopref);
+			build_copy_store(&buf, T3, off + 3 * copy_word_size);
+			off += 4 * copy_word_size;
+		} while (off < 0);
+	}
+
+	uasm_i_jr(&buf, RA);
+	uasm_i_nop(&buf);
+
+	BUG_ON(buf > &__copy_page_end);
+
+	uasm_resolve_relocs(relocs, labels);
+
+	pr_debug("Synthesized copy page handler (%u instructions).\n",
+		 (u32)(buf - &__copy_page_start));
+
+	pr_debug("\t.set push\n");
+	pr_debug("\t.set noreorder\n");
+	for (i = 0; i < (buf - &__copy_page_start); i++)
+		pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]);
+	pr_debug("\t.set pop\n");
+}
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+extern void clear_page_cpu(void *page);
+extern void copy_page_cpu(void *to, void *from);
+
+/*
+ * Pad descriptors to cacheline, since each is exclusively owned by a
+ * particular CPU.
+ */
+struct dmadscr {
+	u64 dscr_a;
+	u64 dscr_b;
+	u64 pad_a;
+	u64 pad_b;
+} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];
+
+void clear_page(void *page)
+{
+	u64 to_phys = CPHYSADDR((unsigned long)page);
+	unsigned int cpu = smp_processor_id();
+
+	/* if the page is not in KSEG0, use old way */
+	if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
+		return clear_page_cpu(page);
+
+	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
+				 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
+	page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
+	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
+
+	/*
+	 * Don't really want to do it this way, but there's no
+	 * reliable way to delay completion detection.
+	 */
+	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
+		 & M_DM_DSCR_BASE_INTERRUPT))
+		;
+	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
+}
+EXPORT_SYMBOL(clear_page);
+
+void copy_page(void *to, void *from)
+{
+	u64 from_phys = CPHYSADDR((unsigned long)from);
+	u64 to_phys = CPHYSADDR((unsigned long)to);
+	unsigned int cpu = smp_processor_id();
+
+	/* if any page is not in KSEG0, use old way */
+	if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
+	    || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
+		return copy_page_cpu(to, from);
+
+	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
+				 M_DM_DSCRA_INTERRUPT;
+	page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
+	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
+
+	/*
+	 * Don't really want to do it this way, but there's no
+	 * reliable way to delay completion detection.
+	 */
+	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
+		 & M_DM_DSCR_BASE_INTERRUPT))
+		;
+	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
+}
+EXPORT_SYMBOL(copy_page);
+
+#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */
diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
new file mode 100644
index 0000000000..84dd5136d5
--- /dev/null
+++ b/arch/mips/mm/pgtable-32.c
@@ -0,0 +1,89 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003 by Ralf Baechle
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/highmem.h>
+#include <asm/fixmap.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+void pgd_init(void *addr)
+{
+	unsigned long *p = (unsigned long *)addr;
+	int i;
+
+	for (i = 0; i < USER_PTRS_PER_PGD; i+=8) {
+		p[i + 0] = (unsigned long) invalid_pte_table;
+		p[i + 1] = (unsigned long) invalid_pte_table;
+		p[i + 2] = (unsigned long) invalid_pte_table;
+		p[i + 3] = (unsigned long) invalid_pte_table;
+		p[i + 4] = (unsigned long) invalid_pte_table;
+		p[i + 5] = (unsigned long) invalid_pte_table;
+		p[i + 6] = (unsigned long) invalid_pte_table;
+		p[i + 7] = (unsigned long) invalid_pte_table;
+	}
+}
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
+pmd_t mk_pmd(struct page *page, pgprot_t prot)
+{
+	pmd_t pmd;
+
+	pmd_val(pmd) = (page_to_pfn(page) << PFN_PTE_SHIFT) | pgprot_val(prot);
+
+	return pmd;
+}
+
+
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+#endif /* defined(CONFIG_TRANSPARENT_HUGEPAGE) */
+
+void __init pagetable_init(void)
+{
+	unsigned long vaddr;
+	pgd_t *pgd_base;
+#ifdef CONFIG_HIGHMEM
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+#endif
+
+	/* Initialize the entire pgd.  */
+	pgd_init(swapper_pg_dir);
+	pgd_init(&swapper_pg_dir[USER_PTRS_PER_PGD]);
+
+	pgd_base = swapper_pg_dir;
+
+	/*
+	 * Fixed mappings:
+	 */
+	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
+	fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, pgd_base);
+
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * Permanent kmaps:
+	 */
+	vaddr = PKMAP_BASE;
+	fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+
+	pgd = swapper_pg_dir + pgd_index(vaddr);
+	p4d = p4d_offset(pgd, vaddr);
+	pud = pud_offset(p4d, vaddr);
+	pmd = pmd_offset(pud, vaddr);
+	pte = pte_offset_kernel(pmd, vaddr);
+	pkmap_page_table = pte;
+#endif
+}
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
new file mode 100644
index 0000000000..c76d21f7df
--- /dev/null
+++ b/arch/mips/mm/pgtable-64.c
@@ -0,0 +1,126 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999, 2000 by Silicon Graphics
+ * Copyright (C) 2003 by Ralf Baechle
+ */
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <asm/fixmap.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+void pgd_init(void *addr)
+{
+	unsigned long *p, *end;
+	unsigned long entry;
+
+#if !defined(__PAGETABLE_PUD_FOLDED)
+	entry = (unsigned long)invalid_pud_table;
+#elif !defined(__PAGETABLE_PMD_FOLDED)
+	entry = (unsigned long)invalid_pmd_table;
+#else
+	entry = (unsigned long)invalid_pte_table;
+#endif
+
+	p = (unsigned long *) addr;
+	end = p + PTRS_PER_PGD;
+
+	do {
+		p[0] = entry;
+		p[1] = entry;
+		p[2] = entry;
+		p[3] = entry;
+		p[4] = entry;
+		p += 8;
+		p[-3] = entry;
+		p[-2] = entry;
+		p[-1] = entry;
+	} while (p != end);
+}
+
+#ifndef __PAGETABLE_PMD_FOLDED
+void pmd_init(void *addr)
+{
+	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pte_table;
+
+	p = (unsigned long *)addr;
+	end = p + PTRS_PER_PMD;
+
+	do {
+		p[0] = pagetable;
+		p[1] = pagetable;
+		p[2] = pagetable;
+		p[3] = pagetable;
+		p[4] = pagetable;
+		p += 8;
+		p[-3] = pagetable;
+		p[-2] = pagetable;
+		p[-1] = pagetable;
+	} while (p != end);
+}
+EXPORT_SYMBOL_GPL(pmd_init);
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+void pud_init(void *addr)
+{
+	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pmd_table;
+
+	p = (unsigned long *)addr;
+	end = p + PTRS_PER_PUD;
+
+	do {
+		p[0] = pagetable;
+		p[1] = pagetable;
+		p[2] = pagetable;
+		p[3] = pagetable;
+		p[4] = pagetable;
+		p += 8;
+		p[-3] = pagetable;
+		p[-2] = pagetable;
+		p[-1] = pagetable;
+	} while (p != end);
+}
+#endif
+
+pmd_t mk_pmd(struct page *page, pgprot_t prot)
+{
+	pmd_t pmd;
+
+	pmd_val(pmd) = (page_to_pfn(page) << PFN_PTE_SHIFT) | pgprot_val(prot);
+
+	return pmd;
+}
+
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+
+void __init pagetable_init(void)
+{
+	unsigned long vaddr;
+	pgd_t *pgd_base;
+
+	/* Initialize the entire pgd.  */
+	pgd_init(swapper_pg_dir);
+#ifndef __PAGETABLE_PUD_FOLDED
+	pud_init(invalid_pud_table);
+#endif
+#ifndef __PAGETABLE_PMD_FOLDED
+	pmd_init(invalid_pmd_table);
+#endif
+	pgd_base = swapper_pg_dir;
+	/*
+	 * Fixed mappings:
+	 */
+	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+	fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base);
+}
diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c
new file mode 100644
index 0000000000..1506e45804
--- /dev/null
+++ b/arch/mips/mm/pgtable.c
@@ -0,0 +1,27 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <asm/pgalloc.h>
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *init, *ret = NULL;
+	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM,
+			PGD_TABLE_ORDER);
+
+	if (ptdesc) {
+		ret = ptdesc_address(ptdesc);
+		init = pgd_offset(&init_mm, 0UL);
+		pgd_init(ret);
+		memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
+		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pgd_alloc);
diff --git a/arch/mips/mm/physaddr.c b/arch/mips/mm/physaddr.c
new file mode 100644
index 0000000000..f9b8c85e98
--- /dev/null
+++ b/arch/mips/mm/physaddr.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/mmdebug.h>
+#include <linux/mm.h>
+
+#include <asm/addrspace.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/dma.h>
+
+static inline bool __debug_virt_addr_valid(unsigned long x)
+{
+	/*
+	 * MAX_DMA_ADDRESS is a virtual address that may not correspond to an
+	 * actual physical address. Enough code relies on
+	 * virt_to_phys(MAX_DMA_ADDRESS) that we just need to work around it
+	 * and always return true.
+	 */
+	if (x == MAX_DMA_ADDRESS)
+		return true;
+
+	return x >= PAGE_OFFSET && (KSEGX(x) < KSEG2 ||
+	       IS_ENABLED(CONFIG_EVA) ||
+	       !IS_ENABLED(CONFIG_HIGHMEM));
+}
+
+phys_addr_t __virt_to_phys(volatile const void *x)
+{
+	WARN(!__debug_virt_addr_valid((unsigned long)x),
+	     "virt_to_phys used for non-linear address: %pK (%pS)\n",
+	     x, x);
+
+	return __virt_to_phys_nodebug(x);
+}
+EXPORT_SYMBOL(__virt_to_phys);
+
+phys_addr_t __phys_addr_symbol(unsigned long x)
+{
+	/* This is bounds checking against the kernel image only.
+	 * __pa_symbol should only be used on kernel symbol addresses.
+	 */
+	VIRTUAL_BUG_ON(x < (unsigned long)_text ||
+		       x > (unsigned long)_end);
+
+	return __pa_symbol_nodebug(x);
+}
+EXPORT_SYMBOL(__phys_addr_symbol);
diff --git a/arch/mips/mm/sc-debugfs.c b/arch/mips/mm/sc-debugfs.c
new file mode 100644
index 0000000000..80ff394715
--- /dev/null
+++ b/arch/mips/mm/sc-debugfs.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2015 Imagination Technologies
+ * Author: Paul Burton <paul.burton@mips.com>
+ */
+
+#include <asm/bcache.h>
+#include <asm/debug.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
+
+static ssize_t sc_prefetch_read(struct file *file, char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	bool enabled = bc_prefetch_is_enabled();
+	char buf[3];
+
+	buf[0] = enabled ? 'Y' : 'N';
+	buf[1] = '\n';
+	buf[2] = 0;
+
+	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t sc_prefetch_write(struct file *file,
+				 const char __user *user_buf,
+				 size_t count, loff_t *ppos)
+{
+	bool enabled;
+	int err;
+
+	err = kstrtobool_from_user(user_buf, count, &enabled);
+	if (err)
+		return err;
+
+	if (enabled)
+		bc_prefetch_enable();
+	else
+		bc_prefetch_disable();
+
+	return count;
+}
+
+static const struct file_operations sc_prefetch_fops = {
+	.open = simple_open,
+	.llseek = default_llseek,
+	.read = sc_prefetch_read,
+	.write = sc_prefetch_write,
+};
+
+static int __init sc_debugfs_init(void)
+{
+	struct dentry *dir;
+
+	dir = debugfs_create_dir("l2cache", mips_debugfs_dir);
+	debugfs_create_file("prefetch", S_IRUGO | S_IWUSR, dir, NULL,
+			    &sc_prefetch_fops);
+	return 0;
+}
+late_initcall(sc_debugfs_init);
diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c
new file mode 100644
index 0000000000..d7238687d7
--- /dev/null
+++ b/arch/mips/mm/sc-ip22.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sc-ip22.c: Indy cache management functions.
+ *
+ * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org),
+ * derived from r4xx0.c by David S. Miller (davem@davemloft.net).
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/bcache.h>
+#include <asm/page.h>
+#include <asm/bootinfo.h>
+#include <asm/sgi/ip22.h>
+#include <asm/sgi/mc.h>
+
+/* Secondary cache size in bytes, if present.  */
+static unsigned long scache_size;
+
+#undef DEBUG_CACHE
+
+#define SC_SIZE 0x00080000
+#define SC_LINE 32
+#define CI_MASK (SC_SIZE - SC_LINE)
+#define SC_INDEX(n) ((n) & CI_MASK)
+
+static inline void indy_sc_wipe(unsigned long first, unsigned long last)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+	"	.set	push			# indy_sc_wipe		\n"
+	"	.set	noreorder					\n"
+	"	.set	mips3						\n"
+	"	.set	noat						\n"
+	"	mfc0	%2, $12						\n"
+	"	li	$1, 0x80		# Go 64 bit		\n"
+	"	mtc0	$1, $12						\n"
+	"								\n"
+	"	#							\n"
+	"	# Open code a dli $1, 0x9000000080000000		\n"
+	"	#							\n"
+	"	# Required because binutils 2.25 will happily accept	\n"
+	"	# 64 bit instructions in .set mips3 mode but puke on	\n"
+	"	# 64 bit constants when generating 32 bit ELF		\n"
+	"	#							\n"
+	"	lui	$1,0x9000					\n"
+	"	dsll	$1,$1,0x10					\n"
+	"	ori	$1,$1,0x8000					\n"
+	"	dsll	$1,$1,0x10					\n"
+	"								\n"
+	"	or	%0, $1			# first line to flush	\n"
+	"	or	%1, $1			# last line to flush	\n"
+	"	.set	at						\n"
+	"								\n"
+	"1:	sw	$0, 0(%0)					\n"
+	"	bne	%0, %1, 1b					\n"
+	"	 daddu	%0, 32						\n"
+	"								\n"
+	"	mtc0	%2, $12			# Back to 32 bit	\n"
+	"	nop				# pipeline hazard	\n"
+	"	nop							\n"
+	"	nop							\n"
+	"	nop							\n"
+	"	.set	pop						\n"
+	: "=r" (first), "=r" (last), "=&r" (tmp)
+	: "0" (first), "1" (last));
+}
+
+static void indy_sc_wback_invalidate(unsigned long addr, unsigned long size)
+{
+	unsigned long first_line, last_line;
+	unsigned long flags;
+
+#ifdef DEBUG_CACHE
+	printk("indy_sc_wback_invalidate[%08lx,%08lx]", addr, size);
+#endif
+
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	/* Which lines to flush?  */
+	first_line = SC_INDEX(addr);
+	last_line = SC_INDEX(addr + size - 1);
+
+	local_irq_save(flags);
+	if (first_line <= last_line) {
+		indy_sc_wipe(first_line, last_line);
+		goto out;
+	}
+
+	indy_sc_wipe(first_line, SC_SIZE - SC_LINE);
+	indy_sc_wipe(0, last_line);
+out:
+	local_irq_restore(flags);
+}
+
+static void indy_sc_enable(void)
+{
+	unsigned long addr, tmp1, tmp2;
+
+	/* This is really cool... */
+#ifdef DEBUG_CACHE
+	printk("Enabling R4600 SCACHE\n");
+#endif
+	__asm__ __volatile__(
+	".set\tpush\n\t"
+	".set\tnoreorder\n\t"
+	".set\tmips3\n\t"
+	"mfc0\t%2, $12\n\t"
+	"nop; nop; nop; nop;\n\t"
+	"li\t%1, 0x80\n\t"
+	"mtc0\t%1, $12\n\t"
+	"nop; nop; nop; nop;\n\t"
+	"li\t%0, 0x1\n\t"
+	"dsll\t%0, 31\n\t"
+	"lui\t%1, 0x9000\n\t"
+	"dsll32\t%1, 0\n\t"
+	"or\t%0, %1, %0\n\t"
+	"sb\t$0, 0(%0)\n\t"
+	"mtc0\t$0, $12\n\t"
+	"nop; nop; nop; nop;\n\t"
+	"mtc0\t%2, $12\n\t"
+	"nop; nop; nop; nop;\n\t"
+	".set\tpop"
+	: "=r" (tmp1), "=r" (tmp2), "=r" (addr));
+}
+
+static void indy_sc_disable(void)
+{
+	unsigned long tmp1, tmp2, tmp3;
+
+#ifdef DEBUG_CACHE
+	printk("Disabling R4600 SCACHE\n");
+#endif
+	__asm__ __volatile__(
+	".set\tpush\n\t"
+	".set\tnoreorder\n\t"
+	".set\tmips3\n\t"
+	"li\t%0, 0x1\n\t"
+	"dsll\t%0, 31\n\t"
+	"lui\t%1, 0x9000\n\t"
+	"dsll32\t%1, 0\n\t"
+	"or\t%0, %1, %0\n\t"
+	"mfc0\t%2, $12\n\t"
+	"nop; nop; nop; nop\n\t"
+	"li\t%1, 0x80\n\t"
+	"mtc0\t%1, $12\n\t"
+	"nop; nop; nop; nop\n\t"
+	"sh\t$0, 0(%0)\n\t"
+	"mtc0\t$0, $12\n\t"
+	"nop; nop; nop; nop\n\t"
+	"mtc0\t%2, $12\n\t"
+	"nop; nop; nop; nop\n\t"
+	".set\tpop"
+	: "=r" (tmp1), "=r" (tmp2), "=r" (tmp3));
+}
+
+static inline int __init indy_sc_probe(void)
+{
+	unsigned int size = ip22_eeprom_read(&sgimc->eeprom, 17);
+	if (size == 0)
+		return 0;
+
+	size <<= PAGE_SHIFT;
+	printk(KERN_INFO "R4600/R5000 SCACHE size %dK, linesize 32 bytes.\n",
+	       size >> 10);
+	scache_size = size;
+
+	return 1;
+}
+
+/* XXX Check with wje if the Indy caches can differentiate between
+   writeback + invalidate and just invalidate.	*/
+static struct bcache_ops indy_sc_ops = {
+	.bc_enable = indy_sc_enable,
+	.bc_disable = indy_sc_disable,
+	.bc_wback_inv = indy_sc_wback_invalidate,
+	.bc_inv = indy_sc_wback_invalidate
+};
+
+void indy_sc_init(void)
+{
+	if (indy_sc_probe()) {
+		indy_sc_enable();
+		bcops = &indy_sc_ops;
+	}
+}
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
new file mode 100644
index 0000000000..06ec304ad4
--- /dev/null
+++ b/arch/mips/mm/sc-mips.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2006 Chris Dearman (chris@mips.com),
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/cpu-type.h>
+#include <asm/mipsregs.h>
+#include <asm/bcache.h>
+#include <asm/cacheops.h>
+#include <asm/page.h>
+#include <asm/mmu_context.h>
+#include <asm/r4kcache.h>
+#include <asm/mips-cps.h>
+#include <asm/bootinfo.h>
+
+/*
+ * MIPS32/MIPS64 L2 cache handling
+ */
+
+/*
+ * Writeback and invalidate the secondary cache before DMA.
+ */
+static void mips_sc_wback_inv(unsigned long addr, unsigned long size)
+{
+	blast_scache_range(addr, addr + size);
+}
+
+/*
+ * Invalidate the secondary cache before DMA.
+ */
+static void mips_sc_inv(unsigned long addr, unsigned long size)
+{
+	unsigned long lsize = cpu_scache_line_size();
+	unsigned long almask = ~(lsize - 1);
+
+	cache_op(Hit_Writeback_Inv_SD, addr & almask);
+	cache_op(Hit_Writeback_Inv_SD, (addr + size - 1) & almask);
+	blast_inv_scache_range(addr, addr + size);
+}
+
+static void mips_sc_enable(void)
+{
+	/* L2 cache is permanently enabled */
+}
+
+static void mips_sc_disable(void)
+{
+	/* L2 cache is permanently enabled */
+}
+
+static void mips_sc_prefetch_enable(void)
+{
+	unsigned long pftctl;
+
+	if (mips_cm_revision() < CM_REV_CM2_5)
+		return;
+
+	/*
+	 * If there is one or more L2 prefetch unit present then enable
+	 * prefetching for both code & data, for all ports.
+	 */
+	pftctl = read_gcr_l2_pft_control();
+	if (pftctl & CM_GCR_L2_PFT_CONTROL_NPFT) {
+		pftctl &= ~CM_GCR_L2_PFT_CONTROL_PAGEMASK;
+		pftctl |= PAGE_MASK & CM_GCR_L2_PFT_CONTROL_PAGEMASK;
+		pftctl |= CM_GCR_L2_PFT_CONTROL_PFTEN;
+		write_gcr_l2_pft_control(pftctl);
+
+		set_gcr_l2_pft_control_b(CM_GCR_L2_PFT_CONTROL_B_PORTID |
+					 CM_GCR_L2_PFT_CONTROL_B_CEN);
+	}
+}
+
+static void mips_sc_prefetch_disable(void)
+{
+	if (mips_cm_revision() < CM_REV_CM2_5)
+		return;
+
+	clear_gcr_l2_pft_control(CM_GCR_L2_PFT_CONTROL_PFTEN);
+	clear_gcr_l2_pft_control_b(CM_GCR_L2_PFT_CONTROL_B_PORTID |
+				   CM_GCR_L2_PFT_CONTROL_B_CEN);
+}
+
+static bool mips_sc_prefetch_is_enabled(void)
+{
+	unsigned long pftctl;
+
+	if (mips_cm_revision() < CM_REV_CM2_5)
+		return false;
+
+	pftctl = read_gcr_l2_pft_control();
+	if (!(pftctl & CM_GCR_L2_PFT_CONTROL_NPFT))
+		return false;
+	return !!(pftctl & CM_GCR_L2_PFT_CONTROL_PFTEN);
+}
+
+static struct bcache_ops mips_sc_ops = {
+	.bc_enable = mips_sc_enable,
+	.bc_disable = mips_sc_disable,
+	.bc_wback_inv = mips_sc_wback_inv,
+	.bc_inv = mips_sc_inv,
+	.bc_prefetch_enable = mips_sc_prefetch_enable,
+	.bc_prefetch_disable = mips_sc_prefetch_disable,
+	.bc_prefetch_is_enabled = mips_sc_prefetch_is_enabled,
+};
+
+/*
+ * Check if the L2 cache controller is activated on a particular platform.
+ * MTI's L2 controller and the L2 cache controller of Broadcom's BMIPS
+ * cores both use c0_config2's bit 12 as "L2 Bypass" bit, that is the
+ * cache being disabled.  However there is no guarantee for this to be
+ * true on all platforms.  In an act of stupidity the spec defined bits
+ * 12..15 as implementation defined so below function will eventually have
+ * to be replaced by a platform specific probe.
+ */
+static inline int mips_sc_is_activated(struct cpuinfo_mips *c)
+{
+	unsigned int config2 = read_c0_config2();
+	unsigned int tmp;
+
+	/* Check the bypass bit (L2B) */
+	switch (current_cpu_type()) {
+	case CPU_34K:
+	case CPU_74K:
+	case CPU_1004K:
+	case CPU_1074K:
+	case CPU_INTERAPTIV:
+	case CPU_PROAPTIV:
+	case CPU_P5600:
+	case CPU_BMIPS5000:
+	case CPU_QEMU_GENERIC:
+	case CPU_P6600:
+		if (config2 & (1 << 12))
+			return 0;
+	}
+
+	tmp = (config2 >> 4) & 0x0f;
+	if (0 < tmp && tmp <= 7)
+		c->scache.linesz = 2 << tmp;
+	else
+		return 0;
+	return 1;
+}
+
+static int mips_sc_probe_cm3(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned long cfg = read_gcr_l2_config();
+	unsigned long sets, line_sz, assoc;
+
+	if (cfg & CM_GCR_L2_CONFIG_BYPASS)
+		return 0;
+
+	sets = cfg & CM_GCR_L2_CONFIG_SET_SIZE;
+	sets >>= __ffs(CM_GCR_L2_CONFIG_SET_SIZE);
+	if (sets)
+		c->scache.sets = 64 << sets;
+
+	line_sz = cfg & CM_GCR_L2_CONFIG_LINE_SIZE;
+	line_sz >>= __ffs(CM_GCR_L2_CONFIG_LINE_SIZE);
+	if (line_sz)
+		c->scache.linesz = 2 << line_sz;
+
+	assoc = cfg & CM_GCR_L2_CONFIG_ASSOC;
+	assoc >>= __ffs(CM_GCR_L2_CONFIG_ASSOC);
+	c->scache.ways = assoc + 1;
+	c->scache.waysize = c->scache.sets * c->scache.linesz;
+	c->scache.waybit = __ffs(c->scache.waysize);
+
+	if (c->scache.linesz) {
+		c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT;
+		c->options |= MIPS_CPU_INCLUSIVE_CACHES;
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline int mips_sc_probe(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned int config1, config2;
+	unsigned int tmp;
+
+	/* Mark as not present until probe completed */
+	c->scache.flags |= MIPS_CACHE_NOT_PRESENT;
+
+	if (mips_cm_revision() >= CM_REV_CM3)
+		return mips_sc_probe_cm3();
+
+	/* Ignore anything but MIPSxx processors */
+	if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+			      MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+			      MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
+			      MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)))
+		return 0;
+
+	/* Does this MIPS32/MIPS64 CPU have a config2 register? */
+	config1 = read_c0_config1();
+	if (!(config1 & MIPS_CONF_M))
+		return 0;
+
+	config2 = read_c0_config2();
+
+	if (!mips_sc_is_activated(c))
+		return 0;
+
+	tmp = (config2 >> 8) & 0x0f;
+	if (tmp <= 7)
+		c->scache.sets = 64 << tmp;
+	else
+		return 0;
+
+	tmp = (config2 >> 0) & 0x0f;
+	if (tmp <= 7)
+		c->scache.ways = tmp + 1;
+	else
+		return 0;
+
+	if (current_cpu_type() == CPU_XBURST) {
+		switch (mips_machtype) {
+		/*
+		 * According to config2 it would be 5-ways, but that is
+		 * contradicted by all documentation.
+		 */
+		case MACH_INGENIC_JZ4770:
+		case MACH_INGENIC_JZ4775:
+			c->scache.ways = 4;
+			break;
+
+		/*
+		 * According to config2 it would be 5-ways and 512-sets,
+		 * but that is contradicted by all documentation.
+		 */
+		case MACH_INGENIC_X1000:
+		case MACH_INGENIC_X1000E:
+			c->scache.sets = 256;
+			c->scache.ways = 4;
+			break;
+		}
+	}
+
+	c->scache.waysize = c->scache.sets * c->scache.linesz;
+	c->scache.waybit = __ffs(c->scache.waysize);
+
+	c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT;
+
+	return 1;
+}
+
+int mips_sc_init(void)
+{
+	int found = mips_sc_probe();
+	if (found) {
+		mips_sc_enable();
+		mips_sc_prefetch_enable();
+		bcops = &mips_sc_ops;
+	}
+	return found;
+}
diff --git a/arch/mips/mm/sc-r5k.c b/arch/mips/mm/sc-r5k.c
new file mode 100644
index 0000000000..736615d68f
--- /dev/null
+++ b/arch/mips/mm/sc-r5k.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org),
+ * derived from r4xx0.c by David S. Miller (davem@davemloft.net).
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/mipsregs.h>
+#include <asm/bcache.h>
+#include <asm/cacheops.h>
+#include <asm/page.h>
+#include <asm/mmu_context.h>
+#include <asm/r4kcache.h>
+
+/* Secondary cache size in bytes, if present.  */
+static unsigned long scache_size;
+
+#define SC_LINE 32
+#define SC_PAGE (128*SC_LINE)
+
+static inline void blast_r5000_scache(void)
+{
+	unsigned long start = INDEX_BASE;
+	unsigned long end = start + scache_size;
+
+	while(start < end) {
+		cache_op(R5K_Page_Invalidate_S, start);
+		start += SC_PAGE;
+	}
+}
+
+static void r5k_dma_cache_inv_sc(unsigned long addr, unsigned long size)
+{
+	unsigned long end, a;
+
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	if (size >= scache_size) {
+		blast_r5000_scache();
+		return;
+	}
+
+	/* On the R5000 secondary cache we cannot
+	 * invalidate less than a page at a time.
+	 * The secondary cache is physically indexed, write-through.
+	 */
+	a = addr & ~(SC_PAGE - 1);
+	end = (addr + size - 1) & ~(SC_PAGE - 1);
+	while (a <= end) {
+		cache_op(R5K_Page_Invalidate_S, a);
+		a += SC_PAGE;
+	}
+}
+
+static void r5k_sc_enable(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	set_c0_config(R5K_CONF_SE);
+	blast_r5000_scache();
+	local_irq_restore(flags);
+}
+
+static void r5k_sc_disable(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	blast_r5000_scache();
+	clear_c0_config(R5K_CONF_SE);
+	local_irq_restore(flags);
+}
+
+static inline int __init r5k_sc_probe(void)
+{
+	unsigned long config = read_c0_config();
+
+	if (config & CONF_SC)
+		return 0;
+
+	scache_size = (512 * 1024) << ((config & R5K_CONF_SS) >> 20);
+
+	printk("R5000 SCACHE size %ldkB, linesize 32 bytes.\n",
+			scache_size >> 10);
+
+	return 1;
+}
+
+static struct bcache_ops r5k_sc_ops = {
+	.bc_enable = r5k_sc_enable,
+	.bc_disable = r5k_sc_disable,
+	.bc_wback_inv = r5k_dma_cache_inv_sc,
+	.bc_inv = r5k_dma_cache_inv_sc
+};
+
+void r5k_sc_init(void)
+{
+	if (r5k_sc_probe()) {
+		r5k_sc_enable();
+		bcops = &r5k_sc_ops;
+	}
+}
diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c
new file mode 100644
index 0000000000..e9e3777a78
--- /dev/null
+++ b/arch/mips/mm/sc-rm7k.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sc-rm7k.c: RM7000 cache management functions.
+ *
+ * Copyright (C) 1997, 2001, 2003, 2004 Ralf Baechle (ralf@linux-mips.org)
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/bitops.h>
+
+#include <asm/addrspace.h>
+#include <asm/bcache.h>
+#include <asm/cacheops.h>
+#include <asm/mipsregs.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h> /* for run_uncached() */
+
+/* Primary cache parameters. */
+#define sc_lsize	32
+#define tc_pagesize	(32*128)
+
+/* Secondary cache parameters. */
+#define scache_size	(256*1024)	/* Fixed to 256KiB on RM7000 */
+
+/* Tertiary cache parameters */
+#define tc_lsize	32
+
+extern unsigned long icache_way_size, dcache_way_size;
+static unsigned long tcache_size;
+
+#include <asm/r4kcache.h>
+
+static int rm7k_tcache_init;
+
+/*
+ * Writeback and invalidate the primary cache dcache before DMA.
+ * (XXX These need to be fixed ...)
+ */
+static void rm7k_sc_wback_inv(unsigned long addr, unsigned long size)
+{
+	unsigned long end, a;
+
+	pr_debug("rm7k_sc_wback_inv[%08lx,%08lx]", addr, size);
+
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	blast_scache_range(addr, addr + size);
+
+	if (!rm7k_tcache_init)
+		return;
+
+	a = addr & ~(tc_pagesize - 1);
+	end = (addr + size - 1) & ~(tc_pagesize - 1);
+	while(1) {
+		invalidate_tcache_page(a);	/* Page_Invalidate_T */
+		if (a == end)
+			break;
+		a += tc_pagesize;
+	}
+}
+
+static void rm7k_sc_inv(unsigned long addr, unsigned long size)
+{
+	unsigned long end, a;
+
+	pr_debug("rm7k_sc_inv[%08lx,%08lx]", addr, size);
+
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	blast_inv_scache_range(addr, addr + size);
+
+	if (!rm7k_tcache_init)
+		return;
+
+	a = addr & ~(tc_pagesize - 1);
+	end = (addr + size - 1) & ~(tc_pagesize - 1);
+	while(1) {
+		invalidate_tcache_page(a);	/* Page_Invalidate_T */
+		if (a == end)
+			break;
+		a += tc_pagesize;
+	}
+}
+
+static void blast_rm7k_tcache(void)
+{
+	unsigned long start = CKSEG0ADDR(0);
+	unsigned long end = start + tcache_size;
+
+	write_c0_taglo(0);
+
+	while (start < end) {
+		cache_op(Page_Invalidate_T, start);
+		start += tc_pagesize;
+	}
+}
+
+/*
+ * This function is executed in uncached address space.
+ */
+static void __rm7k_tc_enable(void)
+{
+	int i;
+
+	set_c0_config(RM7K_CONF_TE);
+
+	write_c0_taglo(0);
+	write_c0_taghi(0);
+
+	for (i = 0; i < tcache_size; i += tc_lsize)
+		cache_op(Index_Store_Tag_T, CKSEG0ADDR(i));
+}
+
+static void rm7k_tc_enable(void)
+{
+	if (read_c0_config() & RM7K_CONF_TE)
+		return;
+
+	BUG_ON(tcache_size == 0);
+
+	run_uncached(__rm7k_tc_enable);
+}
+
+/*
+ * This function is executed in uncached address space.
+ */
+static void __rm7k_sc_enable(void)
+{
+	int i;
+
+	set_c0_config(RM7K_CONF_SE);
+
+	write_c0_taglo(0);
+	write_c0_taghi(0);
+
+	for (i = 0; i < scache_size; i += sc_lsize)
+		cache_op(Index_Store_Tag_SD, CKSEG0ADDR(i));
+}
+
+static void rm7k_sc_enable(void)
+{
+	if (read_c0_config() & RM7K_CONF_SE)
+		return;
+
+	pr_info("Enabling secondary cache...\n");
+	run_uncached(__rm7k_sc_enable);
+
+	if (rm7k_tcache_init)
+		rm7k_tc_enable();
+}
+
+static void rm7k_tc_disable(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	blast_rm7k_tcache();
+	clear_c0_config(RM7K_CONF_TE);
+	local_irq_restore(flags);
+}
+
+static void rm7k_sc_disable(void)
+{
+	clear_c0_config(RM7K_CONF_SE);
+
+	if (rm7k_tcache_init)
+		rm7k_tc_disable();
+}
+
+static struct bcache_ops rm7k_sc_ops = {
+	.bc_enable = rm7k_sc_enable,
+	.bc_disable = rm7k_sc_disable,
+	.bc_wback_inv = rm7k_sc_wback_inv,
+	.bc_inv = rm7k_sc_inv
+};
+
+/*
+ * This is a probing function like the one found in c-r4k.c, we look for the
+ * wrap around point with different addresses.
+ */
+static void __probe_tcache(void)
+{
+	unsigned long flags, addr, begin, end, pow2;
+
+	begin = (unsigned long) &_stext;
+	begin  &= ~((8 * 1024 * 1024) - 1);
+	end = begin + (8 * 1024 * 1024);
+
+	local_irq_save(flags);
+
+	set_c0_config(RM7K_CONF_TE);
+
+	/* Fill size-multiple lines with a valid tag */
+	pow2 = (256 * 1024);
+	for (addr = begin; addr <= end; addr = (begin + pow2)) {
+		unsigned long *p = (unsigned long *) addr;
+		__asm__ __volatile__("nop" : : "r" (*p));
+		pow2 <<= 1;
+	}
+
+	/* Load first line with a 0 tag, to check after */
+	write_c0_taglo(0);
+	write_c0_taghi(0);
+	cache_op(Index_Store_Tag_T, begin);
+
+	/* Look for the wrap-around */
+	pow2 = (512 * 1024);
+	for (addr = begin + (512 * 1024); addr <= end; addr = begin + pow2) {
+		cache_op(Index_Load_Tag_T, addr);
+		if (!read_c0_taglo())
+			break;
+		pow2 <<= 1;
+	}
+
+	addr -= begin;
+	tcache_size = addr;
+
+	clear_c0_config(RM7K_CONF_TE);
+
+	local_irq_restore(flags);
+}
+
+void rm7k_sc_init(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned int config = read_c0_config();
+
+	if ((config & RM7K_CONF_SC))
+		return;
+
+	c->scache.linesz = sc_lsize;
+	c->scache.ways = 4;
+	c->scache.waybit= __ffs(scache_size / c->scache.ways);
+	c->scache.waysize = scache_size / c->scache.ways;
+	c->scache.sets = scache_size / (c->scache.linesz * c->scache.ways);
+	printk(KERN_INFO "Secondary cache size %dK, linesize %d bytes.\n",
+	       (scache_size >> 10), sc_lsize);
+
+	if (!(config & RM7K_CONF_SE))
+		rm7k_sc_enable();
+
+	bcops = &rm7k_sc_ops;
+
+	/*
+	 * While we're at it let's deal with the tertiary cache.
+	 */
+
+	rm7k_tcache_init = 0;
+	tcache_size = 0;
+
+	if (config & RM7K_CONF_TC)
+		return;
+
+	/*
+	 * No efficient way to ask the hardware for the size of the tcache,
+	 * so must probe for it.
+	 */
+	run_uncached(__probe_tcache);
+	rm7k_tc_enable();
+	rm7k_tcache_init = 1;
+	c->tcache.linesz = tc_lsize;
+	c->tcache.ways = 1;
+	pr_info("Tertiary cache size %ldK.\n", (tcache_size >> 10));
+}
diff --git a/arch/mips/mm/tlb-funcs.S b/arch/mips/mm/tlb-funcs.S
new file mode 100644
index 0000000000..2705d7dcb3
--- /dev/null
+++ b/arch/mips/mm/tlb-funcs.S
@@ -0,0 +1,40 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Micro-assembler generated tlb handler functions.
+ *
+ * Copyright (C) 2013  Broadcom Corporation.
+ *
+ * Based on mm/page-funcs.c
+ * Copyright (C) 2012  MIPS Technologies, Inc.
+ * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/export.h>
+#include <asm/asm.h>
+#include <asm/regdef.h>
+
+#define FASTPATH_SIZE	128
+
+LEAF(tlbmiss_handler_setup_pgd)
+1:	j	1b		/* Dummy, will be replaced. */
+	.space	64
+END(tlbmiss_handler_setup_pgd)
+EXPORT(tlbmiss_handler_setup_pgd_end)
+EXPORT_SYMBOL_GPL(tlbmiss_handler_setup_pgd)
+
+LEAF(handle_tlbm)
+	.space		FASTPATH_SIZE * 4
+END(handle_tlbm)
+EXPORT(handle_tlbm_end)
+
+LEAF(handle_tlbs)
+	.space		FASTPATH_SIZE * 4
+END(handle_tlbs)
+EXPORT(handle_tlbs_end)
+
+LEAF(handle_tlbl)
+	.space		FASTPATH_SIZE * 4
+END(handle_tlbl)
+EXPORT(handle_tlbl_end)
diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c
new file mode 100644
index 0000000000..53dfa2b931
--- /dev/null
+++ b/arch/mips/mm/tlb-r3k.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * r2300.c: R2000 and R3000 specific mmu/cache code.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
+ *
+ * with a lot of changes to make this thing work for R3000s
+ * Tx39XX R4k style caches added. HK
+ * Copyright (C) 1998, 1999, 2000 Harald Koerfgen
+ * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov
+ * Copyright (C) 2002  Ralf Baechle
+ * Copyright (C) 2002  Maciej W. Rozycki
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbmisc.h>
+#include <asm/isadep.h>
+#include <asm/io.h>
+#include <asm/bootinfo.h>
+#include <asm/cpu.h>
+
+#undef DEBUG_TLB
+
+extern void build_tlb_refill_handler(void);
+
+/* CP0 hazard avoidance. */
+#define BARRIER				\
+	__asm__ __volatile__(		\
+		".set	push\n\t"	\
+		".set	noreorder\n\t"	\
+		"nop\n\t"		\
+		".set	pop\n\t")
+
+/* TLB operations. */
+static void local_flush_tlb_from(int entry)
+{
+	unsigned long old_ctx;
+
+	old_ctx = read_c0_entryhi() & cpu_asid_mask(&current_cpu_data);
+	write_c0_entrylo0(0);
+	while (entry < current_cpu_data.tlbsize) {
+		write_c0_index(entry << 8);
+		write_c0_entryhi((entry | 0x80000) << 12);
+		entry++;				/* BARRIER */
+		tlb_write_indexed();
+	}
+	write_c0_entryhi(old_ctx);
+}
+
+void local_flush_tlb_all(void)
+{
+	unsigned long flags;
+
+#ifdef DEBUG_TLB
+	printk("[tlball]");
+#endif
+	local_irq_save(flags);
+	local_flush_tlb_from(8);
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			   unsigned long end)
+{
+	unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
+	struct mm_struct *mm = vma->vm_mm;
+	int cpu = smp_processor_id();
+
+	if (cpu_context(cpu, mm) != 0) {
+		unsigned long size, flags;
+
+#ifdef DEBUG_TLB
+		printk("[tlbrange<%lu,0x%08lx,0x%08lx>]",
+			cpu_context(cpu, mm) & asid_mask, start, end);
+#endif
+		local_irq_save(flags);
+		size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+		if (size <= current_cpu_data.tlbsize) {
+			int oldpid = read_c0_entryhi() & asid_mask;
+			int newpid = cpu_context(cpu, mm) & asid_mask;
+
+			start &= PAGE_MASK;
+			end += PAGE_SIZE - 1;
+			end &= PAGE_MASK;
+			while (start < end) {
+				int idx;
+
+				write_c0_entryhi(start | newpid);
+				start += PAGE_SIZE;	/* BARRIER */
+				tlb_probe();
+				idx = read_c0_index();
+				write_c0_entrylo0(0);
+				write_c0_entryhi(KSEG0);
+				if (idx < 0)		/* BARRIER */
+					continue;
+				tlb_write_indexed();
+			}
+			write_c0_entryhi(oldpid);
+		} else {
+			drop_mmu_context(mm);
+		}
+		local_irq_restore(flags);
+	}
+}
+
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long size, flags;
+
+#ifdef DEBUG_TLB
+	printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", start, end);
+#endif
+	local_irq_save(flags);
+	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	if (size <= current_cpu_data.tlbsize) {
+		int pid = read_c0_entryhi();
+
+		start &= PAGE_MASK;
+		end += PAGE_SIZE - 1;
+		end &= PAGE_MASK;
+
+		while (start < end) {
+			int idx;
+
+			write_c0_entryhi(start);
+			start += PAGE_SIZE;		/* BARRIER */
+			tlb_probe();
+			idx = read_c0_index();
+			write_c0_entrylo0(0);
+			write_c0_entryhi(KSEG0);
+			if (idx < 0)			/* BARRIER */
+				continue;
+			tlb_write_indexed();
+		}
+		write_c0_entryhi(pid);
+	} else {
+		local_flush_tlb_all();
+	}
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
+	int cpu = smp_processor_id();
+
+	if (cpu_context(cpu, vma->vm_mm) != 0) {
+		unsigned long flags;
+		int oldpid, newpid, idx;
+
+#ifdef DEBUG_TLB
+		printk("[tlbpage<%lu,0x%08lx>]", cpu_context(cpu, vma->vm_mm), page);
+#endif
+		newpid = cpu_context(cpu, vma->vm_mm) & asid_mask;
+		page &= PAGE_MASK;
+		local_irq_save(flags);
+		oldpid = read_c0_entryhi() & asid_mask;
+		write_c0_entryhi(page | newpid);
+		BARRIER;
+		tlb_probe();
+		idx = read_c0_index();
+		write_c0_entrylo0(0);
+		write_c0_entryhi(KSEG0);
+		if (idx < 0)				/* BARRIER */
+			goto finish;
+		tlb_write_indexed();
+
+finish:
+		write_c0_entryhi(oldpid);
+		local_irq_restore(flags);
+	}
+}
+
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+	unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
+	unsigned long flags;
+	int idx, pid;
+
+	/*
+	 * Handle debugger faulting in for debugee.
+	 */
+	if (current->active_mm != vma->vm_mm)
+		return;
+
+	pid = read_c0_entryhi() & asid_mask;
+
+#ifdef DEBUG_TLB
+	if ((pid != (cpu_context(cpu, vma->vm_mm) & asid_mask)) || (cpu_context(cpu, vma->vm_mm) == 0)) {
+		printk("update_mmu_cache: Wheee, bogus tlbpid mmpid=%lu tlbpid=%d\n",
+		       (cpu_context(cpu, vma->vm_mm)), pid);
+	}
+#endif
+
+	local_irq_save(flags);
+	address &= PAGE_MASK;
+	write_c0_entryhi(address | pid);
+	BARRIER;
+	tlb_probe();
+	idx = read_c0_index();
+	write_c0_entrylo0(pte_val(pte));
+	write_c0_entryhi(address | pid);
+	if (idx < 0) {					/* BARRIER */
+		tlb_write_random();
+	} else {
+		tlb_write_indexed();
+	}
+	write_c0_entryhi(pid);
+	local_irq_restore(flags);
+}
+
+void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
+		     unsigned long entryhi, unsigned long pagemask)
+{
+	unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
+	unsigned long flags;
+	unsigned long old_ctx;
+	static unsigned long wired = 0;
+
+	if (wired < 8) {
+#ifdef DEBUG_TLB
+		printk("[tlbwired<entry lo0 %8x, hi %8x\n>]\n",
+		       entrylo0, entryhi);
+#endif
+
+		local_irq_save(flags);
+		old_ctx = read_c0_entryhi() & asid_mask;
+		write_c0_entrylo0(entrylo0);
+		write_c0_entryhi(entryhi);
+		write_c0_index(wired);
+		wired++;				/* BARRIER */
+		tlb_write_indexed();
+		write_c0_entryhi(old_ctx);
+		local_flush_tlb_all();
+		local_irq_restore(flags);
+	}
+}
+
+void tlb_init(void)
+{
+	local_flush_tlb_from(0);
+	build_tlb_refill_handler();
+}
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
new file mode 100644
index 0000000000..93c2d69558
--- /dev/null
+++ b/arch/mips/mm/tlb-r4k.c
@@ -0,0 +1,591 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org
+ * Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2002 MIPS Technologies, Inc.  All rights reserved.
+ */
+#include <linux/cpu_pm.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/export.h>
+
+#include <asm/cpu.h>
+#include <asm/cpu-type.h>
+#include <asm/bootinfo.h>
+#include <asm/hazards.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+#include <asm/tlbmisc.h>
+
+extern void build_tlb_refill_handler(void);
+
+/*
+ * LOONGSON-2 has a 4 entry itlb which is a subset of jtlb, LOONGSON-3 has
+ * a 4 entry itlb and a 4 entry dtlb which are subsets of jtlb. Unfortunately,
+ * itlb/dtlb are not totally transparent to software.
+ */
+static inline void flush_micro_tlb(void)
+{
+	switch (current_cpu_type()) {
+	case CPU_LOONGSON2EF:
+		write_c0_diag(LOONGSON_DIAG_ITLB);
+		break;
+	case CPU_LOONGSON64:
+		write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB);
+		break;
+	default:
+		break;
+	}
+}
+
+static inline void flush_micro_tlb_vm(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_EXEC)
+		flush_micro_tlb();
+}
+
+void local_flush_tlb_all(void)
+{
+	unsigned long flags;
+	unsigned long old_ctx;
+	int entry, ftlbhighset;
+
+	local_irq_save(flags);
+	/* Save old context and create impossible VPN2 value */
+	old_ctx = read_c0_entryhi();
+	htw_stop();
+	write_c0_entrylo0(0);
+	write_c0_entrylo1(0);
+
+	entry = num_wired_entries();
+
+	/*
+	 * Blast 'em all away.
+	 * If there are any wired entries, fall back to iterating
+	 */
+	if (cpu_has_tlbinv && !entry) {
+		if (current_cpu_data.tlbsizevtlb) {
+			write_c0_index(0);
+			mtc0_tlbw_hazard();
+			tlbinvf();  /* invalidate VTLB */
+		}
+		ftlbhighset = current_cpu_data.tlbsizevtlb +
+			current_cpu_data.tlbsizeftlbsets;
+		for (entry = current_cpu_data.tlbsizevtlb;
+		     entry < ftlbhighset;
+		     entry++) {
+			write_c0_index(entry);
+			mtc0_tlbw_hazard();
+			tlbinvf();  /* invalidate one FTLB set */
+		}
+	} else {
+		while (entry < current_cpu_data.tlbsize) {
+			/* Make sure all entries differ. */
+			write_c0_entryhi(UNIQUE_ENTRYHI(entry));
+			write_c0_index(entry);
+			mtc0_tlbw_hazard();
+			tlb_write_indexed();
+			entry++;
+		}
+	}
+	tlbw_use_hazard();
+	write_c0_entryhi(old_ctx);
+	htw_start();
+	flush_micro_tlb();
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(local_flush_tlb_all);
+
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+	unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int cpu = smp_processor_id();
+
+	if (cpu_context(cpu, mm) != 0) {
+		unsigned long size, flags;
+
+		local_irq_save(flags);
+		start = round_down(start, PAGE_SIZE << 1);
+		end = round_up(end, PAGE_SIZE << 1);
+		size = (end - start) >> (PAGE_SHIFT + 1);
+		if (size <= (current_cpu_data.tlbsizeftlbsets ?
+			     current_cpu_data.tlbsize / 8 :
+			     current_cpu_data.tlbsize / 2)) {
+			unsigned long old_entryhi, old_mmid;
+			int newpid = cpu_asid(cpu, mm);
+
+			old_entryhi = read_c0_entryhi();
+			if (cpu_has_mmid) {
+				old_mmid = read_c0_memorymapid();
+				write_c0_memorymapid(newpid);
+			}
+
+			htw_stop();
+			while (start < end) {
+				int idx;
+
+				if (cpu_has_mmid)
+					write_c0_entryhi(start);
+				else
+					write_c0_entryhi(start | newpid);
+				start += (PAGE_SIZE << 1);
+				mtc0_tlbw_hazard();
+				tlb_probe();
+				tlb_probe_hazard();
+				idx = read_c0_index();
+				write_c0_entrylo0(0);
+				write_c0_entrylo1(0);
+				if (idx < 0)
+					continue;
+				/* Make sure all entries differ. */
+				write_c0_entryhi(UNIQUE_ENTRYHI(idx));
+				mtc0_tlbw_hazard();
+				tlb_write_indexed();
+			}
+			tlbw_use_hazard();
+			write_c0_entryhi(old_entryhi);
+			if (cpu_has_mmid)
+				write_c0_memorymapid(old_mmid);
+			htw_start();
+		} else {
+			drop_mmu_context(mm);
+		}
+		flush_micro_tlb();
+		local_irq_restore(flags);
+	}
+}
+
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long size, flags;
+
+	local_irq_save(flags);
+	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	size = (size + 1) >> 1;
+	if (size <= (current_cpu_data.tlbsizeftlbsets ?
+		     current_cpu_data.tlbsize / 8 :
+		     current_cpu_data.tlbsize / 2)) {
+		int pid = read_c0_entryhi();
+
+		start &= (PAGE_MASK << 1);
+		end += ((PAGE_SIZE << 1) - 1);
+		end &= (PAGE_MASK << 1);
+		htw_stop();
+
+		while (start < end) {
+			int idx;
+
+			write_c0_entryhi(start);
+			start += (PAGE_SIZE << 1);
+			mtc0_tlbw_hazard();
+			tlb_probe();
+			tlb_probe_hazard();
+			idx = read_c0_index();
+			write_c0_entrylo0(0);
+			write_c0_entrylo1(0);
+			if (idx < 0)
+				continue;
+			/* Make sure all entries differ. */
+			write_c0_entryhi(UNIQUE_ENTRYHI(idx));
+			mtc0_tlbw_hazard();
+			tlb_write_indexed();
+		}
+		tlbw_use_hazard();
+		write_c0_entryhi(pid);
+		htw_start();
+	} else {
+		local_flush_tlb_all();
+	}
+	flush_micro_tlb();
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	int cpu = smp_processor_id();
+
+	if (cpu_context(cpu, vma->vm_mm) != 0) {
+		unsigned long old_mmid;
+		unsigned long flags, old_entryhi;
+		int idx;
+
+		page &= (PAGE_MASK << 1);
+		local_irq_save(flags);
+		old_entryhi = read_c0_entryhi();
+		htw_stop();
+		if (cpu_has_mmid) {
+			old_mmid = read_c0_memorymapid();
+			write_c0_entryhi(page);
+			write_c0_memorymapid(cpu_asid(cpu, vma->vm_mm));
+		} else {
+			write_c0_entryhi(page | cpu_asid(cpu, vma->vm_mm));
+		}
+		mtc0_tlbw_hazard();
+		tlb_probe();
+		tlb_probe_hazard();
+		idx = read_c0_index();
+		write_c0_entrylo0(0);
+		write_c0_entrylo1(0);
+		if (idx < 0)
+			goto finish;
+		/* Make sure all entries differ. */
+		write_c0_entryhi(UNIQUE_ENTRYHI(idx));
+		mtc0_tlbw_hazard();
+		tlb_write_indexed();
+		tlbw_use_hazard();
+
+	finish:
+		write_c0_entryhi(old_entryhi);
+		if (cpu_has_mmid)
+			write_c0_memorymapid(old_mmid);
+		htw_start();
+		flush_micro_tlb_vm(vma);
+		local_irq_restore(flags);
+	}
+}
+
+/*
+ * This one is only used for pages with the global bit set so we don't care
+ * much about the ASID.
+ */
+void local_flush_tlb_one(unsigned long page)
+{
+	unsigned long flags;
+	int oldpid, idx;
+
+	local_irq_save(flags);
+	oldpid = read_c0_entryhi();
+	htw_stop();
+	page &= (PAGE_MASK << 1);
+	write_c0_entryhi(page);
+	mtc0_tlbw_hazard();
+	tlb_probe();
+	tlb_probe_hazard();
+	idx = read_c0_index();
+	write_c0_entrylo0(0);
+	write_c0_entrylo1(0);
+	if (idx >= 0) {
+		/* Make sure all entries differ. */
+		write_c0_entryhi(UNIQUE_ENTRYHI(idx));
+		mtc0_tlbw_hazard();
+		tlb_write_indexed();
+		tlbw_use_hazard();
+	}
+	write_c0_entryhi(oldpid);
+	htw_start();
+	flush_micro_tlb();
+	local_irq_restore(flags);
+}
+
+/*
+ * We will need multiple versions of update_mmu_cache(), one that just
+ * updates the TLB with the new pte(s), and another which also checks
+ * for the R4k "end of page" hardware bug and does the needy.
+ */
+void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
+{
+	unsigned long flags;
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep, *ptemap = NULL;
+	int idx, pid;
+
+	/*
+	 * Handle debugger faulting in for debugee.
+	 */
+	if (current->active_mm != vma->vm_mm)
+		return;
+
+	local_irq_save(flags);
+
+	htw_stop();
+	address &= (PAGE_MASK << 1);
+	if (cpu_has_mmid) {
+		write_c0_entryhi(address);
+	} else {
+		pid = read_c0_entryhi() & cpu_asid_mask(&current_cpu_data);
+		write_c0_entryhi(address | pid);
+	}
+	pgdp = pgd_offset(vma->vm_mm, address);
+	mtc0_tlbw_hazard();
+	tlb_probe();
+	tlb_probe_hazard();
+	p4dp = p4d_offset(pgdp, address);
+	pudp = pud_offset(p4dp, address);
+	pmdp = pmd_offset(pudp, address);
+	idx = read_c0_index();
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	/* this could be a huge page  */
+	if (pmd_huge(*pmdp)) {
+		unsigned long lo;
+		write_c0_pagemask(PM_HUGE_MASK);
+		ptep = (pte_t *)pmdp;
+		lo = pte_to_entrylo(pte_val(*ptep));
+		write_c0_entrylo0(lo);
+		write_c0_entrylo1(lo + (HPAGE_SIZE >> 7));
+
+		mtc0_tlbw_hazard();
+		if (idx < 0)
+			tlb_write_random();
+		else
+			tlb_write_indexed();
+		tlbw_use_hazard();
+		write_c0_pagemask(PM_DEFAULT_MASK);
+	} else
+#endif
+	{
+		ptemap = ptep = pte_offset_map(pmdp, address);
+		/*
+		 * update_mmu_cache() is called between pte_offset_map_lock()
+		 * and pte_unmap_unlock(), so we can assume that ptep is not
+		 * NULL here: and what should be done below if it were NULL?
+		 */
+
+#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+#ifdef CONFIG_XPA
+		write_c0_entrylo0(pte_to_entrylo(ptep->pte_high));
+		if (cpu_has_xpa)
+			writex_c0_entrylo0(ptep->pte_low & _PFNX_MASK);
+		ptep++;
+		write_c0_entrylo1(pte_to_entrylo(ptep->pte_high));
+		if (cpu_has_xpa)
+			writex_c0_entrylo1(ptep->pte_low & _PFNX_MASK);
+#else
+		write_c0_entrylo0(ptep->pte_high);
+		ptep++;
+		write_c0_entrylo1(ptep->pte_high);
+#endif
+#else
+		write_c0_entrylo0(pte_to_entrylo(pte_val(*ptep++)));
+		write_c0_entrylo1(pte_to_entrylo(pte_val(*ptep)));
+#endif
+		mtc0_tlbw_hazard();
+		if (idx < 0)
+			tlb_write_random();
+		else
+			tlb_write_indexed();
+	}
+	tlbw_use_hazard();
+	htw_start();
+	flush_micro_tlb_vm(vma);
+
+	if (ptemap)
+		pte_unmap(ptemap);
+	local_irq_restore(flags);
+}
+
+void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
+		     unsigned long entryhi, unsigned long pagemask)
+{
+#ifdef CONFIG_XPA
+	panic("Broken for XPA kernels");
+#else
+	unsigned int old_mmid;
+	unsigned long flags;
+	unsigned long wired;
+	unsigned long old_pagemask;
+	unsigned long old_ctx;
+
+	local_irq_save(flags);
+	if (cpu_has_mmid) {
+		old_mmid = read_c0_memorymapid();
+		write_c0_memorymapid(MMID_KERNEL_WIRED);
+	}
+	/* Save old context and create impossible VPN2 value */
+	old_ctx = read_c0_entryhi();
+	htw_stop();
+	old_pagemask = read_c0_pagemask();
+	wired = num_wired_entries();
+	write_c0_wired(wired + 1);
+	write_c0_index(wired);
+	tlbw_use_hazard();	/* What is the hazard here? */
+	write_c0_pagemask(pagemask);
+	write_c0_entryhi(entryhi);
+	write_c0_entrylo0(entrylo0);
+	write_c0_entrylo1(entrylo1);
+	mtc0_tlbw_hazard();
+	tlb_write_indexed();
+	tlbw_use_hazard();
+
+	write_c0_entryhi(old_ctx);
+	if (cpu_has_mmid)
+		write_c0_memorymapid(old_mmid);
+	tlbw_use_hazard();	/* What is the hazard here? */
+	htw_start();
+	write_c0_pagemask(old_pagemask);
+	local_flush_tlb_all();
+	local_irq_restore(flags);
+#endif
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+int has_transparent_hugepage(void)
+{
+	static unsigned int mask = -1;
+
+	if (mask == -1) {	/* first call comes during __init */
+		unsigned long flags;
+
+		local_irq_save(flags);
+		write_c0_pagemask(PM_HUGE_MASK);
+		back_to_back_c0_hazard();
+		mask = read_c0_pagemask();
+		write_c0_pagemask(PM_DEFAULT_MASK);
+		local_irq_restore(flags);
+	}
+	return mask == PM_HUGE_MASK;
+}
+EXPORT_SYMBOL(has_transparent_hugepage);
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE  */
+
+/*
+ * Used for loading TLB entries before trap_init() has started, when we
+ * don't actually want to add a wired entry which remains throughout the
+ * lifetime of the system
+ */
+
+int temp_tlb_entry;
+
+__init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1,
+			       unsigned long entryhi, unsigned long pagemask)
+{
+	int ret = 0;
+	unsigned long flags;
+	unsigned long wired;
+	unsigned long old_pagemask;
+	unsigned long old_ctx;
+
+	local_irq_save(flags);
+	/* Save old context and create impossible VPN2 value */
+	htw_stop();
+	old_ctx = read_c0_entryhi();
+	old_pagemask = read_c0_pagemask();
+	wired = num_wired_entries();
+	if (--temp_tlb_entry < wired) {
+		printk(KERN_WARNING
+		       "No TLB space left for add_temporary_entry\n");
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	write_c0_index(temp_tlb_entry);
+	write_c0_pagemask(pagemask);
+	write_c0_entryhi(entryhi);
+	write_c0_entrylo0(entrylo0);
+	write_c0_entrylo1(entrylo1);
+	mtc0_tlbw_hazard();
+	tlb_write_indexed();
+	tlbw_use_hazard();
+
+	write_c0_entryhi(old_ctx);
+	write_c0_pagemask(old_pagemask);
+	htw_start();
+out:
+	local_irq_restore(flags);
+	return ret;
+}
+
+static int ntlb;
+static int __init set_ntlb(char *str)
+{
+	get_option(&str, &ntlb);
+	return 1;
+}
+
+__setup("ntlb=", set_ntlb);
+
+/*
+ * Configure TLB (for init or after a CPU has been powered off).
+ */
+static void r4k_tlb_configure(void)
+{
+	/*
+	 * You should never change this register:
+	 *   - On R4600 1.7 the tlbp never hits for pages smaller than
+	 *     the value in the c0_pagemask register.
+	 *   - The entire mm handling assumes the c0_pagemask register to
+	 *     be set to fixed-size pages.
+	 */
+	write_c0_pagemask(PM_DEFAULT_MASK);
+	back_to_back_c0_hazard();
+	if (read_c0_pagemask() != PM_DEFAULT_MASK)
+		panic("MMU doesn't support PAGE_SIZE=0x%lx", PAGE_SIZE);
+
+	write_c0_wired(0);
+	if (current_cpu_type() == CPU_R10000 ||
+	    current_cpu_type() == CPU_R12000 ||
+	    current_cpu_type() == CPU_R14000 ||
+	    current_cpu_type() == CPU_R16000)
+		write_c0_framemask(0);
+
+	if (cpu_has_rixi) {
+		/*
+		 * Enable the no read, no exec bits, and enable large physical
+		 * address.
+		 */
+#ifdef CONFIG_64BIT
+		set_c0_pagegrain(PG_RIE | PG_XIE | PG_ELPA);
+#else
+		set_c0_pagegrain(PG_RIE | PG_XIE);
+#endif
+	}
+
+	temp_tlb_entry = current_cpu_data.tlbsize - 1;
+
+	/* From this point on the ARC firmware is dead.	 */
+	local_flush_tlb_all();
+
+	/* Did I tell you that ARC SUCKS?  */
+}
+
+void tlb_init(void)
+{
+	r4k_tlb_configure();
+
+	if (ntlb) {
+		if (ntlb > 1 && ntlb <= current_cpu_data.tlbsize) {
+			int wired = current_cpu_data.tlbsize - ntlb;
+			write_c0_wired(wired);
+			write_c0_index(wired-1);
+			printk("Restricting TLB to %d entries\n", ntlb);
+		} else
+			printk("Ignoring invalid argument ntlb=%d\n", ntlb);
+	}
+
+	build_tlb_refill_handler();
+}
+
+static int r4k_tlb_pm_notifier(struct notifier_block *self, unsigned long cmd,
+			       void *v)
+{
+	switch (cmd) {
+	case CPU_PM_ENTER_FAILED:
+	case CPU_PM_EXIT:
+		r4k_tlb_configure();
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block r4k_tlb_pm_notifier_block = {
+	.notifier_call = r4k_tlb_pm_notifier,
+};
+
+static int __init r4k_tlb_init_pm(void)
+{
+	return cpu_pm_register_notifier(&r4k_tlb_pm_notifier_block);
+}
+arch_initcall(r4k_tlb_init_pm);
diff --git a/arch/mips/mm/tlbex-fault.S b/arch/mips/mm/tlbex-fault.S
new file mode 100644
index 0000000000..77db401fc6
--- /dev/null
+++ b/arch/mips/mm/tlbex-fault.S
@@ -0,0 +1,28 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999 Ralf Baechle
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ */
+#include <asm/mipsregs.h>
+#include <asm/regdef.h>
+#include <asm/stackframe.h>
+
+	.macro tlb_do_page_fault, write
+	NESTED(tlb_do_page_fault_\write, PT_SIZE, sp)
+	.cfi_signal_frame
+	SAVE_ALL docfi=1
+	MFC0	a2, CP0_BADVADDR
+	KMODE
+	move	a0, sp
+	REG_S	a2, PT_BVADDR(sp)
+	li	a1, \write
+	jal	do_page_fault
+	j	ret_from_exception
+	END(tlb_do_page_fault_\write)
+	.endm
+
+	tlb_do_page_fault 0
+	tlb_do_page_fault 1
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
new file mode 100644
index 0000000000..b4e1c783e6
--- /dev/null
+++ b/arch/mips/mm/tlbex.c
@@ -0,0 +1,2603 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Synthesize TLB refill handlers at runtime.
+ *
+ * Copyright (C) 2004, 2005, 2006, 2008	 Thiemo Seufer
+ * Copyright (C) 2005, 2007, 2008, 2009	 Maciej W. Rozycki
+ * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 2008, 2009 Cavium Networks, Inc.
+ * Copyright (C) 2011  MIPS Technologies, Inc.
+ *
+ * ... and the days got worse and worse and now you see
+ * I've gone completely out of my mind.
+ *
+ * They're coming to take me a away haha
+ * they're coming to take me a away hoho hihi haha
+ * to the funny farm where code is beautiful all the time ...
+ *
+ * (Condolences to Napoleon XIV)
+ */
+
+#include <linux/bug.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/cache.h>
+#include <linux/pgtable.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpu-type.h>
+#include <asm/mmu_context.h>
+#include <asm/uasm.h>
+#include <asm/setup.h>
+#include <asm/tlbex.h>
+
+static int mips_xpa_disabled;
+
+static int __init xpa_disable(char *s)
+{
+	mips_xpa_disabled = 1;
+
+	return 1;
+}
+
+__setup("noxpa", xpa_disable);
+
+/*
+ * TLB load/store/modify handlers.
+ *
+ * Only the fastpath gets synthesized at runtime, the slowpath for
+ * do_page_fault remains normal asm.
+ */
+extern void tlb_do_page_fault_0(void);
+extern void tlb_do_page_fault_1(void);
+
+struct work_registers {
+	int r1;
+	int r2;
+	int r3;
+};
+
+struct tlb_reg_save {
+	unsigned long a;
+	unsigned long b;
+} ____cacheline_aligned_in_smp;
+
+static struct tlb_reg_save handler_reg_save[NR_CPUS];
+
+static inline int r45k_bvahwbug(void)
+{
+	/* XXX: We should probe for the presence of this bug, but we don't. */
+	return 0;
+}
+
+static inline int r4k_250MHZhwbug(void)
+{
+	/* XXX: We should probe for the presence of this bug, but we don't. */
+	return 0;
+}
+
+extern int sb1250_m3_workaround_needed(void);
+
+static inline int __maybe_unused bcm1250_m3_war(void)
+{
+	if (IS_ENABLED(CONFIG_SB1_PASS_2_WORKAROUNDS))
+		return sb1250_m3_workaround_needed();
+	return 0;
+}
+
+static inline int __maybe_unused r10000_llsc_war(void)
+{
+	return IS_ENABLED(CONFIG_WAR_R10000_LLSC);
+}
+
+static int use_bbit_insns(void)
+{
+	switch (current_cpu_type()) {
+	case CPU_CAVIUM_OCTEON:
+	case CPU_CAVIUM_OCTEON_PLUS:
+	case CPU_CAVIUM_OCTEON2:
+	case CPU_CAVIUM_OCTEON3:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static int use_lwx_insns(void)
+{
+	switch (current_cpu_type()) {
+	case CPU_CAVIUM_OCTEON2:
+	case CPU_CAVIUM_OCTEON3:
+		return 1;
+	default:
+		return 0;
+	}
+}
+#if defined(CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE) && \
+    CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
+static bool scratchpad_available(void)
+{
+	return true;
+}
+static int scratchpad_offset(int i)
+{
+	/*
+	 * CVMSEG starts at address -32768 and extends for
+	 * CAVIUM_OCTEON_CVMSEG_SIZE 128 byte cache lines.
+	 */
+	i += 1; /* Kernel use starts at the top and works down. */
+	return CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128 - (8 * i) - 32768;
+}
+#else
+static bool scratchpad_available(void)
+{
+	return false;
+}
+static int scratchpad_offset(int i)
+{
+	BUG();
+	/* Really unreachable, but evidently some GCC want this. */
+	return 0;
+}
+#endif
+/*
+ * Found by experiment: At least some revisions of the 4kc throw under
+ * some circumstances a machine check exception, triggered by invalid
+ * values in the index register.  Delaying the tlbp instruction until
+ * after the next branch,  plus adding an additional nop in front of
+ * tlbwi/tlbwr avoids the invalid index register values. Nobody knows
+ * why; it's not an issue caused by the core RTL.
+ *
+ */
+static int m4kc_tlbp_war(void)
+{
+	return current_cpu_type() == CPU_4KC;
+}
+
+/* Handle labels (which must be positive integers). */
+enum label_id {
+	label_second_part = 1,
+	label_leave,
+	label_vmalloc,
+	label_vmalloc_done,
+	label_tlbw_hazard_0,
+	label_split = label_tlbw_hazard_0 + 8,
+	label_tlbl_goaround1,
+	label_tlbl_goaround2,
+	label_nopage_tlbl,
+	label_nopage_tlbs,
+	label_nopage_tlbm,
+	label_smp_pgtable_change,
+	label_r3000_write_probe_fail,
+	label_large_segbits_fault,
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	label_tlb_huge_update,
+#endif
+};
+
+UASM_L_LA(_second_part)
+UASM_L_LA(_leave)
+UASM_L_LA(_vmalloc)
+UASM_L_LA(_vmalloc_done)
+/* _tlbw_hazard_x is handled differently.  */
+UASM_L_LA(_split)
+UASM_L_LA(_tlbl_goaround1)
+UASM_L_LA(_tlbl_goaround2)
+UASM_L_LA(_nopage_tlbl)
+UASM_L_LA(_nopage_tlbs)
+UASM_L_LA(_nopage_tlbm)
+UASM_L_LA(_smp_pgtable_change)
+UASM_L_LA(_r3000_write_probe_fail)
+UASM_L_LA(_large_segbits_fault)
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+UASM_L_LA(_tlb_huge_update)
+#endif
+
+static int hazard_instance;
+
+static void uasm_bgezl_hazard(u32 **p, struct uasm_reloc **r, int instance)
+{
+	switch (instance) {
+	case 0 ... 7:
+		uasm_il_bgezl(p, r, 0, label_tlbw_hazard_0 + instance);
+		return;
+	default:
+		BUG();
+	}
+}
+
+static void uasm_bgezl_label(struct uasm_label **l, u32 **p, int instance)
+{
+	switch (instance) {
+	case 0 ... 7:
+		uasm_build_label(l, *p, label_tlbw_hazard_0 + instance);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * pgtable bits are assigned dynamically depending on processor feature
+ * and statically based on kernel configuration.  This spits out the actual
+ * values the kernel is using.	Required to make sense from disassembled
+ * TLB exception handlers.
+ */
+static void output_pgtable_bits_defines(void)
+{
+#define pr_define(fmt, ...)					\
+	pr_debug("#define " fmt, ##__VA_ARGS__)
+
+	pr_debug("#include <asm/asm.h>\n");
+	pr_debug("#include <asm/regdef.h>\n");
+	pr_debug("\n");
+
+	pr_define("_PAGE_PRESENT_SHIFT %d\n", _PAGE_PRESENT_SHIFT);
+	pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT);
+	pr_define("_PAGE_WRITE_SHIFT %d\n", _PAGE_WRITE_SHIFT);
+	pr_define("_PAGE_ACCESSED_SHIFT %d\n", _PAGE_ACCESSED_SHIFT);
+	pr_define("_PAGE_MODIFIED_SHIFT %d\n", _PAGE_MODIFIED_SHIFT);
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT);
+#endif
+#ifdef _PAGE_NO_EXEC_SHIFT
+	if (cpu_has_rixi)
+		pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT);
+#endif
+	pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT);
+	pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT);
+	pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT);
+	pr_define("PFN_PTE_SHIFT %d\n", PFN_PTE_SHIFT);
+	pr_debug("\n");
+}
+
+static inline void dump_handler(const char *symbol, const void *start, const void *end)
+{
+	unsigned int count = (end - start) / sizeof(u32);
+	const u32 *handler = start;
+	int i;
+
+	pr_debug("LEAF(%s)\n", symbol);
+
+	pr_debug("\t.set push\n");
+	pr_debug("\t.set noreorder\n");
+
+	for (i = 0; i < count; i++)
+		pr_debug("\t.word\t0x%08x\t\t# %p\n", handler[i], &handler[i]);
+
+	pr_debug("\t.set\tpop\n");
+
+	pr_debug("\tEND(%s)\n", symbol);
+}
+
+/* The only general purpose registers allowed in TLB handlers. */
+#define K0		26
+#define K1		27
+
+/* Some CP0 registers */
+#define C0_INDEX	0, 0
+#define C0_ENTRYLO0	2, 0
+#define C0_TCBIND	2, 2
+#define C0_ENTRYLO1	3, 0
+#define C0_CONTEXT	4, 0
+#define C0_PAGEMASK	5, 0
+#define C0_PWBASE	5, 5
+#define C0_PWFIELD	5, 6
+#define C0_PWSIZE	5, 7
+#define C0_PWCTL	6, 6
+#define C0_BADVADDR	8, 0
+#define C0_PGD		9, 7
+#define C0_ENTRYHI	10, 0
+#define C0_EPC		14, 0
+#define C0_XCONTEXT	20, 0
+
+#ifdef CONFIG_64BIT
+# define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_XCONTEXT)
+#else
+# define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_CONTEXT)
+#endif
+
+/* The worst case length of the handler is around 18 instructions for
+ * R3000-style TLBs and up to 63 instructions for R4000-style TLBs.
+ * Maximum space available is 32 instructions for R3000 and 64
+ * instructions for R4000.
+ *
+ * We deliberately chose a buffer size of 128, so we won't scribble
+ * over anything important on overflow before we panic.
+ */
+static u32 tlb_handler[128];
+
+/* simply assume worst case size for labels and relocs */
+static struct uasm_label labels[128];
+static struct uasm_reloc relocs[128];
+
+static int check_for_high_segbits;
+static bool fill_includes_sw_bits;
+
+static unsigned int kscratch_used_mask;
+
+static inline int __maybe_unused c0_kscratch(void)
+{
+	return 31;
+}
+
+static int allocate_kscratch(void)
+{
+	int r;
+	unsigned int a = cpu_data[0].kscratch_mask & ~kscratch_used_mask;
+
+	r = ffs(a);
+
+	if (r == 0)
+		return -1;
+
+	r--; /* make it zero based */
+
+	kscratch_used_mask |= (1 << r);
+
+	return r;
+}
+
+static int scratch_reg;
+int pgd_reg;
+EXPORT_SYMBOL_GPL(pgd_reg);
+enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch};
+
+static struct work_registers build_get_work_registers(u32 **p)
+{
+	struct work_registers r;
+
+	if (scratch_reg >= 0) {
+		/* Save in CPU local C0_KScratch? */
+		UASM_i_MTC0(p, 1, c0_kscratch(), scratch_reg);
+		r.r1 = K0;
+		r.r2 = K1;
+		r.r3 = 1;
+		return r;
+	}
+
+	if (num_possible_cpus() > 1) {
+		/* Get smp_processor_id */
+		UASM_i_CPUID_MFC0(p, K0, SMP_CPUID_REG);
+		UASM_i_SRL_SAFE(p, K0, K0, SMP_CPUID_REGSHIFT);
+
+		/* handler_reg_save index in K0 */
+		UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save)));
+
+		UASM_i_LA(p, K1, (long)&handler_reg_save);
+		UASM_i_ADDU(p, K0, K0, K1);
+	} else {
+		UASM_i_LA(p, K0, (long)&handler_reg_save);
+	}
+	/* K0 now points to save area, save $1 and $2  */
+	UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0);
+	UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0);
+
+	r.r1 = K1;
+	r.r2 = 1;
+	r.r3 = 2;
+	return r;
+}
+
+static void build_restore_work_registers(u32 **p)
+{
+	if (scratch_reg >= 0) {
+		uasm_i_ehb(p);
+		UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
+		return;
+	}
+	/* K0 already points to save area, restore $1 and $2  */
+	UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0);
+	UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0);
+}
+
+#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
+
+/*
+ * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current,
+ * we cannot do r3000 under these circumstances.
+ *
+ * The R3000 TLB handler is simple.
+ */
+static void build_r3000_tlb_refill_handler(void)
+{
+	long pgdc = (long)pgd_current;
+	u32 *p;
+
+	memset(tlb_handler, 0, sizeof(tlb_handler));
+	p = tlb_handler;
+
+	uasm_i_mfc0(&p, K0, C0_BADVADDR);
+	uasm_i_lui(&p, K1, uasm_rel_hi(pgdc)); /* cp0 delay */
+	uasm_i_lw(&p, K1, uasm_rel_lo(pgdc), K1);
+	uasm_i_srl(&p, K0, K0, 22); /* load delay */
+	uasm_i_sll(&p, K0, K0, 2);
+	uasm_i_addu(&p, K1, K1, K0);
+	uasm_i_mfc0(&p, K0, C0_CONTEXT);
+	uasm_i_lw(&p, K1, 0, K1); /* cp0 delay */
+	uasm_i_andi(&p, K0, K0, 0xffc); /* load delay */
+	uasm_i_addu(&p, K1, K1, K0);
+	uasm_i_lw(&p, K0, 0, K1);
+	uasm_i_nop(&p); /* load delay */
+	uasm_i_mtc0(&p, K0, C0_ENTRYLO0);
+	uasm_i_mfc0(&p, K1, C0_EPC); /* cp0 delay */
+	uasm_i_tlbwr(&p); /* cp0 delay */
+	uasm_i_jr(&p, K1);
+	uasm_i_rfe(&p); /* branch delay */
+
+	if (p > tlb_handler + 32)
+		panic("TLB refill handler space exceeded");
+
+	pr_debug("Wrote TLB refill handler (%u instructions).\n",
+		 (unsigned int)(p - tlb_handler));
+
+	memcpy((void *)ebase, tlb_handler, 0x80);
+	local_flush_icache_range(ebase, ebase + 0x80);
+	dump_handler("r3000_tlb_refill", (u32 *)ebase, (u32 *)(ebase + 0x80));
+}
+#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
+
+/*
+ * The R4000 TLB handler is much more complicated. We have two
+ * consecutive handler areas with 32 instructions space each.
+ * Since they aren't used at the same time, we can overflow in the
+ * other one.To keep things simple, we first assume linear space,
+ * then we relocate it to the final handler layout as needed.
+ */
+static u32 final_handler[64];
+
+/*
+ * Hazards
+ *
+ * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0:
+ * 2. A timing hazard exists for the TLBP instruction.
+ *
+ *	stalling_instruction
+ *	TLBP
+ *
+ * The JTLB is being read for the TLBP throughout the stall generated by the
+ * previous instruction. This is not really correct as the stalling instruction
+ * can modify the address used to access the JTLB.  The failure symptom is that
+ * the TLBP instruction will use an address created for the stalling instruction
+ * and not the address held in C0_ENHI and thus report the wrong results.
+ *
+ * The software work-around is to not allow the instruction preceding the TLBP
+ * to stall - make it an NOP or some other instruction guaranteed not to stall.
+ *
+ * Errata 2 will not be fixed.	This errata is also on the R5000.
+ *
+ * As if we MIPS hackers wouldn't know how to nop pipelines happy ...
+ */
+static void __maybe_unused build_tlb_probe_entry(u32 **p)
+{
+	switch (current_cpu_type()) {
+	/* Found by experiment: R4600 v2.0/R4700 needs this, too.  */
+	case CPU_R4600:
+	case CPU_R4700:
+	case CPU_R5000:
+	case CPU_NEVADA:
+		uasm_i_nop(p);
+		uasm_i_tlbp(p);
+		break;
+
+	default:
+		uasm_i_tlbp(p);
+		break;
+	}
+}
+
+void build_tlb_write_entry(u32 **p, struct uasm_label **l,
+			   struct uasm_reloc **r,
+			   enum tlb_write_entry wmode)
+{
+	void(*tlbw)(u32 **) = NULL;
+
+	switch (wmode) {
+	case tlb_random: tlbw = uasm_i_tlbwr; break;
+	case tlb_indexed: tlbw = uasm_i_tlbwi; break;
+	}
+
+	if (cpu_has_mips_r2_r6) {
+		if (cpu_has_mips_r2_exec_hazard)
+			uasm_i_ehb(p);
+		tlbw(p);
+		return;
+	}
+
+	switch (current_cpu_type()) {
+	case CPU_R4000PC:
+	case CPU_R4000SC:
+	case CPU_R4000MC:
+	case CPU_R4400PC:
+	case CPU_R4400SC:
+	case CPU_R4400MC:
+		/*
+		 * This branch uses up a mtc0 hazard nop slot and saves
+		 * two nops after the tlbw instruction.
+		 */
+		uasm_bgezl_hazard(p, r, hazard_instance);
+		tlbw(p);
+		uasm_bgezl_label(l, p, hazard_instance);
+		hazard_instance++;
+		uasm_i_nop(p);
+		break;
+
+	case CPU_R4600:
+	case CPU_R4700:
+		uasm_i_nop(p);
+		tlbw(p);
+		uasm_i_nop(p);
+		break;
+
+	case CPU_R5000:
+	case CPU_NEVADA:
+		uasm_i_nop(p); /* QED specifies 2 nops hazard */
+		uasm_i_nop(p); /* QED specifies 2 nops hazard */
+		tlbw(p);
+		break;
+
+	case CPU_R4300:
+	case CPU_5KC:
+	case CPU_TX49XX:
+	case CPU_PR4450:
+		uasm_i_nop(p);
+		tlbw(p);
+		break;
+
+	case CPU_R10000:
+	case CPU_R12000:
+	case CPU_R14000:
+	case CPU_R16000:
+	case CPU_4KC:
+	case CPU_4KEC:
+	case CPU_M14KC:
+	case CPU_M14KEC:
+	case CPU_SB1:
+	case CPU_SB1A:
+	case CPU_4KSC:
+	case CPU_20KC:
+	case CPU_25KF:
+	case CPU_BMIPS32:
+	case CPU_BMIPS3300:
+	case CPU_BMIPS4350:
+	case CPU_BMIPS4380:
+	case CPU_BMIPS5000:
+	case CPU_LOONGSON2EF:
+	case CPU_LOONGSON64:
+	case CPU_R5500:
+		if (m4kc_tlbp_war())
+			uasm_i_nop(p);
+		fallthrough;
+	case CPU_ALCHEMY:
+		tlbw(p);
+		break;
+
+	case CPU_RM7000:
+		uasm_i_nop(p);
+		uasm_i_nop(p);
+		uasm_i_nop(p);
+		uasm_i_nop(p);
+		tlbw(p);
+		break;
+
+	case CPU_XBURST:
+		tlbw(p);
+		uasm_i_nop(p);
+		break;
+
+	default:
+		panic("No TLB refill handler yet (CPU type: %d)",
+		      current_cpu_type());
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(build_tlb_write_entry);
+
+static __maybe_unused void build_convert_pte_to_entrylo(u32 **p,
+							unsigned int reg)
+{
+	if (_PAGE_GLOBAL_SHIFT == 0) {
+		/* pte_t is already in EntryLo format */
+		return;
+	}
+
+	if (cpu_has_rixi && _PAGE_NO_EXEC != 0) {
+		if (fill_includes_sw_bits) {
+			UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL));
+		} else {
+			UASM_i_SRL(p, reg, reg, ilog2(_PAGE_NO_EXEC));
+			UASM_i_ROTR(p, reg, reg,
+				    ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC));
+		}
+	} else {
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+		uasm_i_dsrl_safe(p, reg, reg, ilog2(_PAGE_GLOBAL));
+#else
+		UASM_i_SRL(p, reg, reg, ilog2(_PAGE_GLOBAL));
+#endif
+	}
+}
+
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+
+static void build_restore_pagemask(u32 **p, struct uasm_reloc **r,
+				   unsigned int tmp, enum label_id lid,
+				   int restore_scratch)
+{
+	if (restore_scratch) {
+		/*
+		 * Ensure the MFC0 below observes the value written to the
+		 * KScratch register by the prior MTC0.
+		 */
+		if (scratch_reg >= 0)
+			uasm_i_ehb(p);
+
+		/* Reset default page size */
+		if (PM_DEFAULT_MASK >> 16) {
+			uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16);
+			uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff);
+			uasm_i_mtc0(p, tmp, C0_PAGEMASK);
+			uasm_il_b(p, r, lid);
+		} else if (PM_DEFAULT_MASK) {
+			uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK);
+			uasm_i_mtc0(p, tmp, C0_PAGEMASK);
+			uasm_il_b(p, r, lid);
+		} else {
+			uasm_i_mtc0(p, 0, C0_PAGEMASK);
+			uasm_il_b(p, r, lid);
+		}
+		if (scratch_reg >= 0)
+			UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
+		else
+			UASM_i_LW(p, 1, scratchpad_offset(0), 0);
+	} else {
+		/* Reset default page size */
+		if (PM_DEFAULT_MASK >> 16) {
+			uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16);
+			uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff);
+			uasm_il_b(p, r, lid);
+			uasm_i_mtc0(p, tmp, C0_PAGEMASK);
+		} else if (PM_DEFAULT_MASK) {
+			uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK);
+			uasm_il_b(p, r, lid);
+			uasm_i_mtc0(p, tmp, C0_PAGEMASK);
+		} else {
+			uasm_il_b(p, r, lid);
+			uasm_i_mtc0(p, 0, C0_PAGEMASK);
+		}
+	}
+}
+
+static void build_huge_tlb_write_entry(u32 **p, struct uasm_label **l,
+				       struct uasm_reloc **r,
+				       unsigned int tmp,
+				       enum tlb_write_entry wmode,
+				       int restore_scratch)
+{
+	/* Set huge page tlb entry size */
+	uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16);
+	uasm_i_ori(p, tmp, tmp, PM_HUGE_MASK & 0xffff);
+	uasm_i_mtc0(p, tmp, C0_PAGEMASK);
+
+	build_tlb_write_entry(p, l, r, wmode);
+
+	build_restore_pagemask(p, r, tmp, label_leave, restore_scratch);
+}
+
+/*
+ * Check if Huge PTE is present, if so then jump to LABEL.
+ */
+static void
+build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp,
+		  unsigned int pmd, int lid)
+{
+	UASM_i_LW(p, tmp, 0, pmd);
+	if (use_bbit_insns()) {
+		uasm_il_bbit1(p, r, tmp, ilog2(_PAGE_HUGE), lid);
+	} else {
+		uasm_i_andi(p, tmp, tmp, _PAGE_HUGE);
+		uasm_il_bnez(p, r, tmp, lid);
+	}
+}
+
+static void build_huge_update_entries(u32 **p, unsigned int pte,
+				      unsigned int tmp)
+{
+	int small_sequence;
+
+	/*
+	 * A huge PTE describes an area the size of the
+	 * configured huge page size. This is twice the
+	 * of the large TLB entry size we intend to use.
+	 * A TLB entry half the size of the configured
+	 * huge page size is configured into entrylo0
+	 * and entrylo1 to cover the contiguous huge PTE
+	 * address space.
+	 */
+	small_sequence = (HPAGE_SIZE >> 7) < 0x10000;
+
+	/* We can clobber tmp.	It isn't used after this.*/
+	if (!small_sequence)
+		uasm_i_lui(p, tmp, HPAGE_SIZE >> (7 + 16));
+
+	build_convert_pte_to_entrylo(p, pte);
+	UASM_i_MTC0(p, pte, C0_ENTRYLO0); /* load it */
+	/* convert to entrylo1 */
+	if (small_sequence)
+		UASM_i_ADDIU(p, pte, pte, HPAGE_SIZE >> 7);
+	else
+		UASM_i_ADDU(p, pte, pte, tmp);
+
+	UASM_i_MTC0(p, pte, C0_ENTRYLO1); /* load it */
+}
+
+static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r,
+				    struct uasm_label **l,
+				    unsigned int pte,
+				    unsigned int ptr,
+				    unsigned int flush)
+{
+#ifdef CONFIG_SMP
+	UASM_i_SC(p, pte, 0, ptr);
+	uasm_il_beqz(p, r, pte, label_tlb_huge_update);
+	UASM_i_LW(p, pte, 0, ptr); /* Needed because SC killed our PTE */
+#else
+	UASM_i_SW(p, pte, 0, ptr);
+#endif
+	if (cpu_has_ftlb && flush) {
+		BUG_ON(!cpu_has_tlbinv);
+
+		UASM_i_MFC0(p, ptr, C0_ENTRYHI);
+		uasm_i_ori(p, ptr, ptr, MIPS_ENTRYHI_EHINV);
+		UASM_i_MTC0(p, ptr, C0_ENTRYHI);
+		build_tlb_write_entry(p, l, r, tlb_indexed);
+
+		uasm_i_xori(p, ptr, ptr, MIPS_ENTRYHI_EHINV);
+		UASM_i_MTC0(p, ptr, C0_ENTRYHI);
+		build_huge_update_entries(p, pte, ptr);
+		build_huge_tlb_write_entry(p, l, r, pte, tlb_random, 0);
+
+		return;
+	}
+
+	build_huge_update_entries(p, pte, ptr);
+	build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0);
+}
+#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */
+
+#ifdef CONFIG_64BIT
+/*
+ * TMP and PTR are scratch.
+ * TMP will be clobbered, PTR will hold the pmd entry.
+ */
+void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
+		      unsigned int tmp, unsigned int ptr)
+{
+#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
+	long pgdc = (long)pgd_current;
+#endif
+	/*
+	 * The vmalloc handling is not in the hotpath.
+	 */
+	uasm_i_dmfc0(p, tmp, C0_BADVADDR);
+
+	if (check_for_high_segbits) {
+		/*
+		 * The kernel currently implicitely assumes that the
+		 * MIPS SEGBITS parameter for the processor is
+		 * (PGDIR_SHIFT+PGDIR_BITS) or less, and will never
+		 * allocate virtual addresses outside the maximum
+		 * range for SEGBITS = (PGDIR_SHIFT+PGDIR_BITS). But
+		 * that doesn't prevent user code from accessing the
+		 * higher xuseg addresses.  Here, we make sure that
+		 * everything but the lower xuseg addresses goes down
+		 * the module_alloc/vmalloc path.
+		 */
+		uasm_i_dsrl_safe(p, ptr, tmp, PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3);
+		uasm_il_bnez(p, r, ptr, label_vmalloc);
+	} else {
+		uasm_il_bltz(p, r, tmp, label_vmalloc);
+	}
+	/* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */
+
+	if (pgd_reg != -1) {
+		/* pgd is in pgd_reg */
+		if (cpu_has_ldpte)
+			UASM_i_MFC0(p, ptr, C0_PWBASE);
+		else
+			UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg);
+	} else {
+#if defined(CONFIG_MIPS_PGD_C0_CONTEXT)
+		/*
+		 * &pgd << 11 stored in CONTEXT [23..63].
+		 */
+		UASM_i_MFC0(p, ptr, C0_CONTEXT);
+
+		/* Clear lower 23 bits of context. */
+		uasm_i_dins(p, ptr, 0, 0, 23);
+
+		/* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */
+		uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53));
+		uasm_i_drotr(p, ptr, ptr, 11);
+#elif defined(CONFIG_SMP)
+		UASM_i_CPUID_MFC0(p, ptr, SMP_CPUID_REG);
+		uasm_i_dsrl_safe(p, ptr, ptr, SMP_CPUID_PTRSHIFT);
+		UASM_i_LA_mostly(p, tmp, pgdc);
+		uasm_i_daddu(p, ptr, ptr, tmp);
+		uasm_i_dmfc0(p, tmp, C0_BADVADDR);
+		uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr);
+#else
+		UASM_i_LA_mostly(p, ptr, pgdc);
+		uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr);
+#endif
+	}
+
+	uasm_l_vmalloc_done(l, *p);
+
+	/* get pgd offset in bytes */
+	uasm_i_dsrl_safe(p, tmp, tmp, PGDIR_SHIFT - 3);
+
+	uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3);
+	uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */
+#ifndef __PAGETABLE_PUD_FOLDED
+	uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+	uasm_i_ld(p, ptr, 0, ptr); /* get pud pointer */
+	uasm_i_dsrl_safe(p, tmp, tmp, PUD_SHIFT - 3); /* get pud offset in bytes */
+	uasm_i_andi(p, tmp, tmp, (PTRS_PER_PUD - 1) << 3);
+	uasm_i_daddu(p, ptr, ptr, tmp); /* add in pud offset */
+#endif
+#ifndef __PAGETABLE_PMD_FOLDED
+	uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+	uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */
+	uasm_i_dsrl_safe(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */
+	uasm_i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3);
+	uasm_i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */
+#endif
+}
+EXPORT_SYMBOL_GPL(build_get_pmde64);
+
+/*
+ * BVADDR is the faulting address, PTR is scratch.
+ * PTR will hold the pgd for vmalloc.
+ */
+static void
+build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
+			unsigned int bvaddr, unsigned int ptr,
+			enum vmalloc64_mode mode)
+{
+	long swpd = (long)swapper_pg_dir;
+	int single_insn_swpd;
+	int did_vmalloc_branch = 0;
+
+	single_insn_swpd = uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd);
+
+	uasm_l_vmalloc(l, *p);
+
+	if (mode != not_refill && check_for_high_segbits) {
+		if (single_insn_swpd) {
+			uasm_il_bltz(p, r, bvaddr, label_vmalloc_done);
+			uasm_i_lui(p, ptr, uasm_rel_hi(swpd));
+			did_vmalloc_branch = 1;
+			/* fall through */
+		} else {
+			uasm_il_bgez(p, r, bvaddr, label_large_segbits_fault);
+		}
+	}
+	if (!did_vmalloc_branch) {
+		if (single_insn_swpd) {
+			uasm_il_b(p, r, label_vmalloc_done);
+			uasm_i_lui(p, ptr, uasm_rel_hi(swpd));
+		} else {
+			UASM_i_LA_mostly(p, ptr, swpd);
+			uasm_il_b(p, r, label_vmalloc_done);
+			if (uasm_in_compat_space_p(swpd))
+				uasm_i_addiu(p, ptr, ptr, uasm_rel_lo(swpd));
+			else
+				uasm_i_daddiu(p, ptr, ptr, uasm_rel_lo(swpd));
+		}
+	}
+	if (mode != not_refill && check_for_high_segbits) {
+		uasm_l_large_segbits_fault(l, *p);
+
+		if (mode == refill_scratch && scratch_reg >= 0)
+			uasm_i_ehb(p);
+
+		/*
+		 * We get here if we are an xsseg address, or if we are
+		 * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary.
+		 *
+		 * Ignoring xsseg (assume disabled so would generate
+		 * (address errors?), the only remaining possibility
+		 * is the upper xuseg addresses.  On processors with
+		 * TLB_SEGBITS <= PGDIR_SHIFT+PGDIR_BITS, these
+		 * addresses would have taken an address error. We try
+		 * to mimic that here by taking a load/istream page
+		 * fault.
+		 */
+		if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+			uasm_i_sync(p, 0);
+		UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
+		uasm_i_jr(p, ptr);
+
+		if (mode == refill_scratch) {
+			if (scratch_reg >= 0)
+				UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
+			else
+				UASM_i_LW(p, 1, scratchpad_offset(0), 0);
+		} else {
+			uasm_i_nop(p);
+		}
+	}
+}
+
+#else /* !CONFIG_64BIT */
+
+/*
+ * TMP and PTR are scratch.
+ * TMP will be clobbered, PTR will hold the pgd entry.
+ */
+void build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
+{
+	if (pgd_reg != -1) {
+		/* pgd is in pgd_reg */
+		uasm_i_mfc0(p, ptr, c0_kscratch(), pgd_reg);
+		uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+	} else {
+		long pgdc = (long)pgd_current;
+
+		/* 32 bit SMP has smp_processor_id() stored in CONTEXT. */
+#ifdef CONFIG_SMP
+		uasm_i_mfc0(p, ptr, SMP_CPUID_REG);
+		UASM_i_LA_mostly(p, tmp, pgdc);
+		uasm_i_srl(p, ptr, ptr, SMP_CPUID_PTRSHIFT);
+		uasm_i_addu(p, ptr, tmp, ptr);
+#else
+		UASM_i_LA_mostly(p, ptr, pgdc);
+#endif
+		uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+		uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr);
+	}
+	uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */
+	uasm_i_sll(p, tmp, tmp, PGD_T_LOG2);
+	uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
+}
+EXPORT_SYMBOL_GPL(build_get_pgde32);
+
+#endif /* !CONFIG_64BIT */
+
+static void build_adjust_context(u32 **p, unsigned int ctx)
+{
+	unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12;
+	unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1);
+
+	if (shift)
+		UASM_i_SRL(p, ctx, ctx, shift);
+	uasm_i_andi(p, ctx, ctx, mask);
+}
+
+void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
+{
+	/*
+	 * Bug workaround for the Nevada. It seems as if under certain
+	 * circumstances the move from cp0_context might produce a
+	 * bogus result when the mfc0 instruction and its consumer are
+	 * in a different cacheline or a load instruction, probably any
+	 * memory reference, is between them.
+	 */
+	switch (current_cpu_type()) {
+	case CPU_NEVADA:
+		UASM_i_LW(p, ptr, 0, ptr);
+		GET_CONTEXT(p, tmp); /* get context reg */
+		break;
+
+	default:
+		GET_CONTEXT(p, tmp); /* get context reg */
+		UASM_i_LW(p, ptr, 0, ptr);
+		break;
+	}
+
+	build_adjust_context(p, tmp);
+	UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */
+}
+EXPORT_SYMBOL_GPL(build_get_ptep);
+
+void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep)
+{
+	int pte_off_even = 0;
+	int pte_off_odd = sizeof(pte_t);
+
+#if defined(CONFIG_CPU_MIPS32) && defined(CONFIG_PHYS_ADDR_T_64BIT)
+	/* The low 32 bits of EntryLo is stored in pte_high */
+	pte_off_even += offsetof(pte_t, pte_high);
+	pte_off_odd += offsetof(pte_t, pte_high);
+#endif
+
+	if (IS_ENABLED(CONFIG_XPA)) {
+		uasm_i_lw(p, tmp, pte_off_even, ptep); /* even pte */
+		UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL));
+		UASM_i_MTC0(p, tmp, C0_ENTRYLO0);
+
+		if (cpu_has_xpa && !mips_xpa_disabled) {
+			uasm_i_lw(p, tmp, 0, ptep);
+			uasm_i_ext(p, tmp, tmp, 0, 24);
+			uasm_i_mthc0(p, tmp, C0_ENTRYLO0);
+		}
+
+		uasm_i_lw(p, tmp, pte_off_odd, ptep); /* odd pte */
+		UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL));
+		UASM_i_MTC0(p, tmp, C0_ENTRYLO1);
+
+		if (cpu_has_xpa && !mips_xpa_disabled) {
+			uasm_i_lw(p, tmp, sizeof(pte_t), ptep);
+			uasm_i_ext(p, tmp, tmp, 0, 24);
+			uasm_i_mthc0(p, tmp, C0_ENTRYLO1);
+		}
+		return;
+	}
+
+	UASM_i_LW(p, tmp, pte_off_even, ptep); /* get even pte */
+	UASM_i_LW(p, ptep, pte_off_odd, ptep); /* get odd pte */
+	if (r45k_bvahwbug())
+		build_tlb_probe_entry(p);
+	build_convert_pte_to_entrylo(p, tmp);
+	if (r4k_250MHZhwbug())
+		UASM_i_MTC0(p, 0, C0_ENTRYLO0);
+	UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
+	build_convert_pte_to_entrylo(p, ptep);
+	if (r45k_bvahwbug())
+		uasm_i_mfc0(p, tmp, C0_INDEX);
+	if (r4k_250MHZhwbug())
+		UASM_i_MTC0(p, 0, C0_ENTRYLO1);
+	UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */
+}
+EXPORT_SYMBOL_GPL(build_update_entries);
+
+struct mips_huge_tlb_info {
+	int huge_pte;
+	int restore_scratch;
+	bool need_reload_pte;
+};
+
+static struct mips_huge_tlb_info
+build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
+			       struct uasm_reloc **r, unsigned int tmp,
+			       unsigned int ptr, int c0_scratch_reg)
+{
+	struct mips_huge_tlb_info rv;
+	unsigned int even, odd;
+	int vmalloc_branch_delay_filled = 0;
+	const int scratch = 1; /* Our extra working register */
+
+	rv.huge_pte = scratch;
+	rv.restore_scratch = 0;
+	rv.need_reload_pte = false;
+
+	if (check_for_high_segbits) {
+		UASM_i_MFC0(p, tmp, C0_BADVADDR);
+
+		if (pgd_reg != -1)
+			UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg);
+		else
+			UASM_i_MFC0(p, ptr, C0_CONTEXT);
+
+		if (c0_scratch_reg >= 0)
+			UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg);
+		else
+			UASM_i_SW(p, scratch, scratchpad_offset(0), 0);
+
+		uasm_i_dsrl_safe(p, scratch, tmp,
+				 PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3);
+		uasm_il_bnez(p, r, scratch, label_vmalloc);
+
+		if (pgd_reg == -1) {
+			vmalloc_branch_delay_filled = 1;
+			/* Clear lower 23 bits of context. */
+			uasm_i_dins(p, ptr, 0, 0, 23);
+		}
+	} else {
+		if (pgd_reg != -1)
+			UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg);
+		else
+			UASM_i_MFC0(p, ptr, C0_CONTEXT);
+
+		UASM_i_MFC0(p, tmp, C0_BADVADDR);
+
+		if (c0_scratch_reg >= 0)
+			UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg);
+		else
+			UASM_i_SW(p, scratch, scratchpad_offset(0), 0);
+
+		if (pgd_reg == -1)
+			/* Clear lower 23 bits of context. */
+			uasm_i_dins(p, ptr, 0, 0, 23);
+
+		uasm_il_bltz(p, r, tmp, label_vmalloc);
+	}
+
+	if (pgd_reg == -1) {
+		vmalloc_branch_delay_filled = 1;
+		/* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */
+		uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53));
+
+		uasm_i_drotr(p, ptr, ptr, 11);
+	}
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define LOC_PTEP scratch
+#else
+#define LOC_PTEP ptr
+#endif
+
+	if (!vmalloc_branch_delay_filled)
+		/* get pgd offset in bytes */
+		uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3);
+
+	uasm_l_vmalloc_done(l, *p);
+
+	/*
+	 *			   tmp		ptr
+	 * fall-through case =	 badvaddr  *pgd_current
+	 * vmalloc case	     =	 badvaddr  swapper_pg_dir
+	 */
+
+	if (vmalloc_branch_delay_filled)
+		/* get pgd offset in bytes */
+		uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3);
+
+#ifdef __PAGETABLE_PMD_FOLDED
+	GET_CONTEXT(p, tmp); /* get context reg */
+#endif
+	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PGD - 1) << 3);
+
+	if (use_lwx_insns()) {
+		UASM_i_LWX(p, LOC_PTEP, scratch, ptr);
+	} else {
+		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pgd offset */
+		uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */
+	}
+
+#ifndef __PAGETABLE_PUD_FOLDED
+	/* get pud offset in bytes */
+	uasm_i_dsrl_safe(p, scratch, tmp, PUD_SHIFT - 3);
+	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PUD - 1) << 3);
+
+	if (use_lwx_insns()) {
+		UASM_i_LWX(p, ptr, scratch, ptr);
+	} else {
+		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */
+		UASM_i_LW(p, ptr, 0, ptr);
+	}
+	/* ptr contains a pointer to PMD entry */
+	/* tmp contains the address */
+#endif
+
+#ifndef __PAGETABLE_PMD_FOLDED
+	/* get pmd offset in bytes */
+	uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3);
+	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PMD - 1) << 3);
+	GET_CONTEXT(p, tmp); /* get context reg */
+
+	if (use_lwx_insns()) {
+		UASM_i_LWX(p, scratch, scratch, ptr);
+	} else {
+		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */
+		UASM_i_LW(p, scratch, 0, ptr);
+	}
+#endif
+	/* Adjust the context during the load latency. */
+	build_adjust_context(p, tmp);
+
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	uasm_il_bbit1(p, r, scratch, ilog2(_PAGE_HUGE), label_tlb_huge_update);
+	/*
+	 * The in the LWX case we don't want to do the load in the
+	 * delay slot.	It cannot issue in the same cycle and may be
+	 * speculative and unneeded.
+	 */
+	if (use_lwx_insns())
+		uasm_i_nop(p);
+#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */
+
+
+	/* build_update_entries */
+	if (use_lwx_insns()) {
+		even = ptr;
+		odd = tmp;
+		UASM_i_LWX(p, even, scratch, tmp);
+		UASM_i_ADDIU(p, tmp, tmp, sizeof(pte_t));
+		UASM_i_LWX(p, odd, scratch, tmp);
+	} else {
+		UASM_i_ADDU(p, ptr, scratch, tmp); /* add in offset */
+		even = tmp;
+		odd = ptr;
+		UASM_i_LW(p, even, 0, ptr); /* get even pte */
+		UASM_i_LW(p, odd, sizeof(pte_t), ptr); /* get odd pte */
+	}
+	if (cpu_has_rixi) {
+		uasm_i_drotr(p, even, even, ilog2(_PAGE_GLOBAL));
+		UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */
+		uasm_i_drotr(p, odd, odd, ilog2(_PAGE_GLOBAL));
+	} else {
+		uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_GLOBAL));
+		UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */
+		uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_GLOBAL));
+	}
+	UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */
+
+	if (c0_scratch_reg >= 0) {
+		uasm_i_ehb(p);
+		UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg);
+		build_tlb_write_entry(p, l, r, tlb_random);
+		uasm_l_leave(l, *p);
+		rv.restore_scratch = 1;
+	} else if (PAGE_SHIFT == 14 || PAGE_SHIFT == 13)  {
+		build_tlb_write_entry(p, l, r, tlb_random);
+		uasm_l_leave(l, *p);
+		UASM_i_LW(p, scratch, scratchpad_offset(0), 0);
+	} else {
+		UASM_i_LW(p, scratch, scratchpad_offset(0), 0);
+		build_tlb_write_entry(p, l, r, tlb_random);
+		uasm_l_leave(l, *p);
+		rv.restore_scratch = 1;
+	}
+
+	uasm_i_eret(p); /* return from trap */
+
+	return rv;
+}
+
+/*
+ * For a 64-bit kernel, we are using the 64-bit XTLB refill exception
+ * because EXL == 0.  If we wrap, we can also use the 32 instruction
+ * slots before the XTLB refill exception handler which belong to the
+ * unused TLB refill exception.
+ */
+#define MIPS64_REFILL_INSNS 32
+
+static void build_r4000_tlb_refill_handler(void)
+{
+	u32 *p = tlb_handler;
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+	u32 *f;
+	unsigned int final_len;
+	struct mips_huge_tlb_info htlb_info __maybe_unused;
+	enum vmalloc64_mode vmalloc_mode __maybe_unused;
+
+	memset(tlb_handler, 0, sizeof(tlb_handler));
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+	memset(final_handler, 0, sizeof(final_handler));
+
+	if (IS_ENABLED(CONFIG_64BIT) && (scratch_reg >= 0 || scratchpad_available()) && use_bbit_insns()) {
+		htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1,
+							  scratch_reg);
+		vmalloc_mode = refill_scratch;
+	} else {
+		htlb_info.huge_pte = K0;
+		htlb_info.restore_scratch = 0;
+		htlb_info.need_reload_pte = true;
+		vmalloc_mode = refill_noscratch;
+		/*
+		 * create the plain linear handler
+		 */
+		if (bcm1250_m3_war()) {
+			unsigned int segbits = 44;
+
+			uasm_i_dmfc0(&p, K0, C0_BADVADDR);
+			uasm_i_dmfc0(&p, K1, C0_ENTRYHI);
+			uasm_i_xor(&p, K0, K0, K1);
+			uasm_i_dsrl_safe(&p, K1, K0, 62);
+			uasm_i_dsrl_safe(&p, K0, K0, 12 + 1);
+			uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits);
+			uasm_i_or(&p, K0, K0, K1);
+			uasm_il_bnez(&p, &r, K0, label_leave);
+			/* No need for uasm_i_nop */
+		}
+
+#ifdef CONFIG_64BIT
+		build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */
+#else
+		build_get_pgde32(&p, K0, K1); /* get pgd in K1 */
+#endif
+
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+		build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update);
+#endif
+
+		build_get_ptep(&p, K0, K1);
+		build_update_entries(&p, K0, K1);
+		build_tlb_write_entry(&p, &l, &r, tlb_random);
+		uasm_l_leave(&l, p);
+		uasm_i_eret(&p); /* return from trap */
+	}
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	uasm_l_tlb_huge_update(&l, p);
+	if (htlb_info.need_reload_pte)
+		UASM_i_LW(&p, htlb_info.huge_pte, 0, K1);
+	build_huge_update_entries(&p, htlb_info.huge_pte, K1);
+	build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random,
+				   htlb_info.restore_scratch);
+#endif
+
+#ifdef CONFIG_64BIT
+	build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode);
+#endif
+
+	/*
+	 * Overflow check: For the 64bit handler, we need at least one
+	 * free instruction slot for the wrap-around branch. In worst
+	 * case, if the intended insertion point is a delay slot, we
+	 * need three, with the second nop'ed and the third being
+	 * unused.
+	 */
+	switch (boot_cpu_type()) {
+	default:
+		if (sizeof(long) == 4) {
+		fallthrough;
+	case CPU_LOONGSON2EF:
+		/* Loongson2 ebase is different than r4k, we have more space */
+			if ((p - tlb_handler) > 64)
+				panic("TLB refill handler space exceeded");
+			/*
+			 * Now fold the handler in the TLB refill handler space.
+			 */
+			f = final_handler;
+			/* Simplest case, just copy the handler. */
+			uasm_copy_handler(relocs, labels, tlb_handler, p, f);
+			final_len = p - tlb_handler;
+			break;
+		} else {
+			if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1)
+			    || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3)
+				&& uasm_insn_has_bdelay(relocs,
+							tlb_handler + MIPS64_REFILL_INSNS - 3)))
+				panic("TLB refill handler space exceeded");
+			/*
+			 * Now fold the handler in the TLB refill handler space.
+			 */
+			f = final_handler + MIPS64_REFILL_INSNS;
+			if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) {
+				/* Just copy the handler. */
+				uasm_copy_handler(relocs, labels, tlb_handler, p, f);
+				final_len = p - tlb_handler;
+			} else {
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+				const enum label_id ls = label_tlb_huge_update;
+#else
+				const enum label_id ls = label_vmalloc;
+#endif
+				u32 *split;
+				int ov = 0;
+				int i;
+
+				for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++)
+					;
+				BUG_ON(i == ARRAY_SIZE(labels));
+				split = labels[i].addr;
+
+				/*
+				 * See if we have overflown one way or the other.
+				 */
+				if (split > tlb_handler + MIPS64_REFILL_INSNS ||
+				    split < p - MIPS64_REFILL_INSNS)
+					ov = 1;
+
+				if (ov) {
+					/*
+					 * Split two instructions before the end.  One
+					 * for the branch and one for the instruction
+					 * in the delay slot.
+					 */
+					split = tlb_handler + MIPS64_REFILL_INSNS - 2;
+
+					/*
+					 * If the branch would fall in a delay slot,
+					 * we must back up an additional instruction
+					 * so that it is no longer in a delay slot.
+					 */
+					if (uasm_insn_has_bdelay(relocs, split - 1))
+						split--;
+				}
+				/* Copy first part of the handler. */
+				uasm_copy_handler(relocs, labels, tlb_handler, split, f);
+				f += split - tlb_handler;
+
+				if (ov) {
+					/* Insert branch. */
+					uasm_l_split(&l, final_handler);
+					uasm_il_b(&f, &r, label_split);
+					if (uasm_insn_has_bdelay(relocs, split))
+						uasm_i_nop(&f);
+					else {
+						uasm_copy_handler(relocs, labels,
+								  split, split + 1, f);
+						uasm_move_labels(labels, f, f + 1, -1);
+						f++;
+						split++;
+					}
+				}
+
+				/* Copy the rest of the handler. */
+				uasm_copy_handler(relocs, labels, split, p, final_handler);
+				final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) +
+					    (p - split);
+			}
+		}
+		break;
+	}
+
+	uasm_resolve_relocs(relocs, labels);
+	pr_debug("Wrote TLB refill handler (%u instructions).\n",
+		 final_len);
+
+	memcpy((void *)ebase, final_handler, 0x100);
+	local_flush_icache_range(ebase, ebase + 0x100);
+	dump_handler("r4000_tlb_refill", (u32 *)ebase, (u32 *)(ebase + 0x100));
+}
+
+static void setup_pw(void)
+{
+	unsigned int pwctl;
+	unsigned long pgd_i, pgd_w;
+#ifndef __PAGETABLE_PMD_FOLDED
+	unsigned long pmd_i, pmd_w;
+#endif
+	unsigned long pt_i, pt_w;
+	unsigned long pte_i, pte_w;
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	unsigned long psn;
+
+	psn = ilog2(_PAGE_HUGE);     /* bit used to indicate huge page */
+#endif
+	pgd_i = PGDIR_SHIFT;  /* 1st level PGD */
+#ifndef __PAGETABLE_PMD_FOLDED
+	pgd_w = PGDIR_SHIFT - PMD_SHIFT + PGD_TABLE_ORDER;
+
+	pmd_i = PMD_SHIFT;    /* 2nd level PMD */
+	pmd_w = PMD_SHIFT - PAGE_SHIFT;
+#else
+	pgd_w = PGDIR_SHIFT - PAGE_SHIFT + PGD_TABLE_ORDER;
+#endif
+
+	pt_i  = PAGE_SHIFT;    /* 3rd level PTE */
+	pt_w  = PAGE_SHIFT - 3;
+
+	pte_i = ilog2(_PAGE_GLOBAL);
+	pte_w = 0;
+	pwctl = 1 << 30; /* Set PWDirExt */
+
+#ifndef __PAGETABLE_PMD_FOLDED
+	write_c0_pwfield(pgd_i << 24 | pmd_i << 12 | pt_i << 6 | pte_i);
+	write_c0_pwsize(1 << 30 | pgd_w << 24 | pmd_w << 12 | pt_w << 6 | pte_w);
+#else
+	write_c0_pwfield(pgd_i << 24 | pt_i << 6 | pte_i);
+	write_c0_pwsize(1 << 30 | pgd_w << 24 | pt_w << 6 | pte_w);
+#endif
+
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	pwctl |= (1 << 6 | psn);
+#endif
+	write_c0_pwctl(pwctl);
+	write_c0_kpgd((long)swapper_pg_dir);
+	kscratch_used_mask |= (1 << 7); /* KScratch6 is used for KPGD */
+}
+
+static void build_loongson3_tlb_refill_handler(void)
+{
+	u32 *p = tlb_handler;
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+	memset(tlb_handler, 0, sizeof(tlb_handler));
+
+	if (check_for_high_segbits) {
+		uasm_i_dmfc0(&p, K0, C0_BADVADDR);
+		uasm_i_dsrl_safe(&p, K1, K0, PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3);
+		uasm_il_beqz(&p, &r, K1, label_vmalloc);
+		uasm_i_nop(&p);
+
+		uasm_il_bgez(&p, &r, K0, label_large_segbits_fault);
+		uasm_i_nop(&p);
+		uasm_l_vmalloc(&l, p);
+	}
+
+	uasm_i_dmfc0(&p, K1, C0_PGD);
+
+	uasm_i_lddir(&p, K0, K1, 3);  /* global page dir */
+#ifndef __PAGETABLE_PMD_FOLDED
+	uasm_i_lddir(&p, K1, K0, 1);  /* middle page dir */
+#endif
+	uasm_i_ldpte(&p, K1, 0);      /* even */
+	uasm_i_ldpte(&p, K1, 1);      /* odd */
+	uasm_i_tlbwr(&p);
+
+	/* restore page mask */
+	if (PM_DEFAULT_MASK >> 16) {
+		uasm_i_lui(&p, K0, PM_DEFAULT_MASK >> 16);
+		uasm_i_ori(&p, K0, K0, PM_DEFAULT_MASK & 0xffff);
+		uasm_i_mtc0(&p, K0, C0_PAGEMASK);
+	} else if (PM_DEFAULT_MASK) {
+		uasm_i_ori(&p, K0, 0, PM_DEFAULT_MASK);
+		uasm_i_mtc0(&p, K0, C0_PAGEMASK);
+	} else {
+		uasm_i_mtc0(&p, 0, C0_PAGEMASK);
+	}
+
+	uasm_i_eret(&p);
+
+	if (check_for_high_segbits) {
+		uasm_l_large_segbits_fault(&l, p);
+		UASM_i_LA(&p, K1, (unsigned long)tlb_do_page_fault_0);
+		uasm_i_jr(&p, K1);
+		uasm_i_nop(&p);
+	}
+
+	uasm_resolve_relocs(relocs, labels);
+	memcpy((void *)(ebase + 0x80), tlb_handler, 0x80);
+	local_flush_icache_range(ebase + 0x80, ebase + 0x100);
+	dump_handler("loongson3_tlb_refill",
+		     (u32 *)(ebase + 0x80), (u32 *)(ebase + 0x100));
+}
+
+static void build_setup_pgd(void)
+{
+	const int a0 = 4;
+	const int __maybe_unused a1 = 5;
+	const int __maybe_unused a2 = 6;
+	u32 *p = (u32 *)msk_isa16_mode((ulong)tlbmiss_handler_setup_pgd);
+#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
+	long pgdc = (long)pgd_current;
+#endif
+
+	memset(p, 0, tlbmiss_handler_setup_pgd_end - (char *)p);
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+	pgd_reg = allocate_kscratch();
+#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
+	if (pgd_reg == -1) {
+		struct uasm_label *l = labels;
+		struct uasm_reloc *r = relocs;
+
+		/* PGD << 11 in c0_Context */
+		/*
+		 * If it is a ckseg0 address, convert to a physical
+		 * address.  Shifting right by 29 and adding 4 will
+		 * result in zero for these addresses.
+		 *
+		 */
+		UASM_i_SRA(&p, a1, a0, 29);
+		UASM_i_ADDIU(&p, a1, a1, 4);
+		uasm_il_bnez(&p, &r, a1, label_tlbl_goaround1);
+		uasm_i_nop(&p);
+		uasm_i_dinsm(&p, a0, 0, 29, 64 - 29);
+		uasm_l_tlbl_goaround1(&l, p);
+		UASM_i_SLL(&p, a0, a0, 11);
+		UASM_i_MTC0(&p, a0, C0_CONTEXT);
+		uasm_i_jr(&p, 31);
+		uasm_i_ehb(&p);
+	} else {
+		/* PGD in c0_KScratch */
+		if (cpu_has_ldpte)
+			UASM_i_MTC0(&p, a0, C0_PWBASE);
+		else
+			UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
+		uasm_i_jr(&p, 31);
+		uasm_i_ehb(&p);
+	}
+#else
+#ifdef CONFIG_SMP
+	/* Save PGD to pgd_current[smp_processor_id()] */
+	UASM_i_CPUID_MFC0(&p, a1, SMP_CPUID_REG);
+	UASM_i_SRL_SAFE(&p, a1, a1, SMP_CPUID_PTRSHIFT);
+	UASM_i_LA_mostly(&p, a2, pgdc);
+	UASM_i_ADDU(&p, a2, a2, a1);
+	UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2);
+#else
+	UASM_i_LA_mostly(&p, a2, pgdc);
+	UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2);
+#endif /* SMP */
+
+	/* if pgd_reg is allocated, save PGD also to scratch register */
+	if (pgd_reg != -1) {
+		UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
+		uasm_i_jr(&p, 31);
+		uasm_i_ehb(&p);
+	} else {
+		uasm_i_jr(&p, 31);
+		uasm_i_nop(&p);
+	}
+#endif
+	if (p >= (u32 *)tlbmiss_handler_setup_pgd_end)
+		panic("tlbmiss_handler_setup_pgd space exceeded");
+
+	uasm_resolve_relocs(relocs, labels);
+	pr_debug("Wrote tlbmiss_handler_setup_pgd (%u instructions).\n",
+		 (unsigned int)(p - (u32 *)tlbmiss_handler_setup_pgd));
+
+	dump_handler("tlbmiss_handler", tlbmiss_handler_setup_pgd,
+					tlbmiss_handler_setup_pgd_end);
+}
+
+static void
+iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
+{
+#ifdef CONFIG_SMP
+	if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+		uasm_i_sync(p, 0);
+# ifdef CONFIG_PHYS_ADDR_T_64BIT
+	if (cpu_has_64bits)
+		uasm_i_lld(p, pte, 0, ptr);
+	else
+# endif
+		UASM_i_LL(p, pte, 0, ptr);
+#else
+# ifdef CONFIG_PHYS_ADDR_T_64BIT
+	if (cpu_has_64bits)
+		uasm_i_ld(p, pte, 0, ptr);
+	else
+# endif
+		UASM_i_LW(p, pte, 0, ptr);
+#endif
+}
+
+static void
+iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,
+	unsigned int mode, unsigned int scratch)
+{
+	unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY);
+	unsigned int swmode = mode & ~hwmode;
+
+	if (IS_ENABLED(CONFIG_XPA) && !cpu_has_64bits) {
+		uasm_i_lui(p, scratch, swmode >> 16);
+		uasm_i_or(p, pte, pte, scratch);
+		BUG_ON(swmode & 0xffff);
+	} else {
+		uasm_i_ori(p, pte, pte, mode);
+	}
+
+#ifdef CONFIG_SMP
+# ifdef CONFIG_PHYS_ADDR_T_64BIT
+	if (cpu_has_64bits)
+		uasm_i_scd(p, pte, 0, ptr);
+	else
+# endif
+		UASM_i_SC(p, pte, 0, ptr);
+
+	if (r10000_llsc_war())
+		uasm_il_beqzl(p, r, pte, label_smp_pgtable_change);
+	else
+		uasm_il_beqz(p, r, pte, label_smp_pgtable_change);
+
+# ifdef CONFIG_PHYS_ADDR_T_64BIT
+	if (!cpu_has_64bits) {
+		/* no uasm_i_nop needed */
+		uasm_i_ll(p, pte, sizeof(pte_t) / 2, ptr);
+		uasm_i_ori(p, pte, pte, hwmode);
+		BUG_ON(hwmode & ~0xffff);
+		uasm_i_sc(p, pte, sizeof(pte_t) / 2, ptr);
+		uasm_il_beqz(p, r, pte, label_smp_pgtable_change);
+		/* no uasm_i_nop needed */
+		uasm_i_lw(p, pte, 0, ptr);
+	} else
+		uasm_i_nop(p);
+# else
+	uasm_i_nop(p);
+# endif
+#else
+# ifdef CONFIG_PHYS_ADDR_T_64BIT
+	if (cpu_has_64bits)
+		uasm_i_sd(p, pte, 0, ptr);
+	else
+# endif
+		UASM_i_SW(p, pte, 0, ptr);
+
+# ifdef CONFIG_PHYS_ADDR_T_64BIT
+	if (!cpu_has_64bits) {
+		uasm_i_lw(p, pte, sizeof(pte_t) / 2, ptr);
+		uasm_i_ori(p, pte, pte, hwmode);
+		BUG_ON(hwmode & ~0xffff);
+		uasm_i_sw(p, pte, sizeof(pte_t) / 2, ptr);
+		uasm_i_lw(p, pte, 0, ptr);
+	}
+# endif
+#endif
+}
+
+/*
+ * Check if PTE is present, if not then jump to LABEL. PTR points to
+ * the page table where this PTE is located, PTE will be re-loaded
+ * with it's original value.
+ */
+static void
+build_pte_present(u32 **p, struct uasm_reloc **r,
+		  int pte, int ptr, int scratch, enum label_id lid)
+{
+	int t = scratch >= 0 ? scratch : pte;
+	int cur = pte;
+
+	if (cpu_has_rixi) {
+		if (use_bbit_insns()) {
+			uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid);
+			uasm_i_nop(p);
+		} else {
+			if (_PAGE_PRESENT_SHIFT) {
+				uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT);
+				cur = t;
+			}
+			uasm_i_andi(p, t, cur, 1);
+			uasm_il_beqz(p, r, t, lid);
+			if (pte == t)
+				/* You lose the SMP race :-(*/
+				iPTE_LW(p, pte, ptr);
+		}
+	} else {
+		if (_PAGE_PRESENT_SHIFT) {
+			uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT);
+			cur = t;
+		}
+		uasm_i_andi(p, t, cur,
+			(_PAGE_PRESENT | _PAGE_NO_READ) >> _PAGE_PRESENT_SHIFT);
+		uasm_i_xori(p, t, t, _PAGE_PRESENT >> _PAGE_PRESENT_SHIFT);
+		uasm_il_bnez(p, r, t, lid);
+		if (pte == t)
+			/* You lose the SMP race :-(*/
+			iPTE_LW(p, pte, ptr);
+	}
+}
+
+/* Make PTE valid, store result in PTR. */
+static void
+build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte,
+		 unsigned int ptr, unsigned int scratch)
+{
+	unsigned int mode = _PAGE_VALID | _PAGE_ACCESSED;
+
+	iPTE_SW(p, r, pte, ptr, mode, scratch);
+}
+
+/*
+ * Check if PTE can be written to, if not branch to LABEL. Regardless
+ * restore PTE with value from PTR when done.
+ */
+static void
+build_pte_writable(u32 **p, struct uasm_reloc **r,
+		   unsigned int pte, unsigned int ptr, int scratch,
+		   enum label_id lid)
+{
+	int t = scratch >= 0 ? scratch : pte;
+	int cur = pte;
+
+	if (_PAGE_PRESENT_SHIFT) {
+		uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT);
+		cur = t;
+	}
+	uasm_i_andi(p, t, cur,
+		    (_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT);
+	uasm_i_xori(p, t, t,
+		    (_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT);
+	uasm_il_bnez(p, r, t, lid);
+	if (pte == t)
+		/* You lose the SMP race :-(*/
+		iPTE_LW(p, pte, ptr);
+	else
+		uasm_i_nop(p);
+}
+
+/* Make PTE writable, update software status bits as well, then store
+ * at PTR.
+ */
+static void
+build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte,
+		 unsigned int ptr, unsigned int scratch)
+{
+	unsigned int mode = (_PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID
+			     | _PAGE_DIRTY);
+
+	iPTE_SW(p, r, pte, ptr, mode, scratch);
+}
+
+/*
+ * Check if PTE can be modified, if not branch to LABEL. Regardless
+ * restore PTE with value from PTR when done.
+ */
+static void
+build_pte_modifiable(u32 **p, struct uasm_reloc **r,
+		     unsigned int pte, unsigned int ptr, int scratch,
+		     enum label_id lid)
+{
+	if (use_bbit_insns()) {
+		uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid);
+		uasm_i_nop(p);
+	} else {
+		int t = scratch >= 0 ? scratch : pte;
+		uasm_i_srl(p, t, pte, _PAGE_WRITE_SHIFT);
+		uasm_i_andi(p, t, t, 1);
+		uasm_il_beqz(p, r, t, lid);
+		if (pte == t)
+			/* You lose the SMP race :-(*/
+			iPTE_LW(p, pte, ptr);
+	}
+}
+
+#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
+
+
+/*
+ * R3000 style TLB load/store/modify handlers.
+ */
+
+/*
+ * This places the pte into ENTRYLO0 and writes it with tlbwi.
+ * Then it returns.
+ */
+static void
+build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp)
+{
+	uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */
+	uasm_i_mfc0(p, tmp, C0_EPC); /* cp0 delay */
+	uasm_i_tlbwi(p);
+	uasm_i_jr(p, tmp);
+	uasm_i_rfe(p); /* branch delay */
+}
+
+/*
+ * This places the pte into ENTRYLO0 and writes it with tlbwi
+ * or tlbwr as appropriate.  This is because the index register
+ * may have the probe fail bit set as a result of a trap on a
+ * kseg2 access, i.e. without refill.  Then it returns.
+ */
+static void
+build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l,
+			     struct uasm_reloc **r, unsigned int pte,
+			     unsigned int tmp)
+{
+	uasm_i_mfc0(p, tmp, C0_INDEX);
+	uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */
+	uasm_il_bltz(p, r, tmp, label_r3000_write_probe_fail); /* cp0 delay */
+	uasm_i_mfc0(p, tmp, C0_EPC); /* branch delay */
+	uasm_i_tlbwi(p); /* cp0 delay */
+	uasm_i_jr(p, tmp);
+	uasm_i_rfe(p); /* branch delay */
+	uasm_l_r3000_write_probe_fail(l, *p);
+	uasm_i_tlbwr(p); /* cp0 delay */
+	uasm_i_jr(p, tmp);
+	uasm_i_rfe(p); /* branch delay */
+}
+
+static void
+build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte,
+				   unsigned int ptr)
+{
+	long pgdc = (long)pgd_current;
+
+	uasm_i_mfc0(p, pte, C0_BADVADDR);
+	uasm_i_lui(p, ptr, uasm_rel_hi(pgdc)); /* cp0 delay */
+	uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr);
+	uasm_i_srl(p, pte, pte, 22); /* load delay */
+	uasm_i_sll(p, pte, pte, 2);
+	uasm_i_addu(p, ptr, ptr, pte);
+	uasm_i_mfc0(p, pte, C0_CONTEXT);
+	uasm_i_lw(p, ptr, 0, ptr); /* cp0 delay */
+	uasm_i_andi(p, pte, pte, 0xffc); /* load delay */
+	uasm_i_addu(p, ptr, ptr, pte);
+	uasm_i_lw(p, pte, 0, ptr);
+	uasm_i_tlbp(p); /* load delay */
+}
+
+static void build_r3000_tlb_load_handler(void)
+{
+	u32 *p = (u32 *)handle_tlbl;
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+
+	memset(p, 0, handle_tlbl_end - (char *)p);
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	build_r3000_tlbchange_handler_head(&p, K0, K1);
+	build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl);
+	uasm_i_nop(&p); /* load delay */
+	build_make_valid(&p, &r, K0, K1, -1);
+	build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
+
+	uasm_l_nopage_tlbl(&l, p);
+	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
+	uasm_i_nop(&p);
+
+	if (p >= (u32 *)handle_tlbl_end)
+		panic("TLB load handler fastpath space exceeded");
+
+	uasm_resolve_relocs(relocs, labels);
+	pr_debug("Wrote TLB load handler fastpath (%u instructions).\n",
+		 (unsigned int)(p - (u32 *)handle_tlbl));
+
+	dump_handler("r3000_tlb_load", handle_tlbl, handle_tlbl_end);
+}
+
+static void build_r3000_tlb_store_handler(void)
+{
+	u32 *p = (u32 *)handle_tlbs;
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+
+	memset(p, 0, handle_tlbs_end - (char *)p);
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	build_r3000_tlbchange_handler_head(&p, K0, K1);
+	build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs);
+	uasm_i_nop(&p); /* load delay */
+	build_make_write(&p, &r, K0, K1, -1);
+	build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
+
+	uasm_l_nopage_tlbs(&l, p);
+	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
+	uasm_i_nop(&p);
+
+	if (p >= (u32 *)handle_tlbs_end)
+		panic("TLB store handler fastpath space exceeded");
+
+	uasm_resolve_relocs(relocs, labels);
+	pr_debug("Wrote TLB store handler fastpath (%u instructions).\n",
+		 (unsigned int)(p - (u32 *)handle_tlbs));
+
+	dump_handler("r3000_tlb_store", handle_tlbs, handle_tlbs_end);
+}
+
+static void build_r3000_tlb_modify_handler(void)
+{
+	u32 *p = (u32 *)handle_tlbm;
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+
+	memset(p, 0, handle_tlbm_end - (char *)p);
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	build_r3000_tlbchange_handler_head(&p, K0, K1);
+	build_pte_modifiable(&p, &r, K0, K1,  -1, label_nopage_tlbm);
+	uasm_i_nop(&p); /* load delay */
+	build_make_write(&p, &r, K0, K1, -1);
+	build_r3000_pte_reload_tlbwi(&p, K0, K1);
+
+	uasm_l_nopage_tlbm(&l, p);
+	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
+	uasm_i_nop(&p);
+
+	if (p >= (u32 *)handle_tlbm_end)
+		panic("TLB modify handler fastpath space exceeded");
+
+	uasm_resolve_relocs(relocs, labels);
+	pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n",
+		 (unsigned int)(p - (u32 *)handle_tlbm));
+
+	dump_handler("r3000_tlb_modify", handle_tlbm, handle_tlbm_end);
+}
+#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
+
+static bool cpu_has_tlbex_tlbp_race(void)
+{
+	/*
+	 * When a Hardware Table Walker is running it can replace TLB entries
+	 * at any time, leading to a race between it & the CPU.
+	 */
+	if (cpu_has_htw)
+		return true;
+
+	/*
+	 * If the CPU shares FTLB RAM with its siblings then our entry may be
+	 * replaced at any time by a sibling performing a write to the FTLB.
+	 */
+	if (cpu_has_shared_ftlb_ram)
+		return true;
+
+	/* In all other cases there ought to be no race condition to handle */
+	return false;
+}
+
+/*
+ * R4000 style TLB load/store/modify handlers.
+ */
+static struct work_registers
+build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l,
+				   struct uasm_reloc **r)
+{
+	struct work_registers wr = build_get_work_registers(p);
+
+#ifdef CONFIG_64BIT
+	build_get_pmde64(p, l, r, wr.r1, wr.r2); /* get pmd in ptr */
+#else
+	build_get_pgde32(p, wr.r1, wr.r2); /* get pgd in ptr */
+#endif
+
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	/*
+	 * For huge tlb entries, pmd doesn't contain an address but
+	 * instead contains the tlb pte. Check the PAGE_HUGE bit and
+	 * see if we need to jump to huge tlb processing.
+	 */
+	build_is_huge_pte(p, r, wr.r1, wr.r2, label_tlb_huge_update);
+#endif
+
+	UASM_i_MFC0(p, wr.r1, C0_BADVADDR);
+	UASM_i_LW(p, wr.r2, 0, wr.r2);
+	UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT - PTE_T_LOG2);
+	uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2);
+	UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1);
+
+#ifdef CONFIG_SMP
+	uasm_l_smp_pgtable_change(l, *p);
+#endif
+	iPTE_LW(p, wr.r1, wr.r2); /* get even pte */
+	if (!m4kc_tlbp_war()) {
+		build_tlb_probe_entry(p);
+		if (cpu_has_tlbex_tlbp_race()) {
+			/* race condition happens, leaving */
+			uasm_i_ehb(p);
+			uasm_i_mfc0(p, wr.r3, C0_INDEX);
+			uasm_il_bltz(p, r, wr.r3, label_leave);
+			uasm_i_nop(p);
+		}
+	}
+	return wr;
+}
+
+static void
+build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l,
+				   struct uasm_reloc **r, unsigned int tmp,
+				   unsigned int ptr)
+{
+	uasm_i_ori(p, ptr, ptr, sizeof(pte_t));
+	uasm_i_xori(p, ptr, ptr, sizeof(pte_t));
+	build_update_entries(p, tmp, ptr);
+	build_tlb_write_entry(p, l, r, tlb_indexed);
+	uasm_l_leave(l, *p);
+	build_restore_work_registers(p);
+	uasm_i_eret(p); /* return from trap */
+
+#ifdef CONFIG_64BIT
+	build_get_pgd_vmalloc64(p, l, r, tmp, ptr, not_refill);
+#endif
+}
+
+static void build_r4000_tlb_load_handler(void)
+{
+	u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbl);
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+	struct work_registers wr;
+
+	memset(p, 0, handle_tlbl_end - (char *)p);
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	if (bcm1250_m3_war()) {
+		unsigned int segbits = 44;
+
+		uasm_i_dmfc0(&p, K0, C0_BADVADDR);
+		uasm_i_dmfc0(&p, K1, C0_ENTRYHI);
+		uasm_i_xor(&p, K0, K0, K1);
+		uasm_i_dsrl_safe(&p, K1, K0, 62);
+		uasm_i_dsrl_safe(&p, K0, K0, 12 + 1);
+		uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits);
+		uasm_i_or(&p, K0, K0, K1);
+		uasm_il_bnez(&p, &r, K0, label_leave);
+		/* No need for uasm_i_nop */
+	}
+
+	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
+	build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);
+	if (m4kc_tlbp_war())
+		build_tlb_probe_entry(&p);
+
+	if (cpu_has_rixi && !cpu_has_rixiex) {
+		/*
+		 * If the page is not _PAGE_VALID, RI or XI could not
+		 * have triggered it.  Skip the expensive test..
+		 */
+		if (use_bbit_insns()) {
+			uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID),
+				      label_tlbl_goaround1);
+		} else {
+			uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID);
+			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround1);
+		}
+		uasm_i_nop(&p);
+
+		/*
+		 * Warn if something may race with us & replace the TLB entry
+		 * before we read it here. Everything with such races should
+		 * also have dedicated RiXi exception handlers, so this
+		 * shouldn't be hit.
+		 */
+		WARN(cpu_has_tlbex_tlbp_race(), "Unhandled race in RiXi path");
+
+		uasm_i_tlbr(&p);
+
+		if (cpu_has_mips_r2_exec_hazard)
+			uasm_i_ehb(&p);
+
+		/* Examine  entrylo 0 or 1 based on ptr. */
+		if (use_bbit_insns()) {
+			uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8);
+		} else {
+			uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t));
+			uasm_i_beqz(&p, wr.r3, 8);
+		}
+		/* load it in the delay slot*/
+		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0);
+		/* load it if ptr is odd */
+		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);
+		/*
+		 * If the entryLo (now in wr.r3) is valid (bit 1), RI or
+		 * XI must have triggered it.
+		 */
+		if (use_bbit_insns()) {
+			uasm_il_bbit1(&p, &r, wr.r3, 1, label_nopage_tlbl);
+			uasm_i_nop(&p);
+			uasm_l_tlbl_goaround1(&l, p);
+		} else {
+			uasm_i_andi(&p, wr.r3, wr.r3, 2);
+			uasm_il_bnez(&p, &r, wr.r3, label_nopage_tlbl);
+			uasm_i_nop(&p);
+		}
+		uasm_l_tlbl_goaround1(&l, p);
+	}
+	build_make_valid(&p, &r, wr.r1, wr.r2, wr.r3);
+	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
+
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	/*
+	 * This is the entry point when build_r4000_tlbchange_handler_head
+	 * spots a huge page.
+	 */
+	uasm_l_tlb_huge_update(&l, p);
+	iPTE_LW(&p, wr.r1, wr.r2);
+	build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);
+	build_tlb_probe_entry(&p);
+
+	if (cpu_has_rixi && !cpu_has_rixiex) {
+		/*
+		 * If the page is not _PAGE_VALID, RI or XI could not
+		 * have triggered it.  Skip the expensive test..
+		 */
+		if (use_bbit_insns()) {
+			uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID),
+				      label_tlbl_goaround2);
+		} else {
+			uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID);
+			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
+		}
+		uasm_i_nop(&p);
+
+		/*
+		 * Warn if something may race with us & replace the TLB entry
+		 * before we read it here. Everything with such races should
+		 * also have dedicated RiXi exception handlers, so this
+		 * shouldn't be hit.
+		 */
+		WARN(cpu_has_tlbex_tlbp_race(), "Unhandled race in RiXi path");
+
+		uasm_i_tlbr(&p);
+
+		if (cpu_has_mips_r2_exec_hazard)
+			uasm_i_ehb(&p);
+
+		/* Examine  entrylo 0 or 1 based on ptr. */
+		if (use_bbit_insns()) {
+			uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8);
+		} else {
+			uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t));
+			uasm_i_beqz(&p, wr.r3, 8);
+		}
+		/* load it in the delay slot*/
+		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0);
+		/* load it if ptr is odd */
+		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);
+		/*
+		 * If the entryLo (now in wr.r3) is valid (bit 1), RI or
+		 * XI must have triggered it.
+		 */
+		if (use_bbit_insns()) {
+			uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2);
+		} else {
+			uasm_i_andi(&p, wr.r3, wr.r3, 2);
+			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
+		}
+		if (PM_DEFAULT_MASK == 0)
+			uasm_i_nop(&p);
+		/*
+		 * We clobbered C0_PAGEMASK, restore it.  On the other branch
+		 * it is restored in build_huge_tlb_write_entry.
+		 */
+		build_restore_pagemask(&p, &r, wr.r3, label_nopage_tlbl, 0);
+
+		uasm_l_tlbl_goaround2(&l, p);
+	}
+	uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID));
+	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1);
+#endif
+
+	uasm_l_nopage_tlbl(&l, p);
+	if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+		uasm_i_sync(&p, 0);
+	build_restore_work_registers(&p);
+#ifdef CONFIG_CPU_MICROMIPS
+	if ((unsigned long)tlb_do_page_fault_0 & 1) {
+		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_0));
+		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_0));
+		uasm_i_jr(&p, K0);
+	} else
+#endif
+	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
+	uasm_i_nop(&p);
+
+	if (p >= (u32 *)handle_tlbl_end)
+		panic("TLB load handler fastpath space exceeded");
+
+	uasm_resolve_relocs(relocs, labels);
+	pr_debug("Wrote TLB load handler fastpath (%u instructions).\n",
+		 (unsigned int)(p - (u32 *)handle_tlbl));
+
+	dump_handler("r4000_tlb_load", handle_tlbl, handle_tlbl_end);
+}
+
+static void build_r4000_tlb_store_handler(void)
+{
+	u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbs);
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+	struct work_registers wr;
+
+	memset(p, 0, handle_tlbs_end - (char *)p);
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
+	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
+	if (m4kc_tlbp_war())
+		build_tlb_probe_entry(&p);
+	build_make_write(&p, &r, wr.r1, wr.r2, wr.r3);
+	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
+
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	/*
+	 * This is the entry point when
+	 * build_r4000_tlbchange_handler_head spots a huge page.
+	 */
+	uasm_l_tlb_huge_update(&l, p);
+	iPTE_LW(&p, wr.r1, wr.r2);
+	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
+	build_tlb_probe_entry(&p);
+	uasm_i_ori(&p, wr.r1, wr.r1,
+		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
+	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1);
+#endif
+
+	uasm_l_nopage_tlbs(&l, p);
+	if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+		uasm_i_sync(&p, 0);
+	build_restore_work_registers(&p);
+#ifdef CONFIG_CPU_MICROMIPS
+	if ((unsigned long)tlb_do_page_fault_1 & 1) {
+		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1));
+		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1));
+		uasm_i_jr(&p, K0);
+	} else
+#endif
+	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
+	uasm_i_nop(&p);
+
+	if (p >= (u32 *)handle_tlbs_end)
+		panic("TLB store handler fastpath space exceeded");
+
+	uasm_resolve_relocs(relocs, labels);
+	pr_debug("Wrote TLB store handler fastpath (%u instructions).\n",
+		 (unsigned int)(p - (u32 *)handle_tlbs));
+
+	dump_handler("r4000_tlb_store", handle_tlbs, handle_tlbs_end);
+}
+
+static void build_r4000_tlb_modify_handler(void)
+{
+	u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbm);
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+	struct work_registers wr;
+
+	memset(p, 0, handle_tlbm_end - (char *)p);
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
+	build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);
+	if (m4kc_tlbp_war())
+		build_tlb_probe_entry(&p);
+	/* Present and writable bits set, set accessed and dirty bits. */
+	build_make_write(&p, &r, wr.r1, wr.r2, wr.r3);
+	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
+
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	/*
+	 * This is the entry point when
+	 * build_r4000_tlbchange_handler_head spots a huge page.
+	 */
+	uasm_l_tlb_huge_update(&l, p);
+	iPTE_LW(&p, wr.r1, wr.r2);
+	build_pte_modifiable(&p, &r, wr.r1, wr.r2,  wr.r3, label_nopage_tlbm);
+	build_tlb_probe_entry(&p);
+	uasm_i_ori(&p, wr.r1, wr.r1,
+		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
+	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 0);
+#endif
+
+	uasm_l_nopage_tlbm(&l, p);
+	if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+		uasm_i_sync(&p, 0);
+	build_restore_work_registers(&p);
+#ifdef CONFIG_CPU_MICROMIPS
+	if ((unsigned long)tlb_do_page_fault_1 & 1) {
+		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1));
+		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1));
+		uasm_i_jr(&p, K0);
+	} else
+#endif
+	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
+	uasm_i_nop(&p);
+
+	if (p >= (u32 *)handle_tlbm_end)
+		panic("TLB modify handler fastpath space exceeded");
+
+	uasm_resolve_relocs(relocs, labels);
+	pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n",
+		 (unsigned int)(p - (u32 *)handle_tlbm));
+
+	dump_handler("r4000_tlb_modify", handle_tlbm, handle_tlbm_end);
+}
+
+static void flush_tlb_handlers(void)
+{
+	local_flush_icache_range((unsigned long)handle_tlbl,
+			   (unsigned long)handle_tlbl_end);
+	local_flush_icache_range((unsigned long)handle_tlbs,
+			   (unsigned long)handle_tlbs_end);
+	local_flush_icache_range((unsigned long)handle_tlbm,
+			   (unsigned long)handle_tlbm_end);
+	local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd,
+			   (unsigned long)tlbmiss_handler_setup_pgd_end);
+}
+
+static void print_htw_config(void)
+{
+	unsigned long config;
+	unsigned int pwctl;
+	const int field = 2 * sizeof(unsigned long);
+
+	config = read_c0_pwfield();
+	pr_debug("PWField (0x%0*lx): GDI: 0x%02lx  UDI: 0x%02lx  MDI: 0x%02lx  PTI: 0x%02lx  PTEI: 0x%02lx\n",
+		field, config,
+		(config & MIPS_PWFIELD_GDI_MASK) >> MIPS_PWFIELD_GDI_SHIFT,
+		(config & MIPS_PWFIELD_UDI_MASK) >> MIPS_PWFIELD_UDI_SHIFT,
+		(config & MIPS_PWFIELD_MDI_MASK) >> MIPS_PWFIELD_MDI_SHIFT,
+		(config & MIPS_PWFIELD_PTI_MASK) >> MIPS_PWFIELD_PTI_SHIFT,
+		(config & MIPS_PWFIELD_PTEI_MASK) >> MIPS_PWFIELD_PTEI_SHIFT);
+
+	config = read_c0_pwsize();
+	pr_debug("PWSize  (0x%0*lx): PS: 0x%lx  GDW: 0x%02lx  UDW: 0x%02lx  MDW: 0x%02lx  PTW: 0x%02lx  PTEW: 0x%02lx\n",
+		field, config,
+		(config & MIPS_PWSIZE_PS_MASK) >> MIPS_PWSIZE_PS_SHIFT,
+		(config & MIPS_PWSIZE_GDW_MASK) >> MIPS_PWSIZE_GDW_SHIFT,
+		(config & MIPS_PWSIZE_UDW_MASK) >> MIPS_PWSIZE_UDW_SHIFT,
+		(config & MIPS_PWSIZE_MDW_MASK) >> MIPS_PWSIZE_MDW_SHIFT,
+		(config & MIPS_PWSIZE_PTW_MASK) >> MIPS_PWSIZE_PTW_SHIFT,
+		(config & MIPS_PWSIZE_PTEW_MASK) >> MIPS_PWSIZE_PTEW_SHIFT);
+
+	pwctl = read_c0_pwctl();
+	pr_debug("PWCtl   (0x%x): PWEn: 0x%x  XK: 0x%x  XS: 0x%x  XU: 0x%x  DPH: 0x%x  HugePg: 0x%x  Psn: 0x%x\n",
+		pwctl,
+		(pwctl & MIPS_PWCTL_PWEN_MASK) >> MIPS_PWCTL_PWEN_SHIFT,
+		(pwctl & MIPS_PWCTL_XK_MASK) >> MIPS_PWCTL_XK_SHIFT,
+		(pwctl & MIPS_PWCTL_XS_MASK) >> MIPS_PWCTL_XS_SHIFT,
+		(pwctl & MIPS_PWCTL_XU_MASK) >> MIPS_PWCTL_XU_SHIFT,
+		(pwctl & MIPS_PWCTL_DPH_MASK) >> MIPS_PWCTL_DPH_SHIFT,
+		(pwctl & MIPS_PWCTL_HUGEPG_MASK) >> MIPS_PWCTL_HUGEPG_SHIFT,
+		(pwctl & MIPS_PWCTL_PSN_MASK) >> MIPS_PWCTL_PSN_SHIFT);
+}
+
+static void config_htw_params(void)
+{
+	unsigned long pwfield, pwsize, ptei;
+	unsigned int config;
+
+	/*
+	 * We are using 2-level page tables, so we only need to
+	 * setup GDW and PTW appropriately. UDW and MDW will remain 0.
+	 * The default value of GDI/UDI/MDI/PTI is 0xc. It is illegal to
+	 * write values less than 0xc in these fields because the entire
+	 * write will be dropped. As a result of which, we must preserve
+	 * the original reset values and overwrite only what we really want.
+	 */
+
+	pwfield = read_c0_pwfield();
+	/* re-initialize the GDI field */
+	pwfield &= ~MIPS_PWFIELD_GDI_MASK;
+	pwfield |= PGDIR_SHIFT << MIPS_PWFIELD_GDI_SHIFT;
+	/* re-initialize the PTI field including the even/odd bit */
+	pwfield &= ~MIPS_PWFIELD_PTI_MASK;
+	pwfield |= PAGE_SHIFT << MIPS_PWFIELD_PTI_SHIFT;
+	if (CONFIG_PGTABLE_LEVELS >= 3) {
+		pwfield &= ~MIPS_PWFIELD_MDI_MASK;
+		pwfield |= PMD_SHIFT << MIPS_PWFIELD_MDI_SHIFT;
+	}
+	/* Set the PTEI right shift */
+	ptei = _PAGE_GLOBAL_SHIFT << MIPS_PWFIELD_PTEI_SHIFT;
+	pwfield |= ptei;
+	write_c0_pwfield(pwfield);
+	/* Check whether the PTEI value is supported */
+	back_to_back_c0_hazard();
+	pwfield = read_c0_pwfield();
+	if (((pwfield & MIPS_PWFIELD_PTEI_MASK) << MIPS_PWFIELD_PTEI_SHIFT)
+		!= ptei) {
+		pr_warn("Unsupported PTEI field value: 0x%lx. HTW will not be enabled",
+			ptei);
+		/*
+		 * Drop option to avoid HTW being enabled via another path
+		 * (eg htw_reset())
+		 */
+		current_cpu_data.options &= ~MIPS_CPU_HTW;
+		return;
+	}
+
+	pwsize = ilog2(PTRS_PER_PGD) << MIPS_PWSIZE_GDW_SHIFT;
+	pwsize |= ilog2(PTRS_PER_PTE) << MIPS_PWSIZE_PTW_SHIFT;
+	if (CONFIG_PGTABLE_LEVELS >= 3)
+		pwsize |= ilog2(PTRS_PER_PMD) << MIPS_PWSIZE_MDW_SHIFT;
+
+	/* Set pointer size to size of directory pointers */
+	if (IS_ENABLED(CONFIG_64BIT))
+		pwsize |= MIPS_PWSIZE_PS_MASK;
+	/* PTEs may be multiple pointers long (e.g. with XPA) */
+	pwsize |= ((PTE_T_LOG2 - PGD_T_LOG2) << MIPS_PWSIZE_PTEW_SHIFT)
+			& MIPS_PWSIZE_PTEW_MASK;
+
+	write_c0_pwsize(pwsize);
+
+	/* Make sure everything is set before we enable the HTW */
+	back_to_back_c0_hazard();
+
+	/*
+	 * Enable HTW (and only for XUSeg on 64-bit), and disable the rest of
+	 * the pwctl fields.
+	 */
+	config = 1 << MIPS_PWCTL_PWEN_SHIFT;
+	if (IS_ENABLED(CONFIG_64BIT))
+		config |= MIPS_PWCTL_XU_MASK;
+	write_c0_pwctl(config);
+	pr_info("Hardware Page Table Walker enabled\n");
+
+	print_htw_config();
+}
+
+static void config_xpa_params(void)
+{
+#ifdef CONFIG_XPA
+	unsigned int pagegrain;
+
+	if (mips_xpa_disabled) {
+		pr_info("Extended Physical Addressing (XPA) disabled\n");
+		return;
+	}
+
+	pagegrain = read_c0_pagegrain();
+	write_c0_pagegrain(pagegrain | PG_ELPA);
+	back_to_back_c0_hazard();
+	pagegrain = read_c0_pagegrain();
+
+	if (pagegrain & PG_ELPA)
+		pr_info("Extended Physical Addressing (XPA) enabled\n");
+	else
+		panic("Extended Physical Addressing (XPA) disabled");
+#endif
+}
+
+static void check_pabits(void)
+{
+	unsigned long entry;
+	unsigned pabits, fillbits;
+
+	if (!cpu_has_rixi || _PAGE_NO_EXEC == 0) {
+		/*
+		 * We'll only be making use of the fact that we can rotate bits
+		 * into the fill if the CPU supports RIXI, so don't bother
+		 * probing this for CPUs which don't.
+		 */
+		return;
+	}
+
+	write_c0_entrylo0(~0ul);
+	back_to_back_c0_hazard();
+	entry = read_c0_entrylo0();
+
+	/* clear all non-PFN bits */
+	entry &= ~((1 << MIPS_ENTRYLO_PFN_SHIFT) - 1);
+	entry &= ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI);
+
+	/* find a lower bound on PABITS, and upper bound on fill bits */
+	pabits = fls_long(entry) + 6;
+	fillbits = max_t(int, (int)BITS_PER_LONG - pabits, 0);
+
+	/* minus the RI & XI bits */
+	fillbits -= min_t(unsigned, fillbits, 2);
+
+	if (fillbits >= ilog2(_PAGE_NO_EXEC))
+		fill_includes_sw_bits = true;
+
+	pr_debug("Entry* registers contain %u fill bits\n", fillbits);
+}
+
+void build_tlb_refill_handler(void)
+{
+	/*
+	 * The refill handler is generated per-CPU, multi-node systems
+	 * may have local storage for it. The other handlers are only
+	 * needed once.
+	 */
+	static int run_once = 0;
+
+	if (IS_ENABLED(CONFIG_XPA) && !cpu_has_rixi)
+		panic("Kernels supporting XPA currently require CPUs with RIXI");
+
+	output_pgtable_bits_defines();
+	check_pabits();
+
+#ifdef CONFIG_64BIT
+	check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3);
+#endif
+
+	if (cpu_has_3kex) {
+#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
+		if (!run_once) {
+			build_setup_pgd();
+			build_r3000_tlb_refill_handler();
+			build_r3000_tlb_load_handler();
+			build_r3000_tlb_store_handler();
+			build_r3000_tlb_modify_handler();
+			flush_tlb_handlers();
+			run_once++;
+		}
+#else
+		panic("No R3000 TLB refill handler");
+#endif
+		return;
+	}
+
+	if (cpu_has_ldpte)
+		setup_pw();
+
+	if (!run_once) {
+		scratch_reg = allocate_kscratch();
+		build_setup_pgd();
+		build_r4000_tlb_load_handler();
+		build_r4000_tlb_store_handler();
+		build_r4000_tlb_modify_handler();
+		if (cpu_has_ldpte)
+			build_loongson3_tlb_refill_handler();
+		else
+			build_r4000_tlb_refill_handler();
+		flush_tlb_handlers();
+		run_once++;
+	}
+	if (cpu_has_xpa)
+		config_xpa_params();
+	if (cpu_has_htw)
+		config_htw_params();
+}
diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c
new file mode 100644
index 0000000000..75ef90486f
--- /dev/null
+++ b/arch/mips/mm/uasm-micromips.c
@@ -0,0 +1,232 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * A small micro-assembler. It is intentionally kept simple, does only
+ * support a subset of instructions, and does not try to hide pipeline
+ * effects like branch delay slots.
+ *
+ * Copyright (C) 2004, 2005, 2006, 2008	 Thiemo Seufer
+ * Copyright (C) 2005, 2007  Maciej W. Rozycki
+ * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 2012, 2013   MIPS Technologies, Inc.  All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include <asm/inst.h>
+#include <asm/elf.h>
+#include <asm/bugs.h>
+#include <asm/uasm.h>
+
+#define RS_MASK		0x1f
+#define RS_SH		16
+#define RT_MASK		0x1f
+#define RT_SH		21
+#define SCIMM_MASK	0x3ff
+#define SCIMM_SH	16
+
+/* This macro sets the non-variable bits of an instruction. */
+#define M(a, b, c, d, e, f)					\
+	((a) << OP_SH						\
+	 | (b) << RT_SH						\
+	 | (c) << RS_SH						\
+	 | (d) << RD_SH						\
+	 | (e) << RE_SH						\
+	 | (f) << FUNC_SH)
+
+#include "uasm.c"
+
+static const struct insn insn_table_MM[insn_invalid] = {
+	[insn_addu]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD},
+	[insn_addiu]	= {M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
+	[insn_and]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD},
+	[insn_andi]	= {M(mm_andi32_op, 0, 0, 0, 0, 0), RT | RS | UIMM},
+	[insn_beq]	= {M(mm_beq32_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_beql]	= {0, 0},
+	[insn_bgez]	= {M(mm_pool32i_op, mm_bgez_op, 0, 0, 0, 0), RS | BIMM},
+	[insn_bgezl]	= {0, 0},
+	[insn_bltz]	= {M(mm_pool32i_op, mm_bltz_op, 0, 0, 0, 0), RS | BIMM},
+	[insn_bltzl]	= {0, 0},
+	[insn_bne]	= {M(mm_bne32_op, 0, 0, 0, 0, 0), RT | RS | BIMM},
+	[insn_cache]	= {M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM},
+	[insn_cfc1]	= {M(mm_pool32f_op, 0, 0, 0, mm_cfc1_op, mm_32f_73_op), RT | RS},
+	[insn_cfcmsa]	= {M(mm_pool32s_op, 0, msa_cfc_op, 0, 0, mm_32s_elm_op), RD | RE},
+	[insn_ctc1]	= {M(mm_pool32f_op, 0, 0, 0, mm_ctc1_op, mm_32f_73_op), RT | RS},
+	[insn_ctcmsa]	= {M(mm_pool32s_op, 0, msa_ctc_op, 0, 0, mm_32s_elm_op), RD | RE},
+	[insn_daddu]	= {0, 0},
+	[insn_daddiu]	= {0, 0},
+	[insn_di]	= {M(mm_pool32a_op, 0, 0, 0, mm_di_op, mm_pool32axf_op), RS},
+	[insn_divu]	= {M(mm_pool32a_op, 0, 0, 0, mm_divu_op, mm_pool32axf_op), RT | RS},
+	[insn_dmfc0]	= {0, 0},
+	[insn_dmtc0]	= {0, 0},
+	[insn_dsll]	= {0, 0},
+	[insn_dsll32]	= {0, 0},
+	[insn_dsra]	= {0, 0},
+	[insn_dsrl]	= {0, 0},
+	[insn_dsrl32]	= {0, 0},
+	[insn_drotr]	= {0, 0},
+	[insn_drotr32]	= {0, 0},
+	[insn_dsubu]	= {0, 0},
+	[insn_eret]	= {M(mm_pool32a_op, 0, 0, 0, mm_eret_op, mm_pool32axf_op), 0},
+	[insn_ins]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_ins_op), RT | RS | RD | RE},
+	[insn_ext]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_ext_op), RT | RS | RD | RE},
+	[insn_j]	= {M(mm_j32_op, 0, 0, 0, 0, 0), JIMM},
+	[insn_jal]	= {M(mm_jal32_op, 0, 0, 0, 0, 0), JIMM},
+	[insn_jalr]	= {M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RT | RS},
+	[insn_jr]	= {M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RS},
+	[insn_lb]	= {M(mm_lb32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
+	[insn_ld]	= {0, 0},
+	[insn_lh]	= {M(mm_lh32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
+	[insn_ll]	= {M(mm_pool32c_op, 0, 0, (mm_ll_func << 1), 0, 0), RS | RT | SIMM},
+	[insn_lld]	= {0, 0},
+	[insn_lui]	= {M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | SIMM},
+	[insn_lw]	= {M(mm_lw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
+	[insn_mfc0]	= {M(mm_pool32a_op, 0, 0, 0, mm_mfc0_op, mm_pool32axf_op), RT | RS | RD},
+	[insn_mfhi]	= {M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, mm_pool32axf_op), RS},
+	[insn_mflo]	= {M(mm_pool32a_op, 0, 0, 0, mm_mflo32_op, mm_pool32axf_op), RS},
+	[insn_mtc0]	= {M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD},
+	[insn_mthi]	= {M(mm_pool32a_op, 0, 0, 0, mm_mthi32_op, mm_pool32axf_op), RS},
+	[insn_mtlo]	= {M(mm_pool32a_op, 0, 0, 0, mm_mtlo32_op, mm_pool32axf_op), RS},
+	[insn_mul]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_mul_op), RT | RS | RD},
+	[insn_or]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD},
+	[insn_ori]	= {M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM},
+	[insn_pref]	= {M(mm_pool32c_op, 0, 0, (mm_pref_func << 1), 0, 0), RT | RS | SIMM},
+	[insn_rfe]	= {0, 0},
+	[insn_sc]	= {M(mm_pool32c_op, 0, 0, (mm_sc_func << 1), 0, 0), RT | RS | SIMM},
+	[insn_scd]	= {0, 0},
+	[insn_sd]	= {0, 0},
+	[insn_sll]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_sll32_op), RT | RS | RD},
+	[insn_sllv]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_sllv32_op), RT | RS | RD},
+	[insn_slt]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_slt_op), RT | RS | RD},
+	[insn_sltiu]	= {M(mm_sltiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
+	[insn_sltu]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_sltu_op), RT | RS | RD},
+	[insn_sra]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_sra_op), RT | RS | RD},
+	[insn_srav]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_srav_op), RT | RS | RD},
+	[insn_srl]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_srl32_op), RT | RS | RD},
+	[insn_srlv]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_srlv32_op), RT | RS | RD},
+	[insn_rotr]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_rotr_op), RT | RS | RD},
+	[insn_subu]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_subu32_op), RT | RS | RD},
+	[insn_sw]	= {M(mm_sw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
+	[insn_sync]	= {M(mm_pool32a_op, 0, 0, 0, mm_sync_op, mm_pool32axf_op), RS},
+	[insn_tlbp]	= {M(mm_pool32a_op, 0, 0, 0, mm_tlbp_op, mm_pool32axf_op), 0},
+	[insn_tlbr]	= {M(mm_pool32a_op, 0, 0, 0, mm_tlbr_op, mm_pool32axf_op), 0},
+	[insn_tlbwi]	= {M(mm_pool32a_op, 0, 0, 0, mm_tlbwi_op, mm_pool32axf_op), 0},
+	[insn_tlbwr]	= {M(mm_pool32a_op, 0, 0, 0, mm_tlbwr_op, mm_pool32axf_op), 0},
+	[insn_wait]	= {M(mm_pool32a_op, 0, 0, 0, mm_wait_op, mm_pool32axf_op), SCIMM},
+	[insn_wsbh]	= {M(mm_pool32a_op, 0, 0, 0, mm_wsbh_op, mm_pool32axf_op), RT | RS},
+	[insn_xor]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_xor32_op), RT | RS | RD},
+	[insn_xori]	= {M(mm_xori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM},
+	[insn_dins]	= {0, 0},
+	[insn_dinsm]	= {0, 0},
+	[insn_syscall]	= {M(mm_pool32a_op, 0, 0, 0, mm_syscall_op, mm_pool32axf_op), SCIMM},
+	[insn_bbit0]	= {0, 0},
+	[insn_bbit1]	= {0, 0},
+	[insn_lwx]	= {0, 0},
+	[insn_ldx]	= {0, 0},
+};
+
+#undef M
+
+static inline u32 build_bimm(s32 arg)
+{
+	WARN(arg > 0xffff || arg < -0x10000,
+	     KERN_WARNING "Micro-assembler field overflow\n");
+
+	WARN(arg & 0x3, KERN_WARNING "Invalid micro-assembler branch target\n");
+
+	return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 1) & 0x7fff);
+}
+
+static inline u32 build_jimm(u32 arg)
+{
+
+	WARN(arg & ~((JIMM_MASK << 2) | 1),
+	     KERN_WARNING "Micro-assembler field overflow\n");
+
+	return (arg >> 1) & JIMM_MASK;
+}
+
+/*
+ * The order of opcode arguments is implicitly left to right,
+ * starting with RS and ending with FUNC or IMM.
+ */
+static void build_insn(u32 **buf, enum opcode opc, ...)
+{
+	const struct insn *ip;
+	va_list ap;
+	u32 op;
+
+	if (opc < 0 || opc >= insn_invalid ||
+	    (opc == insn_daddiu && r4k_daddiu_bug()) ||
+	    (insn_table_MM[opc].match == 0 && insn_table_MM[opc].fields == 0))
+		panic("Unsupported Micro-assembler instruction %d", opc);
+
+	ip = &insn_table_MM[opc];
+
+	op = ip->match;
+	va_start(ap, opc);
+	if (ip->fields & RS) {
+		if (opc == insn_mfc0 || opc == insn_mtc0 ||
+		    opc == insn_cfc1 || opc == insn_ctc1)
+			op |= build_rt(va_arg(ap, u32));
+		else
+			op |= build_rs(va_arg(ap, u32));
+	}
+	if (ip->fields & RT) {
+		if (opc == insn_mfc0 || opc == insn_mtc0 ||
+		    opc == insn_cfc1 || opc == insn_ctc1)
+			op |= build_rs(va_arg(ap, u32));
+		else
+			op |= build_rt(va_arg(ap, u32));
+	}
+	if (ip->fields & RD)
+		op |= build_rd(va_arg(ap, u32));
+	if (ip->fields & RE)
+		op |= build_re(va_arg(ap, u32));
+	if (ip->fields & SIMM)
+		op |= build_simm(va_arg(ap, s32));
+	if (ip->fields & UIMM)
+		op |= build_uimm(va_arg(ap, u32));
+	if (ip->fields & BIMM)
+		op |= build_bimm(va_arg(ap, s32));
+	if (ip->fields & JIMM)
+		op |= build_jimm(va_arg(ap, u32));
+	if (ip->fields & FUNC)
+		op |= build_func(va_arg(ap, u32));
+	if (ip->fields & SET)
+		op |= build_set(va_arg(ap, u32));
+	if (ip->fields & SCIMM)
+		op |= build_scimm(va_arg(ap, u32));
+	va_end(ap);
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+	**buf = ((op & 0xffff) << 16) | (op >> 16);
+#else
+	**buf = op;
+#endif
+	(*buf)++;
+}
+
+static inline void
+__resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab)
+{
+	long laddr = (long)lab->addr;
+	long raddr = (long)rel->addr;
+
+	switch (rel->type) {
+	case R_MIPS_PC16:
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+		*rel->addr |= (build_bimm(laddr - (raddr + 4)) << 16);
+#else
+		*rel->addr |= build_bimm(laddr - (raddr + 4));
+#endif
+		break;
+
+	default:
+		panic("Unsupported Micro-assembler relocation %d",
+		      rel->type);
+	}
+}
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
new file mode 100644
index 0000000000..e15c6700cd
--- /dev/null
+++ b/arch/mips/mm/uasm-mips.c
@@ -0,0 +1,292 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * A small micro-assembler. It is intentionally kept simple, does only
+ * support a subset of instructions, and does not try to hide pipeline
+ * effects like branch delay slots.
+ *
+ * Copyright (C) 2004, 2005, 2006, 2008	 Thiemo Seufer
+ * Copyright (C) 2005, 2007  Maciej W. Rozycki
+ * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 2012, 2013  MIPS Technologies, Inc.  All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include <asm/inst.h>
+#include <asm/elf.h>
+#include <asm/bugs.h>
+#include <asm/uasm.h>
+
+#define RS_MASK		0x1f
+#define RS_SH		21
+#define RT_MASK		0x1f
+#define RT_SH		16
+#define SCIMM_MASK	0xfffff
+#define SCIMM_SH	6
+
+/* This macro sets the non-variable bits of an instruction. */
+#define M(a, b, c, d, e, f)					\
+	((a) << OP_SH						\
+	 | (b) << RS_SH						\
+	 | (c) << RT_SH						\
+	 | (d) << RD_SH						\
+	 | (e) << RE_SH						\
+	 | (f) << FUNC_SH)
+
+/* This macro sets the non-variable bits of an R6 instruction. */
+#define M6(a, b, c, d, e)					\
+	((a) << OP_SH						\
+	 | (b) << RS_SH						\
+	 | (c) << RT_SH						\
+	 | (d) << SIMM9_SH					\
+	 | (e) << FUNC_SH)
+
+#include "uasm.c"
+
+static const struct insn insn_table[insn_invalid] = {
+	[insn_addiu]	= {M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+	[insn_addu]	= {M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD},
+	[insn_and]	= {M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD},
+	[insn_andi]	= {M(andi_op, 0, 0, 0, 0, 0), RS | RT | UIMM},
+	[insn_bbit0]	= {M(lwc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_bbit1]	= {M(swc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_beq]	= {M(beq_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_beql]	= {M(beql_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_bgez]	= {M(bcond_op, 0, bgez_op, 0, 0, 0), RS | BIMM},
+	[insn_bgezl]	= {M(bcond_op, 0, bgezl_op, 0, 0, 0), RS | BIMM},
+	[insn_bgtz]	= {M(bgtz_op, 0, 0, 0, 0, 0), RS | BIMM},
+	[insn_blez]	= {M(blez_op, 0, 0, 0, 0, 0), RS | BIMM},
+	[insn_bltz]	= {M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM},
+	[insn_bltzl]	= {M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM},
+	[insn_bne]	= {M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_break]	= {M(spec_op, 0, 0, 0, 0, break_op), SCIMM},
+#ifndef CONFIG_CPU_MIPSR6
+	[insn_cache]	= {M(cache_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+#else
+	[insn_cache]	= {M6(spec3_op, 0, 0, 0, cache6_op),  RS | RT | SIMM9},
+#endif
+	[insn_cfc1]	= {M(cop1_op, cfc_op, 0, 0, 0, 0), RT | RD},
+	[insn_cfcmsa]	= {M(msa_op, 0, msa_cfc_op, 0, 0, msa_elm_op), RD | RE},
+	[insn_ctc1]	= {M(cop1_op, ctc_op, 0, 0, 0, 0), RT | RD},
+	[insn_ctcmsa]	= {M(msa_op, 0, msa_ctc_op, 0, 0, msa_elm_op), RD | RE},
+	[insn_daddiu]	= {M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+	[insn_daddu]	= {M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD},
+	[insn_ddivu]	= {M(spec_op, 0, 0, 0, 0, ddivu_op), RS | RT},
+	[insn_ddivu_r6]	= {M(spec_op, 0, 0, 0, ddivu_ddivu6_op, ddivu_op),
+				RS | RT | RD},
+	[insn_di]	= {M(cop0_op, mfmc0_op, 0, 12, 0, 0), RT},
+	[insn_dins]	= {M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE},
+	[insn_dinsm]	= {M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE},
+	[insn_dinsu]	= {M(spec3_op, 0, 0, 0, 0, dinsu_op), RS | RT | RD | RE},
+	[insn_divu]	= {M(spec_op, 0, 0, 0, 0, divu_op), RS | RT},
+	[insn_divu_r6]	= {M(spec_op, 0, 0, 0, divu_divu6_op, divu_op),
+				RS | RT | RD},
+	[insn_dmfc0]	= {M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
+	[insn_dmodu]	= {M(spec_op, 0, 0, 0, ddivu_dmodu_op, ddivu_op),
+				RS | RT | RD},
+	[insn_dmtc0]	= {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET},
+	[insn_dmultu]	= {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT},
+	[insn_dmulu]	= {M(spec_op, 0, 0, 0, dmultu_dmulu_op, dmultu_op),
+				RS | RT | RD},
+	[insn_drotr]	= {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE},
+	[insn_drotr32]	= {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE},
+	[insn_dsbh]	= {M(spec3_op, 0, 0, 0, dsbh_op, dbshfl_op), RT | RD},
+	[insn_dshd]	= {M(spec3_op, 0, 0, 0, dshd_op, dbshfl_op), RT | RD},
+	[insn_dsll]	= {M(spec_op, 0, 0, 0, 0, dsll_op), RT | RD | RE},
+	[insn_dsll32]	= {M(spec_op, 0, 0, 0, 0, dsll32_op), RT | RD | RE},
+	[insn_dsllv]	= {M(spec_op, 0, 0, 0, 0, dsllv_op),  RS | RT | RD},
+	[insn_dsra]	= {M(spec_op, 0, 0, 0, 0, dsra_op), RT | RD | RE},
+	[insn_dsra32]	= {M(spec_op, 0, 0, 0, 0, dsra32_op), RT | RD | RE},
+	[insn_dsrav]	= {M(spec_op, 0, 0, 0, 0, dsrav_op),  RS | RT | RD},
+	[insn_dsrl]	= {M(spec_op, 0, 0, 0, 0, dsrl_op), RT | RD | RE},
+	[insn_dsrl32]	= {M(spec_op, 0, 0, 0, 0, dsrl32_op), RT | RD | RE},
+	[insn_dsrlv]	= {M(spec_op, 0, 0, 0, 0, dsrlv_op),  RS | RT | RD},
+	[insn_dsubu]	= {M(spec_op, 0, 0, 0, 0, dsubu_op), RS | RT | RD},
+	[insn_eret]	= {M(cop0_op, cop_op, 0, 0, 0, eret_op),  0},
+	[insn_ext]	= {M(spec3_op, 0, 0, 0, 0, ext_op), RS | RT | RD | RE},
+	[insn_ins]	= {M(spec3_op, 0, 0, 0, 0, ins_op), RS | RT | RD | RE},
+	[insn_j]	= {M(j_op, 0, 0, 0, 0, 0),  JIMM},
+	[insn_jal]	= {M(jal_op, 0, 0, 0, 0, 0),	JIMM},
+	[insn_jalr]	= {M(spec_op, 0, 0, 0, 0, jalr_op), RS | RD},
+#ifndef CONFIG_CPU_MIPSR6
+	[insn_jr]	= {M(spec_op, 0, 0, 0, 0, jr_op),  RS},
+#else
+	[insn_jr]	= {M(spec_op, 0, 0, 0, 0, jalr_op),  RS},
+#endif
+	[insn_lb]	= {M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+	[insn_lbu]	= {M(lbu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+	[insn_ld]	= {M(ld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_lddir]	= {M(lwc2_op, 0, 0, 0, lddir_op, mult_op), RS | RT | RD},
+	[insn_ldpte]	= {M(lwc2_op, 0, 0, 0, ldpte_op, mult_op), RS | RD},
+	[insn_ldx]	= {M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD},
+	[insn_lh]	= {M(lh_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_lhu]	= {M(lhu_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+#ifndef CONFIG_CPU_MIPSR6
+	[insn_ll]	= {M(ll_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_lld]	= {M(lld_op, 0, 0, 0, 0, 0),	RS | RT | SIMM},
+#else
+	[insn_ll]	= {M6(spec3_op, 0, 0, 0, ll6_op),  RS | RT | SIMM9},
+	[insn_lld]	= {M6(spec3_op, 0, 0, 0, lld6_op),  RS | RT | SIMM9},
+#endif
+	[insn_lui]	= {M(lui_op, 0, 0, 0, 0, 0),	RT | SIMM},
+	[insn_lw]	= {M(lw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_lwu]	= {M(lwu_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_lwx]	= {M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD},
+	[insn_mfc0]	= {M(cop0_op, mfc_op, 0, 0, 0, 0),  RT | RD | SET},
+	[insn_mfhc0]	= {M(cop0_op, mfhc0_op, 0, 0, 0, 0),  RT | RD | SET},
+	[insn_mfhi]	= {M(spec_op, 0, 0, 0, 0, mfhi_op), RD},
+	[insn_mflo]	= {M(spec_op, 0, 0, 0, 0, mflo_op), RD},
+	[insn_modu]	= {M(spec_op, 0, 0, 0, divu_modu_op, divu_op),
+				RS | RT | RD},
+	[insn_movn]	= {M(spec_op, 0, 0, 0, 0, movn_op), RS | RT | RD},
+	[insn_movz]	= {M(spec_op, 0, 0, 0, 0, movz_op), RS | RT | RD},
+	[insn_mtc0]	= {M(cop0_op, mtc_op, 0, 0, 0, 0),  RT | RD | SET},
+	[insn_mthc0]	= {M(cop0_op, mthc0_op, 0, 0, 0, 0),  RT | RD | SET},
+	[insn_mthi]	= {M(spec_op, 0, 0, 0, 0, mthi_op), RS},
+	[insn_mtlo]	= {M(spec_op, 0, 0, 0, 0, mtlo_op), RS},
+	[insn_mulu]	= {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op),
+				RS | RT | RD},
+	[insn_muhu]	= {M(spec_op, 0, 0, 0, multu_muhu_op, multu_op),
+				RS | RT | RD},
+#ifndef CONFIG_CPU_MIPSR6
+	[insn_mul]	= {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
+#else
+	[insn_mul]	= {M(spec_op, 0, 0, 0, mult_mul_op, mult_op), RS | RT | RD},
+#endif
+	[insn_multu]	= {M(spec_op, 0, 0, 0, 0, multu_op), RS | RT},
+	[insn_nor]	= {M(spec_op, 0, 0, 0, 0, nor_op),  RS | RT | RD},
+	[insn_or]	= {M(spec_op, 0, 0, 0, 0, or_op),  RS | RT | RD},
+	[insn_ori]	= {M(ori_op, 0, 0, 0, 0, 0),	RS | RT | UIMM},
+#ifndef CONFIG_CPU_MIPSR6
+	[insn_pref]	= {M(pref_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+#else
+	[insn_pref]	= {M6(spec3_op, 0, 0, 0, pref6_op),  RS | RT | SIMM9},
+#endif
+	[insn_rfe]	= {M(cop0_op, cop_op, 0, 0, 0, rfe_op),  0},
+	[insn_rotr]	= {M(spec_op, 1, 0, 0, 0, srl_op),  RT | RD | RE},
+	[insn_sb]	= {M(sb_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+#ifndef CONFIG_CPU_MIPSR6
+	[insn_sc]	= {M(sc_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_scd]	= {M(scd_op, 0, 0, 0, 0, 0),	RS | RT | SIMM},
+#else
+	[insn_sc]	= {M6(spec3_op, 0, 0, 0, sc6_op),  RS | RT | SIMM9},
+	[insn_scd]	= {M6(spec3_op, 0, 0, 0, scd6_op),  RS | RT | SIMM9},
+#endif
+	[insn_sd]	= {M(sd_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_seleqz]	= {M(spec_op, 0, 0, 0, 0, seleqz_op), RS | RT | RD},
+	[insn_selnez]	= {M(spec_op, 0, 0, 0, 0, selnez_op), RS | RT | RD},
+	[insn_sh]	= {M(sh_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_sll]	= {M(spec_op, 0, 0, 0, 0, sll_op),  RT | RD | RE},
+	[insn_sllv]	= {M(spec_op, 0, 0, 0, 0, sllv_op),  RS | RT | RD},
+	[insn_slt]	= {M(spec_op, 0, 0, 0, 0, slt_op),  RS | RT | RD},
+	[insn_slti]	= {M(slti_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+	[insn_sltiu]	= {M(sltiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+	[insn_sltu]	= {M(spec_op, 0, 0, 0, 0, sltu_op), RS | RT | RD},
+	[insn_sra]	= {M(spec_op, 0, 0, 0, 0, sra_op),  RT | RD | RE},
+	[insn_srav]	= {M(spec_op, 0, 0, 0, 0, srav_op), RS | RT | RD},
+	[insn_srl]	= {M(spec_op, 0, 0, 0, 0, srl_op),  RT | RD | RE},
+	[insn_srlv]	= {M(spec_op, 0, 0, 0, 0, srlv_op),  RS | RT | RD},
+	[insn_subu]	= {M(spec_op, 0, 0, 0, 0, subu_op),	RS | RT | RD},
+	[insn_sw]	= {M(sw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_sync]	= {M(spec_op, 0, 0, 0, 0, sync_op), RE},
+	[insn_syscall]	= {M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM},
+	[insn_tlbp]	= {M(cop0_op, cop_op, 0, 0, 0, tlbp_op),  0},
+	[insn_tlbr]	= {M(cop0_op, cop_op, 0, 0, 0, tlbr_op),  0},
+	[insn_tlbwi]	= {M(cop0_op, cop_op, 0, 0, 0, tlbwi_op),  0},
+	[insn_tlbwr]	= {M(cop0_op, cop_op, 0, 0, 0, tlbwr_op),  0},
+	[insn_wait]	= {M(cop0_op, cop_op, 0, 0, 0, wait_op), SCIMM},
+	[insn_wsbh]	= {M(spec3_op, 0, 0, 0, wsbh_op, bshfl_op), RT | RD},
+	[insn_xor]	= {M(spec_op, 0, 0, 0, 0, xor_op),  RS | RT | RD},
+	[insn_xori]	= {M(xori_op, 0, 0, 0, 0, 0),  RS | RT | UIMM},
+	[insn_yield]	= {M(spec3_op, 0, 0, 0, 0, yield_op), RS | RD},
+};
+
+#undef M
+
+static inline u32 build_bimm(s32 arg)
+{
+	WARN(arg > 0x1ffff || arg < -0x20000,
+	     KERN_WARNING "Micro-assembler field overflow\n");
+
+	WARN(arg & 0x3, KERN_WARNING "Invalid micro-assembler branch target\n");
+
+	return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff);
+}
+
+static inline u32 build_jimm(u32 arg)
+{
+	WARN(arg & ~(JIMM_MASK << 2),
+	     KERN_WARNING "Micro-assembler field overflow\n");
+
+	return (arg >> 2) & JIMM_MASK;
+}
+
+/*
+ * The order of opcode arguments is implicitly left to right,
+ * starting with RS and ending with FUNC or IMM.
+ */
+static void build_insn(u32 **buf, enum opcode opc, ...)
+{
+	const struct insn *ip;
+	va_list ap;
+	u32 op;
+
+	if (opc < 0 || opc >= insn_invalid ||
+	    (opc == insn_daddiu && r4k_daddiu_bug()) ||
+	    (insn_table[opc].match == 0 && insn_table[opc].fields == 0))
+		panic("Unsupported Micro-assembler instruction %d", opc);
+
+	ip = &insn_table[opc];
+
+	op = ip->match;
+	va_start(ap, opc);
+	if (ip->fields & RS)
+		op |= build_rs(va_arg(ap, u32));
+	if (ip->fields & RT)
+		op |= build_rt(va_arg(ap, u32));
+	if (ip->fields & RD)
+		op |= build_rd(va_arg(ap, u32));
+	if (ip->fields & RE)
+		op |= build_re(va_arg(ap, u32));
+	if (ip->fields & SIMM)
+		op |= build_simm(va_arg(ap, s32));
+	if (ip->fields & UIMM)
+		op |= build_uimm(va_arg(ap, u32));
+	if (ip->fields & BIMM)
+		op |= build_bimm(va_arg(ap, s32));
+	if (ip->fields & JIMM)
+		op |= build_jimm(va_arg(ap, u32));
+	if (ip->fields & FUNC)
+		op |= build_func(va_arg(ap, u32));
+	if (ip->fields & SET)
+		op |= build_set(va_arg(ap, u32));
+	if (ip->fields & SCIMM)
+		op |= build_scimm(va_arg(ap, u32));
+	if (ip->fields & SIMM9)
+		op |= build_scimm9(va_arg(ap, u32));
+	va_end(ap);
+
+	**buf = op;
+	(*buf)++;
+}
+
+static inline void
+__resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab)
+{
+	long laddr = (long)lab->addr;
+	long raddr = (long)rel->addr;
+
+	switch (rel->type) {
+	case R_MIPS_PC16:
+		*rel->addr |= build_bimm(laddr - (raddr + 4));
+		break;
+
+	default:
+		panic("Unsupported Micro-assembler relocation %d",
+		      rel->type);
+	}
+}
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
new file mode 100644
index 0000000000..125140979d
--- /dev/null
+++ b/arch/mips/mm/uasm.c
@@ -0,0 +1,644 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * A small micro-assembler. It is intentionally kept simple, does only
+ * support a subset of instructions, and does not try to hide pipeline
+ * effects like branch delay slots.
+ *
+ * Copyright (C) 2004, 2005, 2006, 2008	 Thiemo Seufer
+ * Copyright (C) 2005, 2007  Maciej W. Rozycki
+ * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 2012, 2013  MIPS Technologies, Inc.  All rights reserved.
+ */
+
+enum fields {
+	RS = 0x001,
+	RT = 0x002,
+	RD = 0x004,
+	RE = 0x008,
+	SIMM = 0x010,
+	UIMM = 0x020,
+	BIMM = 0x040,
+	JIMM = 0x080,
+	FUNC = 0x100,
+	SET = 0x200,
+	SCIMM = 0x400,
+	SIMM9 = 0x800,
+};
+
+#define OP_MASK		0x3f
+#define OP_SH		26
+#define RD_MASK		0x1f
+#define RD_SH		11
+#define RE_MASK		0x1f
+#define RE_SH		6
+#define IMM_MASK	0xffff
+#define IMM_SH		0
+#define JIMM_MASK	0x3ffffff
+#define JIMM_SH		0
+#define FUNC_MASK	0x3f
+#define FUNC_SH		0
+#define SET_MASK	0x7
+#define SET_SH		0
+#define SIMM9_SH	7
+#define SIMM9_MASK	0x1ff
+
+enum opcode {
+	insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1,
+	insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bgtz, insn_blez,
+	insn_bltz, insn_bltzl, insn_bne, insn_break, insn_cache, insn_cfc1,
+	insn_cfcmsa, insn_ctc1, insn_ctcmsa, insn_daddiu, insn_daddu, insn_ddivu,
+	insn_ddivu_r6, insn_di, insn_dins, insn_dinsm, insn_dinsu, insn_divu,
+	insn_divu_r6, insn_dmfc0, insn_dmodu, insn_dmtc0, insn_dmultu,
+	insn_dmulu, insn_drotr, insn_drotr32, insn_dsbh, insn_dshd, insn_dsll,
+	insn_dsll32, insn_dsllv, insn_dsra, insn_dsra32, insn_dsrav, insn_dsrl,
+	insn_dsrl32, insn_dsrlv, insn_dsubu, insn_eret, insn_ext, insn_ins,
+	insn_j, insn_jal, insn_jalr, insn_jr, insn_lb, insn_lbu, insn_ld,
+	insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld,
+	insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi,
+	insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0,
+	insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_muhu, insn_nor,
+	insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc,
+	insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll,
+	insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra,
+	insn_srav, insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync,
+	insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait,
+	insn_wsbh, insn_xor, insn_xori, insn_yield,
+	insn_invalid /* insn_invalid must be last */
+};
+
+struct insn {
+	u32 match;
+	enum fields fields;
+};
+
+static inline u32 build_rs(u32 arg)
+{
+	WARN(arg & ~RS_MASK, KERN_WARNING "Micro-assembler field overflow\n");
+
+	return (arg & RS_MASK) << RS_SH;
+}
+
+static inline u32 build_rt(u32 arg)
+{
+	WARN(arg & ~RT_MASK, KERN_WARNING "Micro-assembler field overflow\n");
+
+	return (arg & RT_MASK) << RT_SH;
+}
+
+static inline u32 build_rd(u32 arg)
+{
+	WARN(arg & ~RD_MASK, KERN_WARNING "Micro-assembler field overflow\n");
+
+	return (arg & RD_MASK) << RD_SH;
+}
+
+static inline u32 build_re(u32 arg)
+{
+	WARN(arg & ~RE_MASK, KERN_WARNING "Micro-assembler field overflow\n");
+
+	return (arg & RE_MASK) << RE_SH;
+}
+
+static inline u32 build_simm(s32 arg)
+{
+	WARN(arg > 0x7fff || arg < -0x8000,
+	     KERN_WARNING "Micro-assembler field overflow\n");
+
+	return arg & 0xffff;
+}
+
+static inline u32 build_uimm(u32 arg)
+{
+	WARN(arg & ~IMM_MASK, KERN_WARNING "Micro-assembler field overflow\n");
+
+	return arg & IMM_MASK;
+}
+
+static inline u32 build_scimm(u32 arg)
+{
+	WARN(arg & ~SCIMM_MASK,
+	     KERN_WARNING "Micro-assembler field overflow\n");
+
+	return (arg & SCIMM_MASK) << SCIMM_SH;
+}
+
+static inline u32 build_scimm9(s32 arg)
+{
+	WARN((arg > 0xff || arg < -0x100),
+	       KERN_WARNING "Micro-assembler field overflow\n");
+
+	return (arg & SIMM9_MASK) << SIMM9_SH;
+}
+
+static inline u32 build_func(u32 arg)
+{
+	WARN(arg & ~FUNC_MASK, KERN_WARNING "Micro-assembler field overflow\n");
+
+	return arg & FUNC_MASK;
+}
+
+static inline u32 build_set(u32 arg)
+{
+	WARN(arg & ~SET_MASK, KERN_WARNING "Micro-assembler field overflow\n");
+
+	return arg & SET_MASK;
+}
+
+static void build_insn(u32 **buf, enum opcode opc, ...);
+
+#define I_u1u2u3(op)					\
+Ip_u1u2u3(op)						\
+{							\
+	build_insn(buf, insn##op, a, b, c);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_s3s1s2(op)					\
+Ip_s3s1s2(op)						\
+{							\
+	build_insn(buf, insn##op, b, c, a);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u2u1u3(op)					\
+Ip_u2u1u3(op)						\
+{							\
+	build_insn(buf, insn##op, b, a, c);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u3u2u1(op)					\
+Ip_u3u2u1(op)						\
+{							\
+	build_insn(buf, insn##op, c, b, a);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u3u1u2(op)					\
+Ip_u3u1u2(op)						\
+{							\
+	build_insn(buf, insn##op, b, c, a);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u1u2s3(op)					\
+Ip_u1u2s3(op)						\
+{							\
+	build_insn(buf, insn##op, a, b, c);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u2s3u1(op)					\
+Ip_u2s3u1(op)						\
+{							\
+	build_insn(buf, insn##op, c, a, b);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u2u1s3(op)					\
+Ip_u2u1s3(op)						\
+{							\
+	build_insn(buf, insn##op, b, a, c);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u2u1msbu3(op)					\
+Ip_u2u1msbu3(op)					\
+{							\
+	build_insn(buf, insn##op, b, a, c+d-1, c);	\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u2u1msb32u3(op)				\
+Ip_u2u1msbu3(op)					\
+{							\
+	build_insn(buf, insn##op, b, a, c+d-33, c);	\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u2u1msb32msb3(op)				\
+Ip_u2u1msbu3(op)					\
+{							\
+	build_insn(buf, insn##op, b, a, c+d-33, c-32);	\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u2u1msbdu3(op)				\
+Ip_u2u1msbu3(op)					\
+{							\
+	build_insn(buf, insn##op, b, a, d-1, c);	\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u1u2(op)					\
+Ip_u1u2(op)						\
+{							\
+	build_insn(buf, insn##op, a, b);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u2u1(op)					\
+Ip_u1u2(op)						\
+{							\
+	build_insn(buf, insn##op, b, a);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u1s2(op)					\
+Ip_u1s2(op)						\
+{							\
+	build_insn(buf, insn##op, a, b);		\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_u1(op)					\
+Ip_u1(op)						\
+{							\
+	build_insn(buf, insn##op, a);			\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+#define I_0(op)						\
+Ip_0(op)						\
+{							\
+	build_insn(buf, insn##op);			\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
+I_u2u1s3(_addiu)
+I_u3u1u2(_addu)
+I_u2u1u3(_andi)
+I_u3u1u2(_and)
+I_u1u2s3(_beq)
+I_u1u2s3(_beql)
+I_u1s2(_bgez)
+I_u1s2(_bgezl)
+I_u1s2(_bgtz)
+I_u1s2(_blez)
+I_u1s2(_bltz)
+I_u1s2(_bltzl)
+I_u1u2s3(_bne)
+I_u1(_break)
+I_u2s3u1(_cache)
+I_u1u2(_cfc1)
+I_u2u1(_cfcmsa)
+I_u1u2(_ctc1)
+I_u2u1(_ctcmsa)
+I_u1u2(_ddivu)
+I_u3u1u2(_ddivu_r6)
+I_u1u2u3(_dmfc0)
+I_u3u1u2(_dmodu)
+I_u1u2u3(_dmtc0)
+I_u1u2(_dmultu)
+I_u3u1u2(_dmulu)
+I_u2u1s3(_daddiu)
+I_u3u1u2(_daddu)
+I_u1(_di);
+I_u1u2(_divu)
+I_u3u1u2(_divu_r6)
+I_u2u1(_dsbh);
+I_u2u1(_dshd);
+I_u2u1u3(_dsll)
+I_u2u1u3(_dsll32)
+I_u3u2u1(_dsllv)
+I_u2u1u3(_dsra)
+I_u2u1u3(_dsra32)
+I_u3u2u1(_dsrav)
+I_u2u1u3(_dsrl)
+I_u2u1u3(_dsrl32)
+I_u3u2u1(_dsrlv)
+I_u2u1u3(_drotr)
+I_u2u1u3(_drotr32)
+I_u3u1u2(_dsubu)
+I_0(_eret)
+I_u2u1msbdu3(_ext)
+I_u2u1msbu3(_ins)
+I_u1(_j)
+I_u1(_jal)
+I_u2u1(_jalr)
+I_u1(_jr)
+I_u2s3u1(_lb)
+I_u2s3u1(_lbu)
+I_u2s3u1(_ld)
+I_u2s3u1(_lh)
+I_u2s3u1(_lhu)
+I_u2s3u1(_ll)
+I_u2s3u1(_lld)
+I_u1s2(_lui)
+I_u2s3u1(_lw)
+I_u2s3u1(_lwu)
+I_u1u2u3(_mfc0)
+I_u1u2u3(_mfhc0)
+I_u3u1u2(_modu)
+I_u3u1u2(_movn)
+I_u3u1u2(_movz)
+I_u1(_mfhi)
+I_u1(_mflo)
+I_u1u2u3(_mtc0)
+I_u1u2u3(_mthc0)
+I_u1(_mthi)
+I_u1(_mtlo)
+I_u3u1u2(_mul)
+I_u1u2(_multu)
+I_u3u1u2(_mulu)
+I_u3u1u2(_muhu)
+I_u3u1u2(_nor)
+I_u3u1u2(_or)
+I_u2u1u3(_ori)
+I_0(_rfe)
+I_u2s3u1(_sb)
+I_u2s3u1(_sc)
+I_u2s3u1(_scd)
+I_u2s3u1(_sd)
+I_u3u1u2(_seleqz)
+I_u3u1u2(_selnez)
+I_u2s3u1(_sh)
+I_u2u1u3(_sll)
+I_u3u2u1(_sllv)
+I_s3s1s2(_slt)
+I_u2u1s3(_slti)
+I_u2u1s3(_sltiu)
+I_u3u1u2(_sltu)
+I_u2u1u3(_sra)
+I_u3u2u1(_srav)
+I_u2u1u3(_srl)
+I_u3u2u1(_srlv)
+I_u2u1u3(_rotr)
+I_u3u1u2(_subu)
+I_u2s3u1(_sw)
+I_u1(_sync)
+I_0(_tlbp)
+I_0(_tlbr)
+I_0(_tlbwi)
+I_0(_tlbwr)
+I_u1(_wait);
+I_u2u1(_wsbh)
+I_u3u1u2(_xor)
+I_u2u1u3(_xori)
+I_u2u1(_yield)
+I_u2u1msbu3(_dins);
+I_u2u1msb32u3(_dinsm);
+I_u2u1msb32msb3(_dinsu);
+I_u1(_syscall);
+I_u1u2s3(_bbit0);
+I_u1u2s3(_bbit1);
+I_u3u1u2(_lwx)
+I_u3u1u2(_ldx)
+I_u1u2(_ldpte)
+I_u2u1u3(_lddir)
+
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+#include <asm/octeon/octeon.h>
+void uasm_i_pref(u32 **buf, unsigned int a, signed int b,
+			    unsigned int c)
+{
+	if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && a <= 24 && a != 5)
+		/*
+		 * As per erratum Core-14449, replace prefetches 0-4,
+		 * 6-24 with 'pref 28'.
+		 */
+		build_insn(buf, insn_pref, c, 28, b);
+	else
+		build_insn(buf, insn_pref, c, a, b);
+}
+UASM_EXPORT_SYMBOL(uasm_i_pref);
+#else
+I_u2s3u1(_pref)
+#endif
+
+/* Handle labels. */
+void uasm_build_label(struct uasm_label **lab, u32 *addr, int lid)
+{
+	(*lab)->addr = addr;
+	(*lab)->lab = lid;
+	(*lab)++;
+}
+UASM_EXPORT_SYMBOL(uasm_build_label);
+
+int uasm_in_compat_space_p(long addr)
+{
+	/* Is this address in 32bit compat space? */
+	return addr == (int)addr;
+}
+UASM_EXPORT_SYMBOL(uasm_in_compat_space_p);
+
+static int uasm_rel_highest(long val)
+{
+#ifdef CONFIG_64BIT
+	return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000;
+#else
+	return 0;
+#endif
+}
+
+static int uasm_rel_higher(long val)
+{
+#ifdef CONFIG_64BIT
+	return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000;
+#else
+	return 0;
+#endif
+}
+
+int uasm_rel_hi(long val)
+{
+	return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000;
+}
+UASM_EXPORT_SYMBOL(uasm_rel_hi);
+
+int uasm_rel_lo(long val)
+{
+	return ((val & 0xffff) ^ 0x8000) - 0x8000;
+}
+UASM_EXPORT_SYMBOL(uasm_rel_lo);
+
+void UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr)
+{
+	if (!uasm_in_compat_space_p(addr)) {
+		uasm_i_lui(buf, rs, uasm_rel_highest(addr));
+		if (uasm_rel_higher(addr))
+			uasm_i_daddiu(buf, rs, rs, uasm_rel_higher(addr));
+		if (uasm_rel_hi(addr)) {
+			uasm_i_dsll(buf, rs, rs, 16);
+			uasm_i_daddiu(buf, rs, rs,
+					uasm_rel_hi(addr));
+			uasm_i_dsll(buf, rs, rs, 16);
+		} else
+			uasm_i_dsll32(buf, rs, rs, 0);
+	} else
+		uasm_i_lui(buf, rs, uasm_rel_hi(addr));
+}
+UASM_EXPORT_SYMBOL(UASM_i_LA_mostly);
+
+void UASM_i_LA(u32 **buf, unsigned int rs, long addr)
+{
+	UASM_i_LA_mostly(buf, rs, addr);
+	if (uasm_rel_lo(addr)) {
+		if (!uasm_in_compat_space_p(addr))
+			uasm_i_daddiu(buf, rs, rs,
+					uasm_rel_lo(addr));
+		else
+			uasm_i_addiu(buf, rs, rs,
+					uasm_rel_lo(addr));
+	}
+}
+UASM_EXPORT_SYMBOL(UASM_i_LA);
+
+/* Handle relocations. */
+void uasm_r_mips_pc16(struct uasm_reloc **rel, u32 *addr, int lid)
+{
+	(*rel)->addr = addr;
+	(*rel)->type = R_MIPS_PC16;
+	(*rel)->lab = lid;
+	(*rel)++;
+}
+UASM_EXPORT_SYMBOL(uasm_r_mips_pc16);
+
+static inline void __resolve_relocs(struct uasm_reloc *rel,
+				    struct uasm_label *lab);
+
+void uasm_resolve_relocs(struct uasm_reloc *rel,
+				  struct uasm_label *lab)
+{
+	struct uasm_label *l;
+
+	for (; rel->lab != UASM_LABEL_INVALID; rel++)
+		for (l = lab; l->lab != UASM_LABEL_INVALID; l++)
+			if (rel->lab == l->lab)
+				__resolve_relocs(rel, l);
+}
+UASM_EXPORT_SYMBOL(uasm_resolve_relocs);
+
+void uasm_move_relocs(struct uasm_reloc *rel, u32 *first, u32 *end,
+			       long off)
+{
+	for (; rel->lab != UASM_LABEL_INVALID; rel++)
+		if (rel->addr >= first && rel->addr < end)
+			rel->addr += off;
+}
+UASM_EXPORT_SYMBOL(uasm_move_relocs);
+
+void uasm_move_labels(struct uasm_label *lab, u32 *first, u32 *end,
+			       long off)
+{
+	for (; lab->lab != UASM_LABEL_INVALID; lab++)
+		if (lab->addr >= first && lab->addr < end)
+			lab->addr += off;
+}
+UASM_EXPORT_SYMBOL(uasm_move_labels);
+
+void uasm_copy_handler(struct uasm_reloc *rel, struct uasm_label *lab,
+				u32 *first, u32 *end, u32 *target)
+{
+	long off = (long)(target - first);
+
+	memcpy(target, first, (end - first) * sizeof(u32));
+
+	uasm_move_relocs(rel, first, end, off);
+	uasm_move_labels(lab, first, end, off);
+}
+UASM_EXPORT_SYMBOL(uasm_copy_handler);
+
+int uasm_insn_has_bdelay(struct uasm_reloc *rel, u32 *addr)
+{
+	for (; rel->lab != UASM_LABEL_INVALID; rel++) {
+		if (rel->addr == addr
+		    && (rel->type == R_MIPS_PC16
+			|| rel->type == R_MIPS_26))
+			return 1;
+	}
+
+	return 0;
+}
+UASM_EXPORT_SYMBOL(uasm_insn_has_bdelay);
+
+/* Convenience functions for labeled branches. */
+void uasm_il_bltz(u32 **p, struct uasm_reloc **r, unsigned int reg,
+			   int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_bltz(p, reg, 0);
+}
+UASM_EXPORT_SYMBOL(uasm_il_bltz);
+
+void uasm_il_b(u32 **p, struct uasm_reloc **r, int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_b(p, 0);
+}
+UASM_EXPORT_SYMBOL(uasm_il_b);
+
+void uasm_il_beq(u32 **p, struct uasm_reloc **r, unsigned int r1,
+			  unsigned int r2, int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_beq(p, r1, r2, 0);
+}
+UASM_EXPORT_SYMBOL(uasm_il_beq);
+
+void uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg,
+			   int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_beqz(p, reg, 0);
+}
+UASM_EXPORT_SYMBOL(uasm_il_beqz);
+
+void uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg,
+			    int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_beqzl(p, reg, 0);
+}
+UASM_EXPORT_SYMBOL(uasm_il_beqzl);
+
+void uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1,
+			  unsigned int reg2, int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_bne(p, reg1, reg2, 0);
+}
+UASM_EXPORT_SYMBOL(uasm_il_bne);
+
+void uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg,
+			   int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_bnez(p, reg, 0);
+}
+UASM_EXPORT_SYMBOL(uasm_il_bnez);
+
+void uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg,
+			    int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_bgezl(p, reg, 0);
+}
+UASM_EXPORT_SYMBOL(uasm_il_bgezl);
+
+void uasm_il_bgez(u32 **p, struct uasm_reloc **r, unsigned int reg,
+			   int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_bgez(p, reg, 0);
+}
+UASM_EXPORT_SYMBOL(uasm_il_bgez);
+
+void uasm_il_bbit0(u32 **p, struct uasm_reloc **r, unsigned int reg,
+			    unsigned int bit, int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_bbit0(p, reg, bit, 0);
+}
+UASM_EXPORT_SYMBOL(uasm_il_bbit0);
+
+void uasm_il_bbit1(u32 **p, struct uasm_reloc **r, unsigned int reg,
+			    unsigned int bit, int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_bbit1(p, reg, bit, 0);
+}
+UASM_EXPORT_SYMBOL(uasm_il_bbit1);